sea.php 9.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383
  1. <?php
  2. class SEA
  3. {
  4. // South East Asian shaper
  5. // sea_category
  6. const OT_X = 0;
  7. const OT_C = 1;
  8. const OT_IV = 2; # Independent Vowel
  9. const OT_T = 3; # Tone Marks
  10. const OT_H = 4; # Halant
  11. const OT_A = 10; # Anusvara
  12. const OT_GB = 12; # Generic Base (OT_DOTTEDCIRCLE in Indic)
  13. const OT_CM = 17; # Consonant Medial
  14. const OT_MR = 22; # Medial Ra
  15. const OT_VAbv = 26;
  16. const OT_VBlw = 27;
  17. const OT_VPre = 28;
  18. const OT_VPst = 29;
  19. // ? From Indic categories
  20. const OT_ZWNJ = 5;
  21. const OT_ZWJ = 6;
  22. const OT_M = 7;
  23. const OT_SM = 8;
  24. const OT_VD = 9;
  25. const OT_NBSP = 11;
  26. const OT_RS = 13;
  27. const OT_Coeng = 14;
  28. const OT_Repha = 15;
  29. const OT_Ra = 16;
  30. // Based on sea_category used to make string to find syllables
  31. // OT_ to string character (using e.g. OT_C from INDIC) hb-ot-shape-complex-sea-private.hh
  32. public static $sea_category_char = array(
  33. 'x',
  34. 'C',
  35. 'V',
  36. 'T',
  37. 'H',
  38. 'x',
  39. 'x',
  40. 'x',
  41. 'x',
  42. 'x',
  43. 'A',
  44. 'x',
  45. 'G',
  46. 'x',
  47. 'x',
  48. 'x',
  49. 'x',
  50. 'M',
  51. 'x',
  52. 'x',
  53. 'x',
  54. 'x',
  55. 'R',
  56. 'x',
  57. 'x',
  58. 'x',
  59. 'a',
  60. 'b',
  61. 'p',
  62. 't',
  63. );
  64. /* Visual positions in a syllable from left to right. */
  65. // sea_position
  66. const POS_START = 0;
  67. const POS_RA_TO_BECOME_REPH = 1;
  68. const POS_PRE_M = 2;
  69. const POS_PRE_C = 3;
  70. const POS_BASE_C = 4;
  71. const POS_AFTER_MAIN = 5;
  72. const POS_ABOVE_C = 6;
  73. const POS_BEFORE_SUB = 7;
  74. const POS_BELOW_C = 8;
  75. const POS_AFTER_SUB = 9;
  76. const POS_BEFORE_POST = 10;
  77. const POS_POST_C = 11;
  78. const POS_AFTER_POST = 12;
  79. const POS_FINAL_C = 13;
  80. const POS_SMVD = 14;
  81. const POS_END = 15;
  82. public static function set_sea_properties(&$info, $scriptblock)
  83. {
  84. $u = $info['uni'];
  85. $type = self::sea_get_categories($u);
  86. $cat = ($type & 0x7F);
  87. $pos = ($type >> 8);
  88. /*
  89. * Re-assign category
  90. */
  91. // Medial Ra
  92. if ($u == 0x1A55 || $u == 0xAA34) {
  93. $cat = self::OT_MR;
  94. }
  95. /*
  96. * Re-assign position.
  97. */
  98. if ($cat == self::OT_M) { // definitely "OT_M" in HarfBuzz - although this does not seem to have been defined ? should be OT_MR
  99. switch ($pos) {
  100. case self::POS_PRE_C: $cat = self::OT_VPre;
  101. break;
  102. case self::POS_ABOVE_C: $cat = self::OT_VAbv;
  103. break;
  104. case self::POS_BELOW_C: $cat = self::OT_VBlw;
  105. break;
  106. case self::POS_POST_C: $cat = self::OT_VPst;
  107. break;
  108. }
  109. }
  110. $info['sea_category'] = $cat;
  111. $info['sea_position'] = $pos;
  112. }
  113. // syllable_type
  114. const CONSONANT_SYLLABLE = 0;
  115. const BROKEN_CLUSTER = 1;
  116. const NON_SEA_CLUSTER = 2;
  117. public static function set_syllables(&$o, $s, &$broken_syllables)
  118. {
  119. $ptr = 0;
  120. $syllable_serial = 1;
  121. $broken_syllables = false;
  122. while ($ptr < strlen($s)) {
  123. $match = '';
  124. $syllable_length = 1;
  125. $syllable_type = self::NON_SEA_CLUSTER;
  126. // CONSONANT_SYLLABLE Consonant syllable
  127. if (preg_match('/^(C|V|G)(p|a|b|t|HC|M|R|T|A)*/', substr($s, $ptr), $ma)) {
  128. $syllable_length = strlen($ma[0]);
  129. $syllable_type = self::CONSONANT_SYLLABLE;
  130. }
  131. // BROKEN_CLUSTER syllable
  132. else if (preg_match('/^(p|a|b|t|HC|M|R|T|A)+/', substr($s, $ptr), $ma)) {
  133. $syllable_length = strlen($ma[0]);
  134. $syllable_type = self::BROKEN_CLUSTER;
  135. $broken_syllables = true;
  136. }
  137. for ($i = $ptr; $i < $ptr + $syllable_length; $i++) {
  138. $o[$i]['syllable'] = ($syllable_serial << 4) | $syllable_type;
  139. }
  140. $ptr += $syllable_length;
  141. $syllable_serial++;
  142. if ($syllable_serial == 16)
  143. $syllable_serial = 1;
  144. }
  145. }
  146. public static function initial_reordering(&$info, $GSUBdata, $broken_syllables, $scriptblock, $dottedcircle)
  147. {
  148. if ($broken_syllables && $dottedcircle) {
  149. self::insert_dotted_circles($info, $dottedcircle);
  150. }
  151. $count = count($info);
  152. if (!$count)
  153. return;
  154. $last = 0;
  155. $last_syllable = $info[0]['syllable'];
  156. for ($i = 1; $i < $count; $i++) {
  157. if ($last_syllable != $info[$i]['syllable']) {
  158. self::initial_reordering_syllable($info, $GSUBdata, $scriptblock, $last, $i);
  159. $last = $i;
  160. $last_syllable = $info[$last]['syllable'];
  161. }
  162. }
  163. self::initial_reordering_syllable($info, $GSUBdata, $scriptblock, $last, $count);
  164. }
  165. public static function insert_dotted_circles(&$info, $dottedcircle)
  166. {
  167. $idx = 0;
  168. $last_syllable = 0;
  169. while ($idx < count($info)) {
  170. $syllable = $info[$idx]['syllable'];
  171. $syllable_type = ($syllable & 0x0F);
  172. if ($last_syllable != $syllable && $syllable_type == self::BROKEN_CLUSTER) {
  173. $last_syllable = $syllable;
  174. $dottedcircle[0]['syllable'] = $info[$idx]['syllable'];
  175. array_splice($info, $idx, 0, $dottedcircle);
  176. } else
  177. $idx++;
  178. }
  179. }
  180. public static function initial_reordering_syllable(&$info, $GSUBdata, $scriptblock, $start, $end)
  181. {
  182. /* broken_cluster: We already inserted dotted-circles, so just call the standalone_cluster. */
  183. $syllable_type = ($info[$start]['syllable'] & 0x0F);
  184. if ($syllable_type == self::NON_SEA_CLUSTER) {
  185. return;
  186. }
  187. if ($syllable_type == self::BROKEN_CLUSTER) {
  188. /* For dotted-circle, this is what Uniscribe does:
  189. * If dotted-circle is the last glyph, it just does nothing. */
  190. if ($info[$end - 1]['sea_category'] == self::OT_GB) {
  191. return;
  192. }
  193. }
  194. $base = $start;
  195. $i = $start;
  196. for (; $i < $base; $i++)
  197. $info[$i]['sea_position'] = self::POS_PRE_C;
  198. if ($i < $end) {
  199. $info[$i]['sea_position'] = self::POS_BASE_C;
  200. $i++;
  201. }
  202. for (; $i < $end; $i++) {
  203. if (isset($info[$i]['sea_category']) && $info[$i]['sea_category'] == self::OT_MR) { /* Pre-base reordering */
  204. $info[$i]['sea_position'] = self::POS_PRE_C;
  205. continue;
  206. }
  207. if (isset($info[$i]['sea_category']) && $info[$i]['sea_category'] == self::OT_VPre) { /* Left matra */
  208. $info[$i]['sea_position'] = self::POS_PRE_M;
  209. continue;
  210. }
  211. $info[$i]['sea_position'] = self::POS_AFTER_MAIN;
  212. }
  213. /* Sit tight, rock 'n roll! */
  214. self::bubble_sort($info, $start, $end - $start);
  215. }
  216. public static function final_reordering(&$info, $GSUBdata, $scriptblock)
  217. {
  218. $count = count($info);
  219. if (!$count)
  220. return;
  221. $last = 0;
  222. $last_syllable = $info[0]['syllable'];
  223. for ($i = 1; $i < $count; $i++) {
  224. if ($last_syllable != $info[$i]['syllable']) {
  225. self::final_reordering_syllable($info, $GSUBdata, $scriptblock, $last, $i);
  226. $last = $i;
  227. $last_syllable = $info[$last]['syllable'];
  228. }
  229. }
  230. self::final_reordering_syllable($info, $GSUBdata, $scriptblock, $last, $count);
  231. }
  232. public static function final_reordering_syllable(&$info, $GSUBdata, $scriptblock, $start, $end)
  233. {
  234. /*
  235. * Nothing to do here at present!
  236. */
  237. }
  238. public static $sea_table = array(
  239. /* New Tai Lue (1980..19DF) */
  240. /* 1980 */ 3841, 3841, 3841, 3841, 3841, 3841, 3841, 3841,
  241. /* 1988 */ 3841, 3841, 3841, 3841, 3841, 3841, 3841, 3841,
  242. /* 1990 */ 3841, 3841, 3841, 3841, 3841, 3841, 3841, 3841,
  243. /* 1998 */ 3841, 3841, 3841, 3841, 3841, 3841, 3841, 3841,
  244. /* 19A0 */ 3841, 3841, 3841, 3841, 3841, 3841, 3841, 3841,
  245. /* 19A8 */ 3841, 3841, 3841, 3841, 3840, 3840, 3840, 3840,
  246. /* 19B0 */ 2823, 2823, 2823, 2823, 2823, 775, 775, 775,
  247. /* 19B8 */ 2823, 2823, 775, 2823, 2823, 2823, 2823, 2823,
  248. /* 19C0 */ 2823, 3857, 3857, 3857, 3857, 3857, 3857, 3857,
  249. /* 19C8 */ 3843, 3843, 3840, 3840, 3840, 3840, 3840, 3840,
  250. /* 19D0 */ 3840, 3840, 3840, 3840, 3840, 3840, 3840, 3840,
  251. /* 19D8 */ 3840, 3840, 3840, 3840, 3840, 3840, 3840, 3840,
  252. /* Tai Tham (1A20..1AAF) */
  253. /* 1A20 */ 3841, 3841, 3841, 3841, 3841, 3841, 3841, 3841,
  254. /* 1A28 */ 3841, 3841, 3841, 3841, 3841, 3841, 3841, 3841,
  255. /* 1A30 */ 3841, 3841, 3841, 3841, 3841, 3841, 3841, 3841,
  256. /* 1A38 */ 3841, 3841, 3841, 3841, 3841, 3841, 3841, 3841,
  257. /* 1A40 */ 3841, 3841, 3841, 3841, 3841, 3841, 3841, 3841,
  258. /* 1A48 */ 3841, 3841, 3841, 3841, 3841, 3842, 3842, 3842,
  259. /* 1A50 */ 3842, 3842, 3842, 3841, 3841, 3857, 3857, 3857,
  260. /* 1A58 */ 3857, 3857, 3857, 3857, 3857, 3857, 3857, 3840,
  261. /* 1A60 */ 3844, 2823, 1543, 2823, 2823, 1543, 1543, 1543,
  262. /* 1A68 */ 1543, 2055, 2055, 1543, 2055, 2823, 775, 775,
  263. /* 1A70 */ 775, 775, 775, 1543, 1543, 3843, 3843, 3843,
  264. /* 1A78 */ 3843, 3843, 3840, 3840, 3840, 3840, 3840, 3840,
  265. /* 1A80 */ 3840, 3840, 3840, 3840, 3840, 3840, 3840, 3840,
  266. /* 1A88 */ 3840, 3840, 3840, 3840, 3840, 3840, 3840, 3840,
  267. /* 1A90 */ 3840, 3840, 3840, 3840, 3840, 3840, 3840, 3840,
  268. /* 1A98 */ 3840, 3840, 3840, 3840, 3840, 3840, 3840, 3840,
  269. /* 1AA0 */ 3840, 3840, 3840, 3840, 3840, 3840, 3840, 3840,
  270. /* 1AA8 */ 3840, 3840, 3840, 3840, 3840, 3840, 3840, 3840,
  271. /* Cham (AA00..AA5F) */
  272. /* AA00 */ 3842, 3842, 3842, 3842, 3842, 3842, 3841, 3841,
  273. /* AA08 */ 3841, 3841, 3841, 3841, 3841, 3841, 3841, 3841,
  274. /* AA10 */ 3841, 3841, 3841, 3841, 3841, 3841, 3841, 3841,
  275. /* AA18 */ 3841, 3841, 3841, 3841, 3841, 3841, 3841, 3841,
  276. /* AA20 */ 3841, 3841, 3841, 3841, 3841, 3841, 3841, 3841,
  277. /* AA28 */ 3841, 1543, 1543, 1543, 1543, 2055, 1543, 775,
  278. /* AA30 */ 775, 1543, 2055, 3857, 3857, 3857, 3857, 3840,
  279. /* AA38 */ 3840, 3840, 3840, 3840, 3840, 3840, 3840, 3840,
  280. /* AA40 */ 3857, 3857, 3857, 3857, 3857, 3857, 3857, 3857,
  281. /* AA48 */ 3857, 3857, 3857, 3857, 3857, 3857, 3840, 3840,
  282. /* AA50 */ 3840, 3840, 3840, 3840, 3840, 3840, 3840, 3840,
  283. /* AA58 */ 3840, 3840, 3840, 3840, 3840, 3840, 3840, 3840,
  284. );
  285. public static function sea_get_categories($u)
  286. {
  287. if (0x1980 <= $u && $u <= 0x19DF)
  288. return self::$sea_table[$u - 0x1980]; // offset 0 for New Tai Lue
  289. if (0x1A20 <= $u && $u <= 0x1AAF)
  290. return self::$sea_table[$u - 0x1A20 + 96]; // offset for Tai Tham
  291. if (0xAA00 <= $u && $u <= 0xAA5F)
  292. return self::$sea_table[$u - 0xAA00 + 96 + 144]; // Cham
  293. if ($u == 0x00A0)
  294. return 3851; // (ISC_CP | (IMC_x << 8))
  295. if ($u == 0x25CC)
  296. return 3851; // (ISC_CP | (IMC_x << 8))
  297. return 3840; // (ISC_x | (IMC_x << 8))
  298. }
  299. public static function bubble_sort(&$arr, $start, $len)
  300. {
  301. if ($len < 2) {
  302. return;
  303. }
  304. $k = $start + $len - 2;
  305. while ($k >= $start) {
  306. for ($j = $start; $j <= $k; $j++) {
  307. if ($arr[$j]['sea_position'] > $arr[$j + 1]['sea_position']) {
  308. $t = $arr[$j];
  309. $arr[$j] = $arr[$j + 1];
  310. $arr[$j + 1] = $t;
  311. }
  312. }
  313. $k--;
  314. }
  315. }
  316. }