diff --git a/scripts/gen-universal-table.py b/scripts/gen-universal-table.py index 5999e10f..45ec2ffb 100755 --- a/scripts/gen-universal-table.py +++ b/scripts/gen-universal-table.py @@ -6,10 +6,16 @@ import os import urllib.request -DISABLED_BLOCKS = ['Samaritan', 'Thai', 'Lao'] +DISABLED_SCRIPTS = { + 'Arabic', + 'Lao', + 'Samaritan', + 'Syriac', + 'Thai', +} -files = ['IndicSyllabicCategory.txt', 'IndicPositionalCategory.txt', - 'UnicodeData.txt', 'ArabicShaping.txt', 'Blocks.txt', +files = ['IndicSyllabicCategory.txt', 'IndicPositionalCategory.txt', 'ArabicShaping.txt', + 'DerivedCoreProperties.txt', 'UnicodeData.txt', 'Blocks.txt', 'Scripts.txt', 'ms-use/IndicSyllabicCategory-Additional.txt', 'ms-use/IndicPositionalCategory-Additional.txt'] for f in files: if not os.path.exists(f): @@ -20,25 +26,18 @@ headers = [[f.readline() for i in range(2)] for j, f in enumerate(files) if j != 2] -for j in range(5, 7): +for j in range(7, 9): for line in files[j]: line = line.rstrip() if not line: break headers[j - 1].append(line) -headers.append(['UnicodeData.txt does not have a header.']) +headers.append(["UnicodeData.txt does not have a header."]) -data = [{} for f in files] -values = [{} for f in files] +data = [{} for _ in files] +values = [{} for _ in files] for i, f in enumerate(files): - extended = False - for line in f: - # TODO: https://github.com/MicrosoftDocs/typography-issues/issues/522 - if extended and line.startswith('# ') and line.find(';'): - line = line[2:] - elif 'USE_Syllabic_Category' in line: - extended = True j = line.find('#') if j >= 0: @@ -55,24 +54,26 @@ else: end = int(uu[1], 16) - t = fields[1 if i not in [2, 3] else 2] - if i == 3: + t = fields[1 if i not in [2, 4] else 2] + + if i == 2: t = 'jt_' + t - elif i == 5 and t == 'Consonant_Final_Modifier': + elif i == 3 and t != 'Default_Ignorable_Code_Point': + continue + elif i == 7 and t == 'Consonant_Final_Modifier': # TODO: https://github.com/MicrosoftDocs/typography-issues/issues/336 t = 
'Syllable_Modifier' - elif i == 6 and t == 'NA': + elif i == 8 and t == 'NA': t = 'Not_Applicable' - i0 = i if i < 5 else i - 5 + i0 = i if i < 7 else i - 7 for u in range(start, end + 1): data[i0][u] = t values[i0][t] = values[i0].get(t, 0) + end - start + 1 -defaults = ('Other', 'Not_Applicable', 'Cn', 'jt_X', 'No_Block') +defaults = ('Other', 'Not_Applicable', 'jt_X', '', 'Cn', 'No_Block', 'Unknown') # TODO Characters that are not in Unicode Indic files, but used in USE -data[0][0x0640] = defaults[0] data[0][0x1B61] = defaults[0] data[0][0x1B63] = defaults[0] data[0][0x1B64] = defaults[0] @@ -82,29 +83,6 @@ data[0][0x1B69] = defaults[0] data[0][0x1B6A] = defaults[0] data[0][0x2060] = defaults[0] -for u in range(0x07CA, 0x07EA + 1): - data[0][u] = defaults[0] -data[0][0x07FA] = defaults[0] -for u in range(0x0840, 0x0858 + 1): - data[0][u] = defaults[0] -for u in range(0x1887, 0x18A8 + 1): - data[0][u] = defaults[0] -data[0][0x18AA] = defaults[0] -for u in range(0xA840, 0xA872 + 1): - data[0][u] = defaults[0] -for u in range(0x10B80, 0x10B91 + 1): - data[0][u] = defaults[0] -for u in range(0x10BA9, 0x10BAE + 1): - data[0][u] = defaults[0] -data[0][0x10FB0] = defaults[0] -for u in range(0x10FB2, 0x10FB6 + 1): - data[0][u] = defaults[0] -for u in range(0x10FB8, 0x10FBF + 1): - data[0][u] = defaults[0] -for u in range(0x10FC1, 0x10FC4 + 1): - data[0][u] = defaults[0] -for u in range(0x10FC9, 0x10FCB + 1): - data[0][u] = defaults[0] # TODO https://github.com/harfbuzz/harfbuzz/pull/1685 data[0][0x1B5B] = 'Consonant_Placeholder' data[0][0x1B5C] = 'Consonant_Placeholder' @@ -123,16 +101,16 @@ combined = {} for i, d in enumerate(data): for u, v in d.items(): - if i >= 2 and not u in combined: - continue if not u in combined: + if i >= 4: + continue combined[u] = list(defaults) combined[u][i] = v combined = {k: v for k, v in combined.items( -) if v[4] not in DISABLED_BLOCKS} +) if v[6] not in DISABLED_SCRIPTS} data = combined del combined -num = len(data) + property_names 
= [ # General_Category @@ -189,6 +167,7 @@ 'Top', 'Bottom', 'Top_And_Bottom', + 'Top_And_Bottom_And_Left', 'Top_And_Right', 'Top_And_Left', 'Top_And_Left_And_Right', @@ -206,12 +185,6 @@ 'jt_X', ] -try: - basestring -except NameError: - basestring = str - - class PropertyValue(object): def __init__(self, name_): self.name = name_ @@ -220,7 +193,7 @@ def __str__(self): return self.name def __eq__(self, other): - return self.name == (other if isinstance(other, basestring) else other.name) + return self.name == (other if isinstance(other, str) else other.name) def __ne__(self, other): return not (self == other) @@ -239,9 +212,8 @@ def __hash__(self): globals().update(property_values) -def is_BASE(U, UISC, UGC, AJT): +def is_BASE(U, UISC, UDI, UGC, AJT): return (UISC in [Number, Consonant, Consonant_Head_Letter, - # SPEC-DRAFT Consonant_Placeholder, Tone_Letter, Vowel_Independent, ] or @@ -251,114 +223,115 @@ def is_BASE(U, UISC, UGC, AJT): Consonant_Subjoined, Vowel, Vowel_Dependent])) -def is_BASE_NUM(U, UISC, UGC, AJT): +def is_BASE_NUM(U, UISC, UDI, UGC, AJT): return UISC == Brahmi_Joining_Number -def is_BASE_OTHER(U, UISC, UGC, AJT): +def is_BASE_OTHER(U, UISC, UDI, UGC, AJT): if UISC == Consonant_Placeholder: return True return U in [0x2015, 0x2022, 0x25FB, 0x25FC, 0x25FD, 0x25FE] -def is_CONS_FINAL(U, UISC, UGC, AJT): +def is_CGJ(U, UISC, UDI, UGC, AJT): + # Also includes VARIATION_SELECTOR, WJ, and ZWJ + return U == 0x200D or UDI and UGC in [Mc, Me, Mn] + + +def is_CONS_FINAL(U, UISC, UDI, UGC, AJT): return ((UISC == Consonant_Final and UGC != Lo) or UISC == Consonant_Succeeding_Repha) -def is_CONS_FINAL_MOD(U, UISC, UGC, AJT): +def is_CONS_FINAL_MOD(U, UISC, UDI, UGC, AJT): return UISC == Syllable_Modifier -def is_CONS_MED(U, UISC, UGC, AJT): +def is_CONS_MED(U, UISC, UDI, UGC, AJT): # Consonant_Initial_Postfixed is new in Unicode 11; not in the spec. 
return (UISC == Consonant_Medial and UGC != Lo or UISC == Consonant_Initial_Postfixed) -def is_CONS_MOD(U, UISC, UGC, AJT): +def is_CONS_MOD(U, UISC, UDI, UGC, AJT): return (UISC in [Nukta, Gemination_Mark, Consonant_Killer] and - not is_SYM_MOD(U, UISC, UGC, AJT)) + not is_SYM_MOD(U, UISC, UDI, UGC, AJT)) -def is_CONS_SUB(U, UISC, UGC, AJT): +def is_CONS_SUB(U, UISC, UDI, UGC, AJT): return UISC == Consonant_Subjoined and UGC != Lo -def is_CONS_WITH_STACKER(U, UISC, UGC, AJT): +def is_CONS_WITH_STACKER(U, UISC, UDI, UGC, AJT): return UISC == Consonant_With_Stacker -def is_HALANT(U, UISC, UGC, AJT): +def is_HALANT(U, UISC, UDI, UGC, AJT): return (UISC in [Virama, Invisible_Stacker] - and not is_HALANT_OR_VOWEL_MODIFIER(U, UISC, UGC, AJT) - and not is_SAKOT(U, UISC, UGC, AJT)) + and not is_HALANT_OR_VOWEL_MODIFIER(U, UISC, UDI, UGC, AJT) + and not is_SAKOT(U, UISC, UDI, UGC, AJT)) -def is_HALANT_OR_VOWEL_MODIFIER(U, UISC, UGC, AJT): - # https://github.com/harfbuzz/harfbuzz/issues/1102 +def is_HALANT_OR_VOWEL_MODIFIER(U, UISC, UDI, UGC, AJT): + # Split off of HALANT # https://github.com/harfbuzz/harfbuzz/issues/1379 - return U in [0x11046, 0x1134D] + return U == 0x1134D -def is_HALANT_NUM(U, UISC, UGC, AJT): +def is_HALANT_NUM(U, UISC, UDI, UGC, AJT): return UISC == Number_Joiner -def is_HIEROGLYPH(U, UISC, UGC, AJT): +def is_HIEROGLYPH(U, UISC, UDI, UGC, AJT): return UISC == Hieroglyph -def is_HIEROGLYPH_JOINER(U, UISC, UGC, AJT): +def is_HIEROGLYPH_JOINER(U, UISC, UDI, UGC, AJT): return UISC == Hieroglyph_Joiner -def is_HIEROGLYPH_SEGMENT_BEGIN(U, UISC, UGC, AJT): +def is_HIEROGLYPH_SEGMENT_BEGIN(U, UISC, UDI, UGC, AJT): return UISC == Hieroglyph_Segment_Begin -def is_HIEROGLYPH_SEGMENT_END(U, UISC, UGC, AJT): +def is_HIEROGLYPH_SEGMENT_END(U, UISC, UDI, UGC, AJT): return UISC == Hieroglyph_Segment_End -def is_ZWNJ(U, UISC, UGC, AJT): +def is_ZWNJ(U, UISC, UDI, UGC, AJT): return UISC == Non_Joiner -def is_OTHER(U, UISC, UGC, AJT): +def is_OTHER(U, UISC, UDI, UGC, 
AJT): + # Also includes BASE_IND, Rsv, and SYM return ((UGC in [Cn, Po] or UISC in [Consonant_Dead, Joiner, Modifying_Letter, Other]) - and not is_BASE(U, UISC, UGC, AJT) - and not is_BASE_OTHER(U, UISC, UGC, AJT) - and not is_SYM(U, UISC, UGC, AJT) - and not is_SYM_MOD(U, UISC, UGC, AJT) + and not is_BASE(U, UISC, UDI, UGC, AJT) + and not is_BASE_OTHER(U, UISC, UDI, UGC, AJT) + and not is_CGJ(U, UISC, UDI, UGC, AJT) + and not is_SYM_MOD(U, UISC, UDI, UGC, AJT) ) -def is_REPHA(U, UISC, UGC, AJT): +def is_REPHA(U, UISC, UDI, UGC, AJT): return UISC in [Consonant_Preceding_Repha, Consonant_Prefixed] -def is_SAKOT(U, UISC, UGC, AJT): +def is_SAKOT(U, UISC, UDI, UGC, AJT): + # Split off of HALANT return U == 0x1A60 -def is_SYM(U, UISC, UGC, AJT): - if U in [0x25CC, 0x1E14F]: - return False - return UGC in [So, Sc] and U not in [0x0F01, 0x1B62, 0x1B68] - - -def is_SYM_MOD(U, UISC, UGC, AJT): +def is_SYM_MOD(U, UISC, UDI, UGC, AJT): return U in [0x1B6B, 0x1B6C, 0x1B6D, 0x1B6E, 0x1B6F, 0x1B70, 0x1B71, 0x1B72, 0x1B73] -def is_VOWEL(U, UISC, UGC, AJT): +def is_VOWEL(U, UISC, UDI, UGC, AJT): # https://github.com/harfbuzz/harfbuzz/issues/376 return (UISC == Pure_Killer or (UGC != Lo and UISC in [Vowel, Vowel_Dependent] and U not in [0xAA29])) -def is_VOWEL_MOD(U, UISC, UGC, AJT): +def is_VOWEL_MOD(U, UISC, UDI, UGC, AJT): # https://github.com/harfbuzz/harfbuzz/issues/376 return (UISC in [Tone_Mark, Cantillation_Mark, Register_Shifter, Visarga] or (UGC != Lo and (UISC == Bindu or U in [0xAA29]))) @@ -368,6 +341,7 @@ def is_VOWEL_MOD(U, UISC, UGC, AJT): 'B': is_BASE, 'N': is_BASE_NUM, 'GB': is_BASE_OTHER, + 'CGJ': is_CGJ, 'F': is_CONS_FINAL, 'FM': is_CONS_FINAL_MOD, 'M': is_CONS_MED, @@ -384,7 +358,6 @@ def is_VOWEL_MOD(U, UISC, UGC, AJT): 'ZWNJ': is_ZWNJ, 'O': is_OTHER, 'R': is_REPHA, - 'S': is_SYM, 'SK': is_SAKOT, 'SM': is_SYM_MOD, 'V': is_VOWEL, @@ -399,9 +372,9 @@ def is_VOWEL_MOD(U, UISC, UGC, AJT): }, 'M': { 'ABV': [Top], - 'BLW': [Bottom, Bottom_And_Left], + 'BLW': 
[Bottom, Bottom_And_Left, Bottom_And_Right], 'PST': [Right], - 'PRE': [Left], + 'PRE': [Left, Top_And_Bottom_And_Left], }, 'CM': { 'ABV': [Top], @@ -439,16 +412,16 @@ def is_VOWEL_MOD(U, UISC, UGC, AJT): def map_to_use(data): out = {} items = use_mapping.items() - for U, (UISC, UIPC, UGC, AJT, UBlock) in data.items(): + for U, (UISC, UIPC, AJT, UDI, UGC, UBlock, _) in data.items(): # Resolve Indic_Syllabic_Category - # TODO: These don't have UISC assigned in Unicode 12.0, but have UIPC + # TODO: These don't have UISC assigned in Unicode 13.0.0, but have UIPC if 0x1CE2 <= U <= 0x1CE8: UISC = Cantillation_Mark # Tibetan: - # TODO: These don't have UISC assigned in Unicode 12.0, but have UIPC + # TODO: These don't have UISC assigned in Unicode 13.0.0, but have UIPC if 0x0F18 <= U <= 0x0F19 or 0x0F3E <= U <= 0x0F3F: UISC = Vowel_Dependent @@ -466,30 +439,25 @@ def map_to_use(data): if U == 0xA982: UISC = Consonant_Succeeding_Repha - values = [k for k, v in items if v(U, UISC, UGC, AJT)] - assert len(values) == 1, "%s %s %s %s %s" % ( - hex(U), UISC, UGC, AJT, values) + values = [k for k,v in items if v(U, UISC, UDI, UGC, AJT)] + assert len(values) == 1, "%s %s %s %s %s %s" % ( + hex(U), UISC, UDI, UGC, AJT, values) USE = values[0] # Resolve Indic_Positional_Category - # TODO: These should die, but have UIPC in Unicode 12.0 + # TODO: These should die, but have UIPC in Unicode 13.0.0 if U in [0x953, 0x954]: UIPC = Not_Applicable - # TODO: In USE's override list but not in Unicode 12.0 - if U == 0x103C: - UIPC = Left - - # TODO: These are not in USE's override list that we have, nor are they in Unicode 12.0 + # TODO: These are not in USE's override list that we have, nor are they in Unicode 13.0.0 if 0xA926 <= U <= 0xA92A: UIPC = Top + # TODO: https://github.com/harfbuzz/harfbuzz/pull/1037 # and https://github.com/harfbuzz/harfbuzz/issues/1631 if U in [0x11302, 0x11303, 0x114C1]: UIPC = Top - if U == 0x1171E: - UIPC = Left if 0x1CF8 <= U <= 0x1CF9: UIPC = Top @@ -501,13 
+469,13 @@ def map_to_use(data): UIPC = Top assert (UIPC in [Not_Applicable, Visual_Order_Left] or U == 0x0F7F or - USE in use_positions), "%s %s %s %s %s %s" % (hex(U), UIPC, USE, UISC, UGC, AJT) + USE in use_positions), "%s %s %s %s %s %s %s" % (hex(U), UIPC, USE, UISC, UDI, UGC, AJT) pos_mapping = use_positions.get(USE, None) if pos_mapping: - values = [k for k, v in pos_mapping.items() if v and UIPC in v] - assert len(values) == 1, '%s %s %s %s %s %s %s' % ( - hex(U), UIPC, USE, UISC, UGC, AJT, values) + values = [k for k,v in pos_mapping.items() if v and UIPC in v] + assert len(values) == 1, "%s %s %s %s %s %s %s %s" % ( + hex(U), UIPC, USE, UISC, UDI, UGC, AJT, values) USE = USE + values[0] out[U] = (USE, UBlock) diff --git a/scripts/ms-use/IndicPositionalCategory-Additional.txt b/scripts/ms-use/IndicPositionalCategory-Additional.txt index 8d325adb..83a164e4 100644 --- a/scripts/ms-use/IndicPositionalCategory-Additional.txt +++ b/scripts/ms-use/IndicPositionalCategory-Additional.txt @@ -1,11 +1,12 @@ # Override values For Indic_Positional_Category # Not derivable # Initial version based on Unicode 7.0 by Andrew Glass 2014-03-17 -# Updated for Unicode 10.0 by Andrew Glass 2017-07-25 +# Updated for Unicode 10.0 by Andrew Glass 2017-07-25 # Ammended for Unicode 10.0 by Andrew Glass 2018-09-21 -# Updated for L2/19-083 by Andrew Glass 2019-05-06 -# Updated for Unicode 12.1 by Andrew Glass 2019-05-30 -# Updated for Unicode 13.0 by Andrew Glass 2020-07-28 +# Updated for L2/19-083 by Andrew Glass 2019-05-06 +# Updated for Unicode 12.1 by Andrew Glass 2019-05-30 +# Updated for Unicode 13.0 by Andrew Glass 2020-07-28 +# Updated for Unicode 14.0 by Andrew Glass 2021-09-28 # ================================================ # ================================================ @@ -14,39 +15,39 @@ # ================================================ # Indic_Positional_Category=Bottom -0F72 ; Bottom # Mn TIBETAN VOWEL SIGN I # Not really below, but need to override to fit into 
Universal model -0F7A..0F7D ; Bottom # Mn [4] TIBETAN VOWEL SIGN E..TIBETAN VOWEL SIGN OO # Not really below, but need to override to fit into Universal model -0F80 ; Bottom # Mn TIBETAN VOWEL SIGN REVERSED I # Not really below, but need to override to fit into Universal model -A9BF ; Bottom # Mc JAVANESE CONSONANT SIGN CAKRA -11127..11129; Bottom # Mn [3] CHAKMA VOWEL SIGN A..CHAKMA VOWEL SIGN II -1112D ; Bottom # Mn CHAKMA VOWEL SIGN AI -11130 ; Bottom # Mn CHAKMA VOWEL SIGN OI +0F72 ; Bottom # Mn TIBETAN VOWEL SIGN I # Not really below, but need to override to fit into Universal model +0F7A..0F7D ; Bottom # Mn [4] TIBETAN VOWEL SIGN E..TIBETAN VOWEL SIGN OO # Not really below, but need to override to fit into Universal model +0F80 ; Bottom # Mn TIBETAN VOWEL SIGN REVERSED I # Not really below, but need to override to fit into Universal model +A9BF ; Bottom # Mc JAVANESE CONSONANT SIGN CAKRA +11127..11129 ; Bottom # Mn [3] CHAKMA VOWEL SIGN A..CHAKMA VOWEL SIGN II +1112D ; Bottom # Mn CHAKMA VOWEL SIGN AI +11130 ; Bottom # Mn CHAKMA VOWEL SIGN OI # ================================================ # Indic_Positional_Category=Left -1C29 ; Left # Mc LEPCHA VOWEL SIGN OO # Reduced from Top_And_Left +1C29 ; Left # Mc LEPCHA VOWEL SIGN OO # Reduced from Top_And_Left # ================================================ # Indic_Positional_Category=Right -A9BE ; Right # Mc JAVANESE CONSONANT SIGN PENGKAL # Reduced from Bottom_And_Right -10A0C ; Right # Mn KHAROSHTHI VOWEL LENGTH MARK # Follows vowels and precedes vowel modifiers -11942 ; Right # Mc DIVES AKURU MEDIAL RA # Reduced from Bottom_And_Right +A9BE ; Right # Mc JAVANESE CONSONANT SIGN PENGKAL # Reduced from Bottom_And_Right +10A0C ; Right # Mn KHAROSHTHI VOWEL LENGTH MARK # Follows vowels and precedes vowel modifiers +11942 ; Right # Mc DIVES AKURU MEDIAL RA # Reduced from Bottom_And_Right # ================================================ # Indic_Positional_Category=Top -0F74 ; Top # Mn TIBETAN VOWEL SIGN U # Not 
really above, but need to override to fit into Universal model -1A18 ; Top # Mn BUGINESE VOWEL SIGN U # Workaround to allow below to occur before above by treating all below marks as above -AA35   ; Top # Mn       CHAM CONSONANT SIGN +0F74 ; Top # Mn TIBETAN VOWEL SIGN U # Not really above, but need to override to fit into Universal model +1A18 ; Top # Mn BUGINESE VOWEL SIGN U # Workaround to allow below to occur before above by treating all below marks as above +AA35   ; Top # Mn       CHAM CONSONANT SIGN # ================================================ # Indic_Positional_Category=Top_And_Right -0E33 ; Top_And_Right # Lo THAI CHARACTER SARA AM # IMC has Right, which seems to be a mistake. -0EB3 ; Top_And_Right # Lo LAO VOWEL SIGN AM # IMC has Right, which seems to be a mistake. +0E33 ; Top_And_Right # Lo THAI CHARACTER SARA AM # IMC has Right, which seems to be a mistake. +0EB3 ; Top_And_Right # Lo LAO VOWEL SIGN AM # IMC has Right, which seems to be a mistake. # ================================================ # ================================================ @@ -55,41 +56,46 @@ AA35   ; Top # Mn       CHAM CONSONANT SIGN # ================================================ # Indic_Positional_Category=Bottom -0859..085B ; Bottom # Mn [3] MANDAIC AFFRICATION MARK..MANDAIC GEMINATION MARK -18A9 ; Bottom # Mn MONGOLIAN LETTER ALI GALI DAGALGA -10AE5 ; Bottom # Mn MANICHAEAN ABBREVIATION MARK ABOVE # Not really bottom, but here for ccc to control -10AE6 ; Bottom # Mn MANICHAEAN ABBREVIATION MARK BELOW -10F46..10F47 ; Bottom # Mn [2] SOGDIAN COMBINING DOT BELOW..SOGDIAN COMBINING TWO DOTS BELOW -10F48..10F4A ; Bottom # Mn [3] SOGDIAN COMBINING DOT ABOVE..SOGDIAN COMBINING CURVE ABOVE # Overriden to below because ccc-based Normalization controls order -10F4B ; Bottom # Mn SOGDIAN COMBINING CURVE BELOW -10F4C ; Bottom # Mn SOGDIAN COMBINING HOOK ABOVE # Overriden to below because ccc-based Normalization controls order -10F4D..10F50 ; Bottom # Mn [4] SOGDIAN COMBINING 
HOOK BELOW..SOGDIAN COMBINING STROKE BELOW -16F4F ; Bottom # Mn MIAO SIGN CONSONANT MODIFIER BAR -16F51..16F87 ; Bottom # Mc [55] MIAO SIGN ASPIRATION..MIAO VOWEL SIGN UI -16F8F..16F92 ; Bottom # Mn [4] MIAO TONE RIGHT..MIAO TONE BELOW +0859..085B ; Bottom # Mn [3] MANDAIC AFFRICATION MARK..MANDAIC GEMINATION MARK +18A9 ; Bottom # Mn MONGOLIAN LETTER ALI GALI DAGALGA +10AE5 ; Bottom # Mn MANICHAEAN ABBREVIATION MARK ABOVE # Overriden, ccc controls order +10AE6 ; Bottom # Mn MANICHAEAN ABBREVIATION MARK BELOW +10F46..10F47 ; Bottom # Mn [2] SOGDIAN COMBINING DOT BELOW..SOGDIAN COMBINING TWO DOTS BELOW +10F48..10F4A ; Bottom # Mn [3] SOGDIAN COMBINING DOT ABOVE..SOGDIAN COMBINING CURVE ABOVE # Overriden, ccc controls order +10F4B ; Bottom # Mn SOGDIAN COMBINING CURVE BELOW +10F4C ; Bottom # Mn SOGDIAN COMBINING HOOK ABOVE # Overriden, ccc controls order +10F4D..10F50 ; Bottom # Mn [4] SOGDIAN COMBINING HOOK BELOW..SOGDIAN COMBINING STROKE BELOW +10F82 ; Bottom # Mn OLD UYGHUR COMBINING DOT ABOVE # Overriden, ccc controls order +10F83 ; Bottom # Mn OLD UYGHUR COMBINING DOT BELOW +10F84 ; Bottom # Mn OLD UYGHUR COMBINING TWO DOTS ABOVE # Overriden, ccc controls order +10F85 ; Bottom # Mn OLD UYGHUR COMBINING TWO DOTS BELOW +16F4F ; Bottom # Mn MIAO SIGN CONSONANT MODIFIER BAR +16F51..16F87 ; Bottom # Mc [55] MIAO SIGN ASPIRATION..MIAO VOWEL SIGN UI +16F8F..16F92 ; Bottom # Mn [4] MIAO TONE RIGHT..MIAO TONE BELOW # ================================================ # Indic_Positional_Category=Left -103C ; Left # Mc MYANMAR CONSONANT SIGN MEDIAL RA +103C ; Left # Mc MYANMAR CONSONANT SIGN MEDIAL RA # ================================================ # Indic_Positional_Category=Top -07EB..07F3 ; Top # Mn [9] NKO COMBINING SHORT HIGH TONE..NKO COMBINING DOUBLE DOT ABOVE -07FD ; Top # Mn NKO DANTAYALAN # Not really top, but assigned here to allow ccc to control mark order -1885..1886 ; Top # Mn [2] MONGOLIAN LETTER ALI GALI BALUDA..MONGOLIAN LETTER ALI GALI THREE BALUDA 
-10EAB..10EAC ; Top # Mn [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK -1E944..1E94A ; Top # Mn [7] ADLAM ALIF LENGTHENER..ADLAM NUKTA -10D24..10D27 ; Top # Mn [4] HANIFI ROHINGYA SIGN HARBAHAY..HANIFI ROHINGYA SIGN TASSI -16B30..16B36 ; Top # Mn [7] PAHAWH HMONG MARK CIM TUB..PAHAWH HMONG MARK CIM TAUM -1E130..1E136 ; Top # Mn [7] NYIAKENG PUACHUE HMONG TONE-B..NYIAKENG PUACHUE HMONG TONE-D -1E2EC..1E2EF ; Top # Mn [4] WANCHO TONE TUP..WANCHO TONE KOINI +07EB..07F3 ; Top # Mn [9] NKO COMBINING SHORT HIGH TONE..NKO COMBINING DOUBLE DOT ABOVE +07FD ; Top # Mn NKO DANTAYALAN # Not really top, but assigned here to allow ccc to control mark order +1885..1886 ; Top # Mn [2] MONGOLIAN LETTER ALI GALI BALUDA..MONGOLIAN LETTER ALI GALI THREE BALUDA +10D24..10D27 ; Top # Mn [4] HANIFI ROHINGYA SIGN HARBAHAY..HANIFI ROHINGYA SIGN TASSI +10EAB..10EAC ; Top # Mn [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK +16B30..16B36 ; Top # Mn [7] PAHAWH HMONG MARK CIM TUB..PAHAWH HMONG MARK CIM TAUM +1E130..1E136 ; Top # Mn [7] NYIAKENG PUACHUE HMONG TONE-B..NYIAKENG PUACHUE HMONG TONE-D +1E2AE ; Top # Mn TOTO SIGN RISING TONE +1E2EC..1E2EF ; Top # Mn [4] WANCHO TONE TUP..WANCHO TONE KOINI +1E944..1E94A ; Top # Mn [7] ADLAM ALIF LENGTHENER..ADLAM NUKTA # ================================================ # Indic_Positional_Category=Overstruck -1BC9D..1BC9E ; Overstruck # Mn [2] DUPLOYAN THICK LETTER SELECTOR..DUPLOYAN DOUBLE MARK +1BC9D..1BC9E ; Overstruck # Mn [2] DUPLOYAN THICK LETTER SELECTOR..DUPLOYAN DOUBLE MARK # ================================================ # ================================================ @@ -98,5 +104,6 @@ AA35   ; Top # Mn       CHAM CONSONANT SIGN # ================================================ # Indic_Positional_Category=NA -180B..180D ; NA # Mn [3] MONGOLIAN FREE VARIATION SELECTOR ONE..MONGOLIAN FREE VARIATION SELECTOR THREE -2D7F ; NA # Mn TIFINAGH CONSONANT JOINER +180B..180D ; NA # Mn [3] MONGOLIAN FREE VARIATION 
SELECTOR ONE..MONGOLIAN FREE VARIATION SELECTOR THREE +180F ; NA # Mn MONGOLIAN FREE VARIATION SELECTOR FOUR +2D7F ; NA # Mn TIFINAGH CONSONANT JOINER diff --git a/scripts/ms-use/IndicSyllabicCategory-Additional.txt b/scripts/ms-use/IndicSyllabicCategory-Additional.txt index 09120552..277117cf 100644 --- a/scripts/ms-use/IndicSyllabicCategory-Additional.txt +++ b/scripts/ms-use/IndicSyllabicCategory-Additional.txt @@ -1,15 +1,16 @@ # Override values For Indic_Syllabic_Category # Not derivable # Initial version based on Unicode 7.0 by Andrew Glass 2014-03-17 -# Updated for Unicode 10.0 by Andrew Glass 2017-07-25 -# Updated for Unicode 12.1 by Andrew Glass 2019-05-24 -# Updated for Unicode 13.0 by Andrew Glass 2020-07-28 +# Updated for Unicode 10.0 by Andrew Glass 2017-07-25 +# Updated for Unicode 12.1 by Andrew Glass 2019-05-24 +# Updated for Unicode 13.0 by Andrew Glass 2020-07-28 +# Updated for Unicode 14.0 by Andrew Glass 2021-09-25 # ================================================ # OVERRIDES TO ASSIGNED VALUES # ================================================ -# Indic_Syllabic_Category=Bindu +# Indic_Syllabic_Category=Bindu 193A ; Bindu # Mn LIMBU SIGN KEMPHRENG AA29 ; Bindu # Mn  CHAM VOWEL SIGN AA 10A0D ; Bindu # Mn KHAROSHTHI SIGN DOUBLE RING BELOW @@ -17,46 +18,45 @@ AA29 ; Bindu # Mn  CHAM VOWEL SIGN AA # ================================================ # Indic_Syllabic_Category=Consonant -0840..0858 ; Consonant # Lo [25] MANDAIC LETTER HALQA..MANDAIC LETTER AIN -0F00..0F01 ; Consonant # Lo [2] TIBETAN SYLLABLE OM..TIBETAN MARK GTER YIG MGO TRUNCATED -0F04..0F06 ; Consonant # Po TIBETAN MARK INITIAL YIG MGO MDUN MA..TIBETAN MARK CARET YIG MGO PHUR SHAD MA -19C1..19C7 ; Consonant # Lo [7] NEW TAI LUE LETTER FINAL V..NEW TAI LUE LETTER FINAL B # Reassigned to avoid clustering with a base consonant -25CC ; Consonant # So DOTTED CIRCLE +0840..0858 ; Consonant # Lo [25] MANDAIC LETTER HALQA..MANDAIC LETTER AIN +0F00..0F01 ; Consonant # Lo [2] TIBETAN SYLLABLE 
OM..TIBETAN MARK GTER YIG MGO TRUNCATED +0F04..0F06 ; Consonant # Po TIBETAN MARK INITIAL YIG MGO MDUN MA..TIBETAN MARK CARET YIG MGO PHUR SHAD MA +19C1..19C7 ; Consonant # Lo [7] NEW TAI LUE LETTER FINAL V..NEW TAI LUE LETTER FINAL B # Reassigned to avoid clustering with a base consonant +25CC ; Consonant # So DOTTED CIRCLE # ================================================ # Indic_Syllabic_Category=Consonant_Dead -0F7F ; Consonant_Dead # Mc TIBETAN SIGN RNAM BCAD # reassigned so that visarga will form an independent cluster +0F7F ; Consonant_Dead # Mc TIBETAN SIGN RNAM BCAD # reassigned so that visarga will form an independent cluster # ================================================ # Indic_Syllabic_Category=Consonant_Final -0F35 ; Consonant_Final # Mn TIBETAN MARK NGAS BZUNG NYI ZLA -0F37 ; Consonant_Final # Mn TIBETAN MARK NGAS BZUNG SGOR RTAGS -0FC6 ; Consonant_Final # Mn TIBETAN SYMBOL PADMA GDAN +0F35 ; Consonant_Final # Mn TIBETAN MARK NGAS BZUNG NYI ZLA +0F37 ; Consonant_Final # Mn TIBETAN MARK NGAS BZUNG SGOR RTAGS +0FC6 ; Consonant_Final # Mn TIBETAN SYMBOL PADMA GDAN # ================================================ # Indic_Syllabic_Category=Consonant_Final_Modifier -1C36 ; Consonant_Final_Modifier # Mn LEPCHA SIGN RAN +1C36 ; Consonant_Final_Modifier # Mn LEPCHA SIGN RAN # ================================================ -# Indic_Syllabic_Category=Gemination_Mark +# Indic_Syllabic_Category=Gemination_Mark 11134 ; Gemination_Mark # Mc CHAKMA MAAYYAA # ================================================ -# Indic_Syllabic_Category=Nukta +# Indic_Syllabic_Category=Nukta 0F71 ; Nukta # Mn TIBETAN VOWEL SIGN AA # Reassigned to get this before an above vowel 10A38..10A3A ; Nukta # Mn [3] KHAROSHTHI SIGN BAR ABOVE..KHAROSHTHI SIGN DOT BELOW # ================================================ # Indic_Syllabic_Category=Tone_Mark -A982 ; Tone_Mark # Mn JAVANESE SIGN LAYAR# Not a repha, because it does not reorder to front of cluster -1A7B..1A7C ; Tone_Mark # Mn 
[2] TAI THAM SIGN MAI SAM..TAI THAM SIGN KHUEN-LUE KARAN -1A7F ; Tone_Mark # Mn TAI THAM COMBINING CRYPTOGRAMMIC DOT +1A7B..1A7C ; Tone_Mark # Mn [2] TAI THAM SIGN MAI SAM..TAI THAM SIGN KHUEN-LUE KARAN +1A7F ; Tone_Mark # Mn TAI THAM COMBINING CRYPTOGRAMMIC DOT # ================================================ @@ -72,41 +72,50 @@ AABD ; Vowel_Independent # Lo TAI VIET VOWEL AN # ================================================ # Indic_Syllabic_Category=Consonant -0800..0815 ; Consonant # Lo [22] SAMARITAN LETTER ALAF..SAMARITAN LETTER TAAF -1800 ; Consonant # Po MONGOLIAN BIRGA # Reassigned so that legacy Birga + MFVS sequences still work -1807 ; Consonant # Po MONGOLIAN SIBE SYLLABLE BOUNDARY MARKER -180A ; Consonant # Po MONGOLIAN NIRUGU -1820..1878 ; Consonant # Lo [88] MONGOLIAN LETTER A..MONGOLIAN LETTER CHA WITH TWO DOTS -1843 ; Consonant # Lm MONGOLIAN LETTER TODO LONG VOWEL SIGN -2D30..2D67 ; Consonant # Lo [56] TIFINAGH LETTER YA..TIFINAGH LETTER YO -2D6F ; Consonant # Lm TIFINAGH MODIFIER LETTER LABIALIZATION MARK -10AC0..10AC7 ; Consonant # Lo [8] MANICHAEAN LETTER ALEPH..MANICHAEAN LETTER WAW -10AC9..10AE4 ; Consonant # Lo [28] MANICHAEAN LETTER ZAYIN..MANICHAEAN LETTER TAW -10D00..10D23 ; Consonant # Lo [36] HANIFI ROHINGYA LETTER A..HANIFI ROHINGYA MARK NA KHONNA -10E80..10EA9 ; Consonant # Lo [42] YEZIDI LETTER ELIF..YEZIDI LETTER ET -10EB0..10EB1 ; Consonant # Lo [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE -10F30..10F45 ; Consonant # Lo [22] SOGDIAN LETTER ALEPH..SOGDIAN INDEPENDENT SHIN -111DA ; Consonant # Lo SHARADA EKAM -#HIEROGLYPHS moved to new category -#13000..1342E ; Consonant # Lo [1071] EGYPTIAN HIEROGLYPH A001..EGYPTIAN HIEROGLYPH AA032 +0800..0815 ; Consonant # Lo [22] SAMARITAN LETTER ALAF..SAMARITAN LETTER TAAF +1800 ; Consonant # Po MONGOLIAN BIRGA # Reassigned so that legacy Birga + MFVS sequences still work +1807 ; Consonant # Po MONGOLIAN SIBE SYLLABLE BOUNDARY MARKER +180A ; Consonant # Po 
MONGOLIAN NIRUGU +1820..1878 ; Consonant # Lo [88] MONGOLIAN LETTER A..MONGOLIAN LETTER CHA WITH TWO DOTS +1843 ; Consonant # Lm MONGOLIAN LETTER TODO LONG VOWEL SIGN +2D30..2D67 ; Consonant # Lo [56] TIFINAGH LETTER YA..TIFINAGH LETTER YO +2D6F ; Consonant # Lm TIFINAGH MODIFIER LETTER LABIALIZATION MARK +10570..1057A ; Consonant # Lo [11] VITHKUQI CAPITAL LETTER A..VITHKUQI CAPITAL LETTER GA +1057C..1058A ; Consonant # Lo [15] VITHKUQI CAPITAL LETTER HA..VITHKUQI CAPITAL LETTER RE +1058C..10592 ; Consonant # Lo [7] VITHKUQI CAPITAL LETTER SE..VITHKUQI CAPITAL LETTER XE +10594..10595 ; Consonant # Lo [2] VITHKUQI CAPITAL LETTER Y..VITHKUQI CAPITAL LETTER ZE +10597..105A1 ; Consonant # Lo [11] VITHKUQI SMALL LETTER A..VITHKUQI SMALL LETTER GA +105A3..105B1 ; Consonant # Lo [15] VITHKUQI SMALL LETTER HA..VITHKUQI SMALL LETTER RE +105B3..105B9 ; Consonant # Lo [7] VITHKUQI SMALL LETTER SE..VITHKUQI SMALL LETTER XE +105BB..105BC ; Consonant # Lo [2] VITHKUQI SMALL LETTER Y..VITHKUQI SMALL LETTER ZE +10AC0..10AC7 ; Consonant # Lo [8] MANICHAEAN LETTER ALEPH..MANICHAEAN LETTER WAW +10AC9..10AE4 ; Consonant # Lo [28] MANICHAEAN LETTER ZAYIN..MANICHAEAN LETTER TAW +10D00..10D23 ; Consonant # Lo [36] HANIFI ROHINGYA LETTER A..HANIFI ROHINGYA MARK NA KHONNA +10E80..10EA9 ; Consonant # Lo [42] YEZIDI LETTER ELIF..YEZIDI LETTER ET +10EB0..10EB1 ; Consonant # Lo [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE +10F30..10F45 ; Consonant # Lo [22] SOGDIAN LETTER ALEPH..SOGDIAN INDEPENDENT SHIN +111DA ; Consonant # Lo SHARADA EKAM +#HIEROGLYPHS to be moved to new category +13000..1342E ; Consonant # Lo [1071] EGYPTIAN HIEROGLYPH A001..EGYPTIAN HIEROGLYPH AA032 #For the Begin and End segment to be handled fully correctly, the cluster model needs to be modified. 
-#13437..13438 ; Consonant # Lo [2] EGYPTIAN HIEROGLYPH BEGIN SEGMENT..EGYPTIAN HIEROGLYPH END SEGMENT -16B00..16B2F ; Consonant # Lo [48] PAHAWH HMONG VOWEL KEEB..PAHAWH HMONG CONSONANT CAU -16F00..16F4A ; Consonant # Lo [75] MIAO LETTER PA..MIAO LETTER RTE -16FE4 ; Consonant # Mn KHITAN SMALL SCRIPT FILLER -18B00..18CD5 ; Consonant # Lo [470] KHITAN SMALL SCRIPT CHARACTER-18B00..KHITAN SMALL SCRIPT CHARACTER-18CD5 -1BC00..1BC6A ; Consonant # Lo [107] DUPLOYAN LETTER H..DUPLOYAN LETTER VOCALIC M -1BC70..1BC7C ; Consonant # Lo [13] DUPLOYAN AFFIX LEFT HORIZONTAL SECANT..DUPLOYAN AFFIX ATTACHED TANGENT HOOK -1BC80..1BC88 ; Consonant # Lo [9] DUPLOYAN AFFIX HIGH ACUTE..DUPLOYAN AFFIX HIGH VERTICAL -1BC90..1BC99 ; Consonant # Lo [10] DUPLOYAN AFFIX LOW ACUTE..DUPLOYAN AFFIX LOW ARROW -1E100..1E12C ; Consonant # Lo [45] NYIAKENG PUACHUE HMONG LETTER MA..NYIAKENG PUACHUE HMONG LETTER W -1E137..1E13D ; Consonant # Lm [7] NYIAKENG PUACHUE HMONG SIGN FOR PERSON..NYIAKENG PUACHUE HMONG SYLLABLE LENGTHENER -1E14E ; Consonant # Lo NYIAKENG PUACHUE HMONG LOGOGRAM NYAJ -1E14F ; Consonant # So NYIAKENG PUACHUE HMONG CIRCLED CA -1E2C0..1E2EB ; Consonant # Lo [44] WANCHO LETTER AA..WANCHO LETTER YIH -1E900..1E921 ; Consonant # Lu [34] ADLAM CAPITAL LETTER ALIF..ADLAM CAPITAL LETTER SHA -1E922..1E943 ; Consonant # Ll [34] ADLAM SMALL LETTER ALIF..ADLAM SMALL LETTER SHA -1E94B ; Consonant # Lm ADLAM NASALIZATION MARK +13437..13438 ; Consonant # Lo [2] EGYPTIAN HIEROGLYPH BEGIN SEGMENT..EGYPTIAN HIEROGLYPH END SEGMENT +16B00..16B2F ; Consonant # Lo [48] PAHAWH HMONG VOWEL KEEB..PAHAWH HMONG CONSONANT CAU +16F00..16F4A ; Consonant # Lo [75] MIAO LETTER PA..MIAO LETTER RTE +16FE4 ; Consonant # Mn KHITAN SMALL SCRIPT FILLER # Avoids Mn pushing this into VOWEL class +18B00..18CD5 ; Consonant # Lo [470] KHITAN SMALL SCRIPT CHARACTER-18B00..KHITAN SMALL SCRIPT CHARACTER-18CD5 +1BC00..1BC6A ; Consonant # Lo [107] DUPLOYAN LETTER H..DUPLOYAN LETTER VOCALIC M +1BC70..1BC7C ; Consonant # Lo 
[13] DUPLOYAN AFFIX LEFT HORIZONTAL SECANT..DUPLOYAN AFFIX ATTACHED TANGENT HOOK +1BC80..1BC88 ; Consonant # Lo [9] DUPLOYAN AFFIX HIGH ACUTE..DUPLOYAN AFFIX HIGH VERTICAL +1BC90..1BC99 ; Consonant # Lo [10] DUPLOYAN AFFIX LOW ACUTE..DUPLOYAN AFFIX LOW ARROW +1E100..1E12C ; Consonant # Lo [45] NYIAKENG PUACHUE HMONG LETTER MA..NYIAKENG PUACHUE HMONG LETTER W +1E137..1E13D ; Consonant # Lm [7] NYIAKENG PUACHUE HMONG SIGN FOR PERSON..NYIAKENG PUACHUE HMONG SYLLABLE LENGTHENER +1E14E ; Consonant # Lo NYIAKENG PUACHUE HMONG LOGOGRAM NYAJ +1E14F ; Consonant # So NYIAKENG PUACHUE HMONG CIRCLED CA +1E290..1E2AD ; Consonant # Lo [30] TOTO LETTER PA..TOTO LETTER A +1E2C0..1E2EB ; Consonant # Lo [44] WANCHO LETTER AA..WANCHO LETTER YIH +1E900..1E921 ; Consonant # Lu [34] ADLAM CAPITAL LETTER ALIF..ADLAM CAPITAL LETTER SHA +1E922..1E943 ; Consonant # Ll [34] ADLAM SMALL LETTER ALIF..ADLAM SMALL LETTER SHA +1E94B ; Consonant # Lm ADLAM NASALIZATION MARK # ================================================ @@ -116,13 +125,13 @@ AABD ; Vowel_Independent # Lo TAI VIET VOWEL AN # ================================================ # Indic_Syllabic_Category=Gemination_Mark -10D27 ; Gemination_Mark # Mn HANIFI ROHINGYA SIGN TASSI +10D27 ; Gemination_Mark # Mn HANIFI ROHINGYA SIGN TASSI # ================================================ # Indic_Syllabic_Category=Modifying_Letter -FE00..FE0F ; Modifying_Letter # Mn [16] VARIATION SELECTOR-1..VARIATION SELECTOR-16# Need to treat them as isolated bases so they don't merge with a cluster in invalid scenarios -16F50 ; Modifying_Letter # Lo MIAO LETTER NASALIZATION +FE00..FE0F ; Modifying_Letter # Mn [16] VARIATION SELECTOR-1..VARIATION SELECTOR-16# Need to treat them as isolated bases so they don't merge with a cluster in invalid scenarios +16F50 ; Modifying_Letter # Lo MIAO LETTER NASALIZATION # ================================================ @@ -136,49 +145,52 @@ FE00..FE0F ; Modifying_Letter # Mn [16] VARIATION SELECTOR-1..VARIATION SELE 
16F4F ; Nukta # Mn MIAO SIGN CONSONANT MODIFIER BAR 1BC9D..1BC9E ; Nukta # Mn [2] DUPLOYAN THICK LETTER SELECTOR..DUPLOYAN DOUBLE MARK 1E944..1E94A ; Nukta # Mn [7] ADLAM ALIF LENGTHENER..ADLAM NUKTA +10F82..10F85 ; Nukta # Mn [4] OLD UYGHUR COMBINING DOT ABOVE..OLD UYGHUR COMBINING TWO DOTS BELOW # ================================================ # Indic_Syllabic_Category=Number -10D30..10D39 ; Number # Nd [10] HANIFI ROHINGYA DIGIT ZERO..HANIFI ROHINGYA DIGIT NINE -10F51..10F54 ; Number # No [4] SOGDIAN NUMBER ONE..SOGDIAN NUMBER ONE HUNDRED -1E140..1E149 ; Number # Nd [10] NYIAKENG PUACHUE HMONG DIGIT ZERO..NYIAKENG PUACHUE HMONG DIGIT NINE -1E2F0..1E2F9 ; Number # Nd [10] WANCHO DIGIT ZERO..WANCHO DIGIT NINE -1E950..1E959 ; Number # Nd [10] ADLAM DIGIT ZERO..ADLAM DIGIT NINE +10D30..10D39 ; Number # Nd [10] HANIFI ROHINGYA DIGIT ZERO..HANIFI ROHINGYA DIGIT NINE +10F51..10F54 ; Number # No [4] SOGDIAN NUMBER ONE..SOGDIAN NUMBER ONE HUNDRED +16AC0..16AC9 ; Number # Nd [10] TANGSA DIGIT ZERO..TANGSA DIGIT NINE +1E140..1E149 ; Number # Nd [10] NYIAKENG PUACHUE HMONG DIGIT ZERO..NYIAKENG PUACHUE HMONG DIGIT NINE +1E2F0..1E2F9 ; Number # Nd [10] WANCHO DIGIT ZERO..WANCHO DIGIT NINE +1E950..1E959 ; Number # Nd [10] ADLAM DIGIT ZERO..ADLAM DIGIT NINE # ================================================ # Indic_Syllabic_Category=Tone_Mark -07EB..07F3 ; Tone_Mark # Mn [9] NKO COMBINING SHORT HIGH TONE..NKO COMBINING DOUBLE DOT ABOVE -07FD ; Tone_Mark # Mn NKO DANTAYALAN -0F86..0F87 ; Tone_Mark # Mn [2] TIBETAN SIGN LCI RTAGS..TIBETAN SIGN YANG RTAGS -17CF ; Tone_Mark # Mn KHMER SIGN AHSDA -10D24..10D26 ; Tone_Mark # Mn [3] HANIFI ROHINGYA SIGN HARBAHAY..HANIFI ROHINGYA SIGN TANA -10F46..10F50 ; Tone_Mark # Mn [11] SOGDIAN COMBINING DOT BELOW..SOGDIAN COMBINING STROKE BELOW -16B30..16B36 ; Tone_Mark # Mn [7] PAHAWH HMONG MARK CIM TUB..PAHAWH HMONG MARK CIM TAUM -16F8F..16F92 ; Tone_Mark # Mn [4] MIAO TONE RIGHT..MIAO TONE BELOW -1E130..1E136 ; Tone_Mark # Mn [7] NYIAKENG 
PUACHUE HMONG TONE-B..NYIAKENG PUACHUE HMONG TONE-D -1E2EC..1E2EF ; Tone_Mark # Mn [4] WANCHO TONE TUP..WANCHO TONE KOINI +07EB..07F3 ; Tone_Mark # Mn [9] NKO COMBINING SHORT HIGH TONE..NKO COMBINING DOUBLE DOT ABOVE +07FD ; Tone_Mark # Mn NKO DANTAYALAN +0F86..0F87 ; Tone_Mark # Mn [2] TIBETAN SIGN LCI RTAGS..TIBETAN SIGN YANG RTAGS +17CF ; Tone_Mark # Mn KHMER SIGN AHSDA +10D24..10D26 ; Tone_Mark # Mn [3] HANIFI ROHINGYA SIGN HARBAHAY..HANIFI ROHINGYA SIGN TANA +10F46..10F50 ; Tone_Mark # Mn [11] SOGDIAN COMBINING DOT BELOW..SOGDIAN COMBINING STROKE BELOW +16B30..16B36 ; Tone_Mark # Mn [7] PAHAWH HMONG MARK CIM TUB..PAHAWH HMONG MARK CIM TAUM +16F8F..16F92 ; Tone_Mark # Mn [4] MIAO TONE RIGHT..MIAO TONE BELOW +1E130..1E136 ; Tone_Mark # Mn [7] NYIAKENG PUACHUE HMONG TONE-B..NYIAKENG PUACHUE HMONG TONE-D +1E2AE ; Tone_Mark # Mn TOTO SIGN RISING TONE +1E2EC..1E2EF ; Tone_Mark # Mn [4] WANCHO TONE TUP..WANCHO TONE KOINI # ================================================ # Indic_Syllabic_Category=Virama -2D7F ; Virama # Mn TIFINAGH CONSONANT JOINER -13430..13436 ; Virama # Cf [7] EGYPTIAN HIEROGLYPH VERTICAL JOINER..EGYPTIAN HIEROGLYPH OVERLAY MIDDLE +2D7F ; Virama # Mn TIFINAGH CONSONANT JOINER +13430..13436 ; Virama # Cf [7] EGYPTIAN HIEROGLYPH VERTICAL JOINER..EGYPTIAN HIEROGLYPH OVERLAY MIDDLE # ================================================ # Indic_Syllabic_Category=Vowel_Independent -AAB1 ; Vowel_Independent # Lo TAI VIET VOWEL AA -AABA ; Vowel_Independent # Lo TAI VIET VOWEL UA -AABD ; Vowel_Independent # Lo TAI VIET VOWEL AN +AAB1 ; Vowel_Independent # Lo TAI VIET VOWEL AA +AABA ; Vowel_Independent # Lo TAI VIET VOWEL UA +AABD ; Vowel_Independent # Lo TAI VIET VOWEL AN # ================================================ # Indic_Syllabic_Category=Vowel_Dependent -0B55 ; Vowel_Dependent # Mn ORIYA SIGN OVERLINE -10EAB..10EAC ; Vowel_Dependent # Mn [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK -16F51..16F87 ; Vowel_Dependent # Mc [55] MIAO SIGN 
ASPIRATION..MIAO VOWEL SIGN UI +0B55 ; Vowel_Dependent # Mn ORIYA SIGN OVERLINE +10EAB..10EAC ; Vowel_Dependent # Mn [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK +16F51..16F87 ; Vowel_Dependent # Mc [55] MIAO SIGN ASPIRATION..MIAO VOWEL SIGN UI # ================================================ # ================================================ diff --git a/src/buffer.rs b/src/buffer.rs index fb24d8bb..1b23584f 100644 --- a/src/buffer.rs +++ b/src/buffer.rs @@ -193,7 +193,7 @@ impl GlyphInfo { // FVSes are GC=Mn, we have use a separate bit to remember them. // Fixes: // https://github.com/harfbuzz/harfbuzz/issues/234 - 0x180B..=0x180D => props |= UnicodeProps::HIDDEN.bits(), + 0x180B..=0x180D | 0x180F => props |= UnicodeProps::HIDDEN.bits(), // TAG characters need similar treatment. Fixes: // https://github.com/harfbuzz/harfbuzz/issues/463 @@ -1495,7 +1495,7 @@ bitflags::bitflags! { pub struct UnicodeProps: u16 { const GENERAL_CATEGORY = 0x001F; const IGNORABLE = 0x0020; - // MONGOLIAN FREE VARIATION SELECTOR 1..3, or TAG characters + // MONGOLIAN FREE VARIATION SELECTOR 1..4, or TAG characters const HIDDEN = 0x0040; const CONTINUATION = 0x0080; diff --git a/src/complex/arabic.rs b/src/complex/arabic.rs index e07822da..da9e44c6 100644 --- a/src/complex/arabic.rs +++ b/src/complex/arabic.rs @@ -546,7 +546,7 @@ fn mongolian_variation_selectors(buffer: &mut Buffer) { let len = buffer.len; let info = &mut buffer.info; for i in 1..len { - if (0x180B..=0x180D).contains(&info[i].glyph_id) { + if (0x180B..=0x180D).contains(&info[i].glyph_id) || info[i].glyph_id == 0x180F { let a = info[i - 1].arabic_shaping_action(); info[i].set_arabic_shaping_action(a); } diff --git a/src/complex/indic.rs b/src/complex/indic.rs index 50267808..08cde528 100644 --- a/src/complex/indic.rs +++ b/src/complex/indic.rs @@ -67,6 +67,7 @@ pub mod category { pub const VS: u8 = 30; // Variation selectors pub const P: u8 = 31; // Punctuation pub const D: u8 = 32; // Digits except zero 
+ pub const ML: u8 = 33; // Medial la } pub type Position = u8; @@ -675,13 +676,6 @@ fn collect_features(planner: &mut ShapePlanner) { planner.ot_map.add_feature(feature.0, feature.1, 1); } - planner - .ot_map - .enable_feature(feature::CONTEXTUAL_ALTERNATES, FeatureFlags::empty(), 1); - planner - .ot_map - .enable_feature(feature::CONTEXTUAL_LIGATURES, FeatureFlags::empty(), 1); - planner .ot_map .add_gsub_pause(Some(crate::ot::clear_syllables)); diff --git a/src/complex/myanmar.rs b/src/complex/myanmar.rs index 5917c8d4..c2f9b8bb 100644 --- a/src/complex/myanmar.rs +++ b/src/complex/myanmar.rs @@ -90,7 +90,9 @@ impl GlyphInfo { // XXX The spec says D0, but Uniscribe doesn't seem to do. 0x1040 => cat = category::D, - 0x103E | 0x1060 => cat = category::X_GROUP, + 0x103E => cat = category::X_GROUP, + + 0x1060 => cat = category::ML, 0x103C => cat = category::Y_GROUP, diff --git a/src/complex/myanmar_machine.rl b/src/complex/myanmar_machine.rl index 268f3275..83842d1c 100644 --- a/src/complex/myanmar_machine.rl +++ b/src/complex/myanmar_machine.rl @@ -33,6 +33,7 @@ GB = 11; H = 4; IV = 2; MH = 21; +ML = 33; MR = 22; MW = 23; MY = 24; @@ -54,9 +55,9 @@ k = (Ra As H); # Kinzi c = C|Ra; # is_consonant -medial_group = MY? As? MR? ((MW MH? | MH) As?)?; +medial_group = MY? As? MR? ((MW MH? ML? | MH ML? | ML) As?)?; main_vowel_group = (VPre.VS?)* VAbv* VBlw* A* (DB As?)?; -post_vowel_group = VPst MH? As* VAbv* A* (DB As?)?; +post_vowel_group = VPst MH? ML? As* VAbv* A* (DB As?)?; pwo_tone_group = PT A* DB? 
As?; complex_syllable_tail = As* medial_group main_vowel_group post_vowel_group* pwo_tone_group* V* j?; diff --git a/src/complex/myanmar_machine.rs b/src/complex/myanmar_machine.rs index 96b0b55b..eeced260 100644 --- a/src/complex/myanmar_machine.rs +++ b/src/complex/myanmar_machine.rs @@ -19,86 +19,94 @@ use crate::buffer::Buffer; -static _myanmar_syllable_machine_trans_keys: [u8; 106] = [ - 0, 21, 1, 20, 3, 19, 3, 5, 3, 19, 1, 15, 3, 15, 3, 15, 1, 19, 1, 19, 1, 19, 1, 19, 0, 8, 1, 19, - 1, 19, 1, 19, 1, 19, 1, 19, 1, 20, 1, 19, 1, 19, 1, 19, 1, 19, 1, 19, 3, 19, 3, 5, 3, 19, 1, - 15, 3, 15, 3, 15, 1, 19, 1, 19, 1, 19, 1, 19, 0, 8, 1, 20, 1, 19, 1, 19, 1, 19, 1, 19, 1, 19, - 1, 20, 1, 19, 1, 19, 1, 19, 1, 19, 1, 19, 1, 20, 1, 19, 0, 20, 0, 8, 5, 5, 0, 0, +static _myanmar_syllable_machine_trans_keys: [u8; 114] = [ + 0, 22, 1, 22, 3, 19, 3, 5, 3, 19, 1, 15, 3, 15, 3, 15, 1, 22, 1, 19, 1, 19, 1, 19, 1, 22, 0, 8, + 1, 22, 1, 22, 1, 19, 1, 19, 1, 19, 1, 20, 1, 19, 1, 22, 1, 22, 1, 22, 1, 22, 1, 22, 3, 19, 3, + 5, 3, 19, 1, 15, 3, 15, 3, 15, 1, 22, 1, 19, 1, 19, 1, 19, 1, 22, 0, 8, 1, 22, 1, 22, 1, 22, 1, + 19, 1, 19, 1, 19, 1, 20, 1, 19, 1, 22, 1, 22, 1, 22, 1, 22, 1, 22, 1, 22, 1, 22, 0, 22, 0, 8, + 5, 5, 0, 0, ]; -static _myanmar_syllable_machine_char_class: [i8; 34] = [ +static _myanmar_syllable_machine_char_class: [i8; 35] = [ 0, 0, 1, 2, 3, 3, 4, 5, 4, 6, 7, 4, 4, 4, 4, 8, 4, 9, 10, 4, 11, 12, 13, 14, 15, 16, 17, 18, - 19, 20, 21, 7, 0, 0, + 19, 20, 21, 7, 22, 0, 0, ]; -static _myanmar_syllable_machine_index_offsets: [i16; 54] = [ - 0, 22, 42, 59, 62, 79, 94, 107, 120, 139, 158, 177, 196, 205, 224, 243, 262, 281, 300, 320, - 339, 358, 377, 396, 415, 432, 435, 452, 467, 480, 493, 512, 531, 550, 569, 578, 598, 617, 636, - 655, 674, 693, 713, 732, 751, 770, 789, 808, 828, 847, 868, 877, 0, 0, +static _myanmar_syllable_machine_index_offsets: [i16; 58] = [ + 0, 23, 45, 62, 65, 82, 97, 110, 123, 145, 164, 183, 202, 224, 233, 255, 277, 296, 315, 334, + 354, 373, 395, 
417, 439, 461, 483, 500, 503, 520, 535, 548, 561, 583, 602, 621, 640, 662, 671, + 693, 715, 737, 756, 775, 794, 814, 833, 855, 877, 899, 921, 943, 965, 987, 1010, 1019, 0, 0, ]; -static _myanmar_syllable_machine_indices: [i8; 880] = [ - 2, 3, 4, 5, 1, 6, 7, 2, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 23, 24, 25, 22, - 26, 27, 22, 22, 28, 22, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 25, 22, 26, 22, 22, 22, 39, 22, - 22, 22, 22, 22, 33, 22, 22, 22, 37, 25, 22, 26, 25, 22, 26, 22, 22, 22, 22, 22, 22, 22, 22, 22, - 33, 22, 22, 22, 37, 40, 22, 25, 22, 26, 33, 22, 22, 41, 22, 22, 22, 22, 22, 33, 25, 22, 26, 22, - 22, 22, 41, 22, 22, 22, 22, 22, 33, 25, 22, 26, 22, 22, 22, 22, 22, 22, 22, 22, 22, 33, 23, 22, - 25, 22, 26, 27, 22, 22, 42, 22, 42, 22, 22, 22, 33, 43, 22, 22, 37, 23, 22, 25, 22, 26, 27, 22, - 22, 22, 22, 22, 22, 22, 22, 33, 22, 22, 22, 37, 23, 22, 25, 22, 26, 27, 22, 22, 42, 22, 22, 22, - 22, 22, 33, 43, 22, 22, 37, 23, 22, 25, 22, 26, 27, 22, 22, 22, 22, 22, 22, 22, 22, 33, 43, 22, - 22, 37, 2, 22, 22, 22, 22, 22, 22, 22, 2, 23, 22, 25, 22, 26, 27, 22, 22, 28, 22, 29, 30, 31, - 32, 33, 34, 35, 36, 37, 23, 22, 25, 22, 26, 27, 22, 22, 44, 22, 22, 22, 22, 22, 33, 34, 35, 36, - 37, 23, 22, 25, 22, 26, 27, 22, 22, 22, 22, 22, 22, 22, 22, 33, 34, 35, 36, 37, 23, 22, 25, 22, - 26, 27, 22, 22, 22, 22, 22, 22, 22, 22, 33, 34, 35, 22, 37, 23, 22, 25, 22, 26, 27, 22, 22, 22, - 22, 22, 22, 22, 22, 33, 22, 35, 22, 37, 23, 22, 25, 22, 26, 27, 22, 22, 22, 22, 22, 22, 22, 22, - 33, 34, 35, 36, 37, 44, 23, 22, 25, 22, 26, 27, 22, 22, 22, 22, 29, 22, 31, 22, 33, 34, 35, 36, - 37, 23, 22, 25, 22, 26, 27, 22, 22, 44, 22, 29, 22, 22, 22, 33, 34, 35, 36, 37, 23, 22, 25, 22, - 26, 27, 22, 22, 45, 22, 29, 30, 31, 22, 33, 34, 35, 36, 37, 23, 22, 25, 22, 26, 27, 22, 22, 22, - 22, 29, 30, 31, 22, 33, 34, 35, 36, 37, 23, 24, 25, 22, 26, 27, 22, 22, 28, 22, 29, 30, 31, 32, - 33, 34, 35, 36, 37, 47, 46, 6, 46, 46, 46, 48, 46, 46, 46, 46, 46, 15, 46, 46, 46, 19, 47, 46, - 6, 
47, 46, 6, 46, 46, 46, 46, 46, 46, 46, 46, 46, 15, 46, 46, 46, 19, 49, 46, 47, 46, 6, 15, - 46, 46, 50, 46, 46, 46, 46, 46, 15, 47, 46, 6, 46, 46, 46, 50, 46, 46, 46, 46, 46, 15, 47, 46, - 6, 46, 46, 46, 46, 46, 46, 46, 46, 46, 15, 3, 46, 47, 46, 6, 7, 46, 46, 51, 46, 51, 46, 46, 46, - 15, 52, 46, 46, 19, 3, 46, 47, 46, 6, 7, 46, 46, 46, 46, 46, 46, 46, 46, 15, 46, 46, 46, 19, 3, - 46, 47, 46, 6, 7, 46, 46, 51, 46, 46, 46, 46, 46, 15, 52, 46, 46, 19, 3, 46, 47, 46, 6, 7, 46, - 46, 46, 46, 46, 46, 46, 46, 15, 52, 46, 46, 19, 53, 46, 46, 46, 46, 46, 46, 46, 53, 3, 4, 47, - 46, 6, 7, 46, 46, 9, 46, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 3, 46, 47, 46, 6, 7, 46, 46, - 9, 46, 11, 12, 13, 14, 15, 16, 17, 18, 19, 3, 46, 47, 46, 6, 7, 46, 46, 54, 46, 46, 46, 46, 46, - 15, 16, 17, 18, 19, 3, 46, 47, 46, 6, 7, 46, 46, 46, 46, 46, 46, 46, 46, 15, 16, 17, 18, 19, 3, - 46, 47, 46, 6, 7, 46, 46, 46, 46, 46, 46, 46, 46, 15, 16, 17, 46, 19, 3, 46, 47, 46, 6, 7, 46, - 46, 46, 46, 46, 46, 46, 46, 15, 46, 17, 46, 19, 3, 46, 47, 46, 6, 7, 46, 46, 46, 46, 46, 46, - 46, 46, 15, 16, 17, 18, 19, 54, 3, 46, 47, 46, 6, 7, 46, 46, 46, 46, 11, 46, 13, 46, 15, 16, - 17, 18, 19, 3, 46, 47, 46, 6, 7, 46, 46, 54, 46, 11, 46, 46, 46, 15, 16, 17, 18, 19, 3, 46, 47, - 46, 6, 7, 46, 46, 55, 46, 11, 12, 13, 46, 15, 16, 17, 18, 19, 3, 46, 47, 46, 6, 7, 46, 46, 46, - 46, 11, 12, 13, 46, 15, 16, 17, 18, 19, 3, 4, 47, 46, 6, 7, 46, 46, 9, 46, 11, 12, 13, 14, 15, - 16, 17, 18, 19, 23, 24, 25, 22, 26, 27, 22, 22, 56, 22, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, - 23, 57, 25, 22, 26, 27, 22, 22, 28, 22, 29, 30, 31, 32, 33, 34, 35, 36, 37, 2, 3, 4, 47, 46, 6, - 7, 2, 2, 9, 46, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 2, 58, 58, 58, 58, 58, 58, 2, 2, 59, 0, - 0, +static _myanmar_syllable_machine_indices: [i8; 1022] = [ + 2, 3, 4, 5, 1, 6, 7, 2, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 24, 25, 26, + 23, 27, 28, 23, 23, 29, 23, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 23, 40, 26, 23, 27, 23, 23, + 
23, 41, 23, 23, 23, 23, 23, 34, 23, 23, 23, 38, 26, 23, 27, 26, 23, 27, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 34, 23, 23, 23, 38, 42, 23, 26, 23, 27, 34, 23, 23, 43, 23, 23, 23, 23, 23, 34, 26, + 23, 27, 23, 23, 23, 43, 23, 23, 23, 23, 23, 34, 26, 23, 27, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 34, 24, 23, 26, 23, 27, 28, 23, 23, 44, 23, 45, 23, 23, 23, 34, 46, 23, 23, 38, 23, 23, 44, 24, + 23, 26, 23, 27, 28, 23, 23, 23, 23, 23, 23, 23, 23, 34, 23, 23, 23, 38, 24, 23, 26, 23, 27, 28, + 23, 23, 44, 23, 23, 23, 23, 23, 34, 46, 23, 23, 38, 24, 23, 26, 23, 27, 28, 23, 23, 23, 23, 23, + 23, 23, 23, 34, 46, 23, 23, 38, 24, 23, 26, 23, 27, 28, 23, 23, 44, 23, 23, 23, 23, 23, 34, 46, + 23, 23, 38, 23, 23, 44, 2, 23, 23, 23, 23, 23, 23, 23, 2, 24, 23, 26, 23, 27, 28, 23, 23, 29, + 23, 30, 31, 32, 33, 34, 35, 36, 37, 38, 23, 23, 40, 24, 23, 26, 23, 27, 28, 23, 23, 47, 23, 23, + 23, 23, 23, 34, 35, 36, 37, 38, 23, 23, 40, 24, 23, 26, 23, 27, 28, 23, 23, 23, 23, 23, 23, 23, + 23, 34, 35, 36, 37, 38, 24, 23, 26, 23, 27, 28, 23, 23, 23, 23, 23, 23, 23, 23, 34, 35, 36, 23, + 38, 24, 23, 26, 23, 27, 28, 23, 23, 23, 23, 23, 23, 23, 23, 34, 23, 36, 23, 38, 24, 23, 26, 23, + 27, 28, 23, 23, 23, 23, 23, 23, 23, 23, 34, 35, 36, 37, 38, 47, 24, 23, 26, 23, 27, 28, 23, 23, + 47, 23, 23, 23, 23, 23, 34, 35, 36, 37, 38, 24, 23, 26, 23, 27, 28, 23, 23, 23, 23, 30, 23, 32, + 23, 34, 35, 36, 37, 38, 23, 23, 40, 24, 23, 26, 23, 27, 28, 23, 23, 47, 23, 30, 23, 23, 23, 34, + 35, 36, 37, 38, 23, 23, 40, 24, 23, 26, 23, 27, 28, 23, 23, 48, 23, 30, 31, 32, 23, 34, 35, 36, + 37, 38, 23, 23, 40, 24, 23, 26, 23, 27, 28, 23, 23, 23, 23, 30, 31, 32, 23, 34, 35, 36, 37, 38, + 23, 23, 40, 24, 25, 26, 23, 27, 28, 23, 23, 29, 23, 30, 31, 32, 33, 34, 35, 36, 37, 38, 23, 23, + 40, 50, 49, 6, 49, 49, 49, 51, 49, 49, 49, 49, 49, 15, 49, 49, 49, 19, 50, 49, 6, 50, 49, 6, + 49, 49, 49, 49, 49, 49, 49, 49, 49, 15, 49, 49, 49, 19, 52, 49, 50, 49, 6, 15, 49, 49, 53, 49, + 49, 49, 49, 49, 15, 50, 49, 6, 49, 49, 49, 
53, 49, 49, 49, 49, 49, 15, 50, 49, 6, 49, 49, 49, + 49, 49, 49, 49, 49, 49, 15, 3, 49, 50, 49, 6, 7, 49, 49, 54, 49, 55, 49, 49, 49, 15, 56, 49, + 49, 19, 49, 49, 54, 3, 49, 50, 49, 6, 7, 49, 49, 49, 49, 49, 49, 49, 49, 15, 49, 49, 49, 19, 3, + 49, 50, 49, 6, 7, 49, 49, 54, 49, 49, 49, 49, 49, 15, 56, 49, 49, 19, 3, 49, 50, 49, 6, 7, 49, + 49, 49, 49, 49, 49, 49, 49, 15, 56, 49, 49, 19, 3, 49, 50, 49, 6, 7, 49, 49, 54, 49, 49, 49, + 49, 49, 15, 56, 49, 49, 19, 49, 49, 54, 57, 49, 49, 49, 49, 49, 49, 49, 57, 3, 4, 50, 49, 6, 7, + 49, 49, 9, 49, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 49, 22, 3, 49, 50, 49, 6, 7, 49, 49, 9, + 49, 11, 12, 13, 14, 15, 16, 17, 18, 19, 49, 49, 22, 3, 49, 50, 49, 6, 7, 49, 49, 58, 49, 49, + 49, 49, 49, 15, 16, 17, 18, 19, 49, 49, 22, 3, 49, 50, 49, 6, 7, 49, 49, 49, 49, 49, 49, 49, + 49, 15, 16, 17, 18, 19, 3, 49, 50, 49, 6, 7, 49, 49, 49, 49, 49, 49, 49, 49, 15, 16, 17, 49, + 19, 3, 49, 50, 49, 6, 7, 49, 49, 49, 49, 49, 49, 49, 49, 15, 49, 17, 49, 19, 3, 49, 50, 49, 6, + 7, 49, 49, 49, 49, 49, 49, 49, 49, 15, 16, 17, 18, 19, 58, 3, 49, 50, 49, 6, 7, 49, 49, 58, 49, + 49, 49, 49, 49, 15, 16, 17, 18, 19, 3, 49, 50, 49, 6, 7, 49, 49, 49, 49, 11, 49, 13, 49, 15, + 16, 17, 18, 19, 49, 49, 22, 3, 49, 50, 49, 6, 7, 49, 49, 58, 49, 11, 49, 49, 49, 15, 16, 17, + 18, 19, 49, 49, 22, 3, 49, 50, 49, 6, 7, 49, 49, 59, 49, 11, 12, 13, 49, 15, 16, 17, 18, 19, + 49, 49, 22, 3, 49, 50, 49, 6, 7, 49, 49, 49, 49, 11, 12, 13, 49, 15, 16, 17, 18, 19, 49, 49, + 22, 3, 4, 50, 49, 6, 7, 49, 49, 9, 49, 11, 12, 13, 14, 15, 16, 17, 18, 19, 49, 49, 22, 24, 25, + 26, 23, 27, 28, 23, 23, 60, 23, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 23, 40, 24, 61, 26, 23, + 27, 28, 23, 23, 29, 23, 30, 31, 32, 33, 34, 35, 36, 37, 38, 23, 23, 40, 2, 3, 4, 50, 49, 6, 7, + 2, 2, 9, 49, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 49, 22, 2, 62, 62, 62, 62, 62, 62, 2, 2, + 63, 0, 0, ]; -static _myanmar_syllable_machine_index_defaults: [i8; 54] = [ - 1, 22, 22, 22, 22, 22, 22, 22, 22, 22, 
22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, - 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 22, - 22, 46, 58, 58, 0, 0, +static _myanmar_syllable_machine_index_defaults: [i8; 58] = [ + 1, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, + 49, 49, 49, 23, 23, 49, 62, 62, 0, 0, ]; -static _myanmar_syllable_machine_cond_targs: [i8; 62] = [ - 0, 0, 1, 24, 34, 0, 25, 31, 47, 36, 50, 37, 42, 43, 44, 27, 39, 40, 41, 30, 46, 51, 0, 2, 12, - 0, 3, 9, 13, 14, 19, 20, 21, 5, 16, 17, 18, 8, 23, 4, 6, 7, 10, 11, 15, 22, 0, 0, 26, 28, 29, - 32, 33, 35, 38, 45, 48, 49, 0, 0, 0, 0, +static _myanmar_syllable_machine_cond_targs: [i8; 66] = [ + 0, 0, 1, 26, 37, 0, 27, 33, 51, 39, 54, 40, 46, 47, 48, 29, 42, 43, 44, 32, 50, 55, 45, 0, 2, + 13, 0, 3, 9, 14, 15, 21, 22, 23, 5, 17, 18, 19, 8, 25, 20, 4, 6, 7, 10, 12, 11, 16, 24, 0, 0, + 28, 30, 31, 34, 36, 35, 38, 41, 49, 52, 53, 0, 0, 0, 0, ]; -static _myanmar_syllable_machine_cond_actions: [i8; 62] = [ - 0, 3, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 0, 0, 6, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, 10, 0, 0, +static _myanmar_syllable_machine_cond_actions: [i8; 66] = [ + 0, 3, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 0, 0, 6, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, + 10, 0, 0, ]; -static _myanmar_syllable_machine_to_state_actions: [i8; 54] = [ +static _myanmar_syllable_machine_to_state_actions: [i8; 58] = [ 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ]; -static 
_myanmar_syllable_machine_from_state_actions: [i8; 54] = [ +static _myanmar_syllable_machine_from_state_actions: [i8; 58] = [ 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ]; -static _myanmar_syllable_machine_eof_trans: [i8; 54] = [ - 1, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, - 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 23, - 23, 47, 59, 59, 0, 0, +static _myanmar_syllable_machine_eof_trans: [i8; 58] = [ + 1, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, + 50, 50, 50, 24, 24, 50, 63, 63, 0, 0, ]; static myanmar_syllable_machine_start: i32 = 0; static myanmar_syllable_machine_first_final: i32 = 0; @@ -160,7 +168,7 @@ pub fn find_syllables_myanmar(buffer: &mut Buffer) { { _keys = (cs << 1) as i32; _inds = (_myanmar_syllable_machine_index_offsets[(cs) as usize]) as i32; - if ((buffer.info[p].indic_category() as u8) <= 32 + if ((buffer.info[p].indic_category() as u8) <= 33 && (buffer.info[p].indic_category() as u8) >= 1) { { diff --git a/src/complex/universal.rs b/src/complex/universal.rs index 21bcd48b..e12d137c 100644 --- a/src/complex/universal.rs +++ b/src/complex/universal.rs @@ -32,6 +32,7 @@ pub mod category { pub const N: u8 = 4; // BASE_NUM pub const GB: u8 = 5; // BASE_OTHER + pub const CGJ: u8 = 6; // pub const CGJ: u8 = 6; // CGJ // pub const F: u8 = 7; // CONS_FINAL @@ -294,8 +295,7 @@ fn setup_topographical_masks(plan: &ShapePlan, buffer: &mut Buffer) { let mut end = buffer.next_syllable(0); while start < buffer.len { let syllable = buffer.info[start].syllable() & 0x0F; - if syllable == SyllableType::IndependentCluster 
as u8 - || syllable == SyllableType::SymbolCluster as u8 + if syllable == SyllableType::SymbolCluster as u8 || syllable == SyllableType::HieroglyphCluster as u8 || syllable == SyllableType::NonCluster as u8 { diff --git a/src/complex/universal_machine.rl b/src/complex/universal_machine.rl index d5663a98..9ff502af 100644 --- a/src/complex/universal_machine.rl +++ b/src/complex/universal_machine.rl @@ -35,13 +35,13 @@ O = 0; # OTHER B = 1; # BASE N = 4; # BASE_NUM GB = 5; # BASE_OTHER +CGJ = 6; # CGJ SUB = 11; # CONS_SUB H = 12; # HALANT HN = 13; # HALANT_NUM ZWNJ = 14; # Zero width non-joiner R = 18; # REPHA -S = 19; # SYM CS = 43; # CONS_WITH_STACKER HVM = 44; # HALANT_OR_VOWEL_MODIFIER Sk = 48; # SAKOT @@ -99,34 +99,38 @@ number_joiner_terminated_cluster_tail = (HN N)* HN; numeral_cluster_tail = (HN N)+; symbol_cluster_tail = SMAbv+ SMBlw* | SMBlw+; -virama_terminated_cluster = - complex_syllable_start +virama_terminated_cluster_tail = consonant_modifiers h ; -sakot_terminated_cluster = +virama_terminated_cluster = complex_syllable_start + virama_terminated_cluster_tail +; +sakot_terminated_cluster_tail = complex_syllable_middle Sk ; +sakot_terminated_cluster = + complex_syllable_start + sakot_terminated_cluster_tail +; standard_cluster = complex_syllable_start complex_syllable_tail ; broken_cluster = R? 
- (complex_syllable_tail | number_joiner_terminated_cluster_tail | numeral_cluster_tail | symbol_cluster_tail) + (complex_syllable_tail | number_joiner_terminated_cluster_tail | numeral_cluster_tail | symbol_cluster_tail | virama_terminated_cluster_tail | sakot_terminated_cluster_tail) ; number_joiner_terminated_cluster = N number_joiner_terminated_cluster_tail; numeral_cluster = N numeral_cluster_tail?; -symbol_cluster = (S | GB) symbol_cluster_tail?; +symbol_cluster = (O | GB) symbol_cluster_tail?; hieroglyph_cluster = SB+ | SB* G SE* (J SE* (G SE*)?)*; -independent_cluster = O; other = any; main := |* - independent_cluster => { found_syllable!(SyllableType::IndependentCluster); }; virama_terminated_cluster => { found_syllable!(SyllableType::ViramaTerminatedCluster); }; sakot_terminated_cluster => { found_syllable!(SyllableType::SakotTerminatedCluster); }; standard_cluster => { found_syllable!(SyllableType::StandardCluster); }; @@ -203,18 +207,18 @@ fn found_syllable( } } -fn not_standard_default_ignorable(i: &GlyphInfo) -> bool { - !(matches!(i.use_category(), category::O | category::RSV) && i.is_default_ignorable()) +fn not_ccs_default_ignorable(i: &GlyphInfo) -> bool { + !(matches!(i.use_category(), category::CGJ | category::RSV) && i.is_default_ignorable()) } fn included(infos: &[Cell], i: usize) -> bool { let glyph = infos[i].get(); - if !not_standard_default_ignorable(&glyph) { + if !not_ccs_default_ignorable(&glyph) { return false; } if glyph.use_category() == category::ZWNJ { for glyph2 in &infos[i + 1..] { - if not_standard_default_ignorable(&glyph2.get()) { + if not_ccs_default_ignorable(&glyph2.get()) { return !glyph2.get().is_unicode_mark(); } } diff --git a/src/complex/universal_machine.rs b/src/complex/universal_machine.rs index ca5b1de4..d193adbd 100644 --- a/src/complex/universal_machine.rs +++ b/src/complex/universal_machine.rs @@ -1,7 +1,3 @@ -// This file is autogenerated. Do not edit it! -// -// See docs/ragel.md for details. 
- #![allow( dead_code, non_upper_case_globals, @@ -23,104 +19,103 @@ use crate::complex::universal::category; use crate::GlyphInfo; use core::cell::Cell; -static _use_syllable_machine_trans_keys: [u8; 126] = [ - 1, 1, 1, 1, 0, 37, 5, 34, 5, 34, 1, 1, 10, 34, 11, 34, 12, 33, 13, 33, 14, 33, 31, 32, 32, 32, - 12, 34, 12, 34, 12, 34, 1, 1, 12, 34, 11, 34, 11, 34, 11, 34, 10, 34, 10, 34, 10, 34, 5, 34, 1, - 34, 7, 7, 3, 3, 5, 34, 27, 28, 28, 28, 5, 34, 10, 34, 11, 34, 12, 33, 13, 33, 14, 33, 31, 32, - 32, 32, 12, 34, 12, 34, 12, 34, 12, 34, 11, 34, 11, 34, 11, 34, 10, 34, 10, 34, 10, 34, 5, 34, - 1, 34, 1, 1, 3, 3, 7, 7, 1, 34, 5, 34, 27, 28, 28, 28, 1, 4, 36, 38, 35, 38, 35, 37, 0, 0, +static _use_syllable_machine_trans_keys: [u8; 124] = [ + 0, 36, 26, 27, 27, 27, 5, 33, 5, 33, 1, 1, 9, 33, 10, 33, 11, 32, 12, 32, 13, 32, 30, 31, 31, + 31, 11, 33, 11, 33, 11, 33, 1, 1, 11, 33, 10, 33, 10, 33, 10, 33, 9, 33, 9, 33, 9, 33, 5, 33, + 1, 33, 7, 7, 3, 3, 5, 33, 5, 33, 1, 1, 9, 33, 10, 33, 11, 32, 12, 32, 13, 32, 30, 31, 31, 31, + 11, 33, 11, 33, 11, 33, 1, 1, 11, 33, 10, 33, 10, 33, 10, 33, 9, 33, 9, 33, 9, 33, 5, 33, 1, + 33, 3, 3, 7, 7, 1, 33, 5, 33, 26, 27, 27, 27, 1, 4, 35, 37, 34, 37, 34, 36, 0, 0, ]; static _use_syllable_machine_char_class: [i8; 55] = [ - 0, 1, 2, 2, 3, 4, 2, 2, 2, 2, 2, 5, 6, 7, 2, 2, 2, 2, 8, 9, 2, 2, 10, 11, 12, 13, 14, 15, 16, - 17, 18, 19, 20, 21, 22, 23, 2, 24, 25, 26, 2, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, + 0, 1, 2, 2, 3, 4, 2, 2, 2, 2, 2, 5, 6, 7, 2, 2, 2, 2, 8, 2, 2, 2, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 2, 23, 24, 25, 2, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 0, 0, ]; -static _use_syllable_machine_index_offsets: [i16; 64] = [ - 0, 1, 2, 40, 70, 100, 101, 126, 150, 172, 193, 213, 215, 216, 239, 262, 285, 286, 309, 333, - 357, 381, 406, 431, 456, 486, 520, 521, 522, 552, 554, 555, 585, 610, 634, 656, 677, 697, 699, - 700, 723, 746, 769, 792, 816, 840, 864, 889, 914, 939, 969, 1003, 1004, 1005, 1006, 
1040, 1070, - 1072, 1073, 1077, 1080, 1084, 0, 0, +static _use_syllable_machine_index_offsets: [i16; 63] = [ + 0, 37, 39, 40, 69, 98, 99, 124, 148, 170, 191, 211, 213, 214, 237, 260, 283, 284, 307, 331, + 355, 379, 404, 429, 454, 483, 516, 517, 518, 547, 576, 577, 602, 626, 648, 669, 689, 691, 692, + 715, 738, 761, 762, 785, 809, 833, 857, 882, 907, 932, 961, 994, 995, 996, 1029, 1058, 1060, + 1061, 1065, 1068, 1072, 0, 0, ]; -static _use_syllable_machine_indices: [i8; 1089] = [ - 1, 2, 4, 5, 6, 7, 8, 1, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 13, 20, 21, 22, 23, 24, 25, - 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 9, 36, 6, 37, 39, 40, 38, 38, 38, 41, 42, 43, 44, 45, - 46, 47, 41, 48, 5, 49, 50, 51, 52, 53, 54, 55, 38, 38, 38, 56, 57, 58, 59, 40, 39, 40, 38, 38, - 38, 41, 42, 43, 44, 45, 46, 47, 41, 48, 49, 49, 50, 51, 52, 53, 54, 55, 38, 38, 38, 56, 57, 58, - 59, 40, 39, 41, 42, 43, 44, 45, 38, 38, 38, 38, 38, 38, 50, 51, 52, 53, 54, 55, 38, 38, 38, 42, - 57, 58, 59, 61, 42, 43, 44, 45, 38, 38, 38, 38, 38, 38, 38, 38, 38, 53, 54, 55, 38, 38, 38, 38, - 57, 58, 59, 61, 43, 44, 45, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 57, - 58, 59, 44, 45, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 57, 58, 59, 45, - 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 57, 58, 59, 57, 58, 58, 43, 44, - 45, 38, 38, 38, 38, 38, 38, 38, 38, 38, 53, 54, 55, 38, 38, 38, 38, 57, 58, 59, 61, 43, 44, 45, - 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 54, 55, 38, 38, 38, 38, 57, 58, 59, 61, 43, 44, 45, 38, - 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 55, 38, 38, 38, 38, 57, 58, 59, 61, 63, 43, 44, 45, 38, - 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 57, 58, 59, 61, 42, 43, 44, 45, 38, - 38, 38, 38, 38, 38, 50, 51, 52, 53, 54, 55, 38, 38, 38, 42, 57, 58, 59, 61, 42, 43, 44, 45, 38, - 38, 38, 38, 38, 38, 38, 51, 52, 53, 54, 55, 38, 38, 38, 42, 57, 58, 59, 61, 42, 43, 44, 45, 38, - 38, 38, 38, 38, 38, 38, 38, 52, 53, 54, 55, 38, 
38, 38, 42, 57, 58, 59, 61, 41, 42, 43, 44, 45, - 38, 47, 41, 38, 38, 38, 50, 51, 52, 53, 54, 55, 38, 38, 38, 42, 57, 58, 59, 61, 41, 42, 43, 44, - 45, 38, 38, 41, 38, 38, 38, 50, 51, 52, 53, 54, 55, 38, 38, 38, 42, 57, 58, 59, 61, 41, 42, 43, - 44, 45, 46, 47, 41, 38, 38, 38, 50, 51, 52, 53, 54, 55, 38, 38, 38, 42, 57, 58, 59, 61, 39, 40, - 38, 38, 38, 41, 42, 43, 44, 45, 46, 47, 41, 48, 38, 49, 50, 51, 52, 53, 54, 55, 38, 38, 38, 56, - 57, 58, 59, 40, 39, 60, 60, 60, 60, 60, 60, 60, 60, 60, 42, 43, 44, 45, 60, 60, 60, 60, 60, 60, - 60, 60, 60, 53, 54, 55, 60, 60, 60, 60, 57, 58, 59, 61, 65, 7, 39, 40, 38, 38, 38, 41, 42, 43, - 44, 45, 46, 47, 41, 48, 5, 49, 50, 51, 52, 53, 54, 55, 12, 67, 38, 56, 57, 58, 59, 40, 12, 67, - 67, 1, 70, 69, 69, 69, 13, 14, 15, 16, 17, 18, 19, 13, 20, 22, 22, 23, 24, 25, 26, 27, 28, 69, - 69, 69, 32, 33, 34, 35, 70, 13, 14, 15, 16, 17, 69, 69, 69, 69, 69, 69, 23, 24, 25, 26, 27, 28, - 69, 69, 69, 14, 33, 34, 35, 71, 14, 15, 16, 17, 69, 69, 69, 69, 69, 69, 69, 69, 69, 26, 27, 28, - 69, 69, 69, 69, 33, 34, 35, 71, 15, 16, 17, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, - 69, 69, 69, 33, 34, 35, 16, 17, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, - 33, 34, 35, 17, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 33, 34, 35, 33, - 34, 34, 15, 16, 17, 69, 69, 69, 69, 69, 69, 69, 69, 69, 26, 27, 28, 69, 69, 69, 69, 33, 34, 35, - 71, 15, 16, 17, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 27, 28, 69, 69, 69, 69, 33, 34, 35, 71, - 15, 16, 17, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 28, 69, 69, 69, 69, 33, 34, 35, 71, 15, - 16, 17, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 33, 34, 35, 71, 14, 15, - 16, 17, 69, 69, 69, 69, 69, 69, 23, 24, 25, 26, 27, 28, 69, 69, 69, 14, 33, 34, 35, 71, 14, 15, - 16, 17, 69, 69, 69, 69, 69, 69, 69, 24, 25, 26, 27, 28, 69, 69, 69, 14, 33, 34, 35, 71, 14, 15, - 16, 17, 69, 69, 69, 69, 69, 69, 69, 69, 25, 26, 27, 28, 69, 69, 69, 14, 33, 34, 35, 71, 
13, 14, - 15, 16, 17, 69, 19, 13, 69, 69, 69, 23, 24, 25, 26, 27, 28, 69, 69, 69, 14, 33, 34, 35, 71, 13, - 14, 15, 16, 17, 69, 69, 13, 69, 69, 69, 23, 24, 25, 26, 27, 28, 69, 69, 69, 14, 33, 34, 35, 71, - 13, 14, 15, 16, 17, 18, 19, 13, 69, 69, 69, 23, 24, 25, 26, 27, 28, 69, 69, 69, 14, 33, 34, 35, - 71, 1, 70, 69, 69, 69, 13, 14, 15, 16, 17, 18, 19, 13, 20, 69, 22, 23, 24, 25, 26, 27, 28, 69, - 69, 69, 32, 33, 34, 35, 70, 1, 69, 69, 69, 69, 69, 69, 69, 69, 69, 14, 15, 16, 17, 69, 69, 69, - 69, 69, 69, 69, 69, 69, 26, 27, 28, 69, 69, 69, 69, 33, 34, 35, 71, 1, 73, 10, 5, 69, 69, 5, 1, - 70, 10, 69, 69, 13, 14, 15, 16, 17, 18, 19, 13, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 69, - 32, 33, 34, 35, 70, 1, 70, 69, 69, 69, 13, 14, 15, 16, 17, 18, 19, 13, 20, 21, 22, 23, 24, 25, - 26, 27, 28, 69, 69, 69, 32, 33, 34, 35, 70, 29, 30, 30, 5, 72, 72, 5, 75, 74, 36, 36, 75, 74, - 75, 36, 74, 37, 0, 0, +static _use_syllable_machine_indices: [i8; 1077] = [ + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 10, 17, 18, 19, 20, 21, 22, 23, 24, 25, + 26, 27, 28, 29, 30, 31, 32, 7, 33, 3, 34, 1, 36, 36, 38, 39, 37, 37, 40, 41, 42, 43, 44, 45, + 46, 40, 47, 2, 48, 49, 50, 51, 52, 53, 54, 37, 37, 37, 55, 56, 57, 58, 39, 38, 39, 37, 37, 40, + 41, 42, 43, 44, 45, 46, 40, 47, 48, 48, 49, 50, 51, 52, 53, 54, 37, 37, 37, 55, 56, 57, 58, 39, + 38, 40, 41, 42, 43, 44, 37, 37, 37, 37, 37, 37, 49, 50, 51, 52, 53, 54, 37, 37, 37, 41, 56, 57, + 58, 60, 41, 42, 43, 44, 37, 37, 37, 37, 37, 37, 37, 37, 37, 52, 53, 54, 37, 37, 37, 37, 56, 57, + 58, 60, 42, 43, 44, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 56, 57, 58, + 43, 44, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 56, 57, 58, 44, 37, 37, + 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 56, 57, 58, 56, 57, 57, 42, 43, 44, 37, + 37, 37, 37, 37, 37, 37, 37, 37, 52, 53, 54, 37, 37, 37, 37, 56, 57, 58, 60, 42, 43, 44, 37, 37, + 37, 37, 37, 37, 37, 37, 37, 37, 53, 54, 37, 37, 37, 37, 56, 
57, 58, 60, 42, 43, 44, 37, 37, 37, + 37, 37, 37, 37, 37, 37, 37, 37, 54, 37, 37, 37, 37, 56, 57, 58, 60, 62, 42, 43, 44, 37, 37, 37, + 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 56, 57, 58, 60, 41, 42, 43, 44, 37, 37, 37, + 37, 37, 37, 49, 50, 51, 52, 53, 54, 37, 37, 37, 41, 56, 57, 58, 60, 41, 42, 43, 44, 37, 37, 37, + 37, 37, 37, 37, 50, 51, 52, 53, 54, 37, 37, 37, 41, 56, 57, 58, 60, 41, 42, 43, 44, 37, 37, 37, + 37, 37, 37, 37, 37, 51, 52, 53, 54, 37, 37, 37, 41, 56, 57, 58, 60, 40, 41, 42, 43, 44, 37, 46, + 40, 37, 37, 37, 49, 50, 51, 52, 53, 54, 37, 37, 37, 41, 56, 57, 58, 60, 40, 41, 42, 43, 44, 37, + 37, 40, 37, 37, 37, 49, 50, 51, 52, 53, 54, 37, 37, 37, 41, 56, 57, 58, 60, 40, 41, 42, 43, 44, + 45, 46, 40, 37, 37, 37, 49, 50, 51, 52, 53, 54, 37, 37, 37, 41, 56, 57, 58, 60, 38, 39, 37, 37, + 40, 41, 42, 43, 44, 45, 46, 40, 47, 37, 48, 49, 50, 51, 52, 53, 54, 37, 37, 37, 55, 56, 57, 58, + 39, 38, 59, 59, 59, 59, 59, 59, 59, 59, 41, 42, 43, 44, 59, 59, 59, 59, 59, 59, 59, 59, 59, 52, + 53, 54, 59, 59, 59, 59, 56, 57, 58, 60, 64, 4, 38, 39, 37, 37, 40, 41, 42, 43, 44, 45, 46, 40, + 47, 2, 48, 49, 50, 51, 52, 53, 54, 1, 36, 37, 55, 56, 57, 58, 39, 6, 7, 66, 66, 10, 11, 12, 13, + 14, 15, 16, 10, 17, 19, 19, 20, 21, 22, 23, 24, 25, 66, 66, 66, 29, 30, 31, 32, 7, 6, 10, 11, + 12, 13, 14, 66, 66, 66, 66, 66, 66, 20, 21, 22, 23, 24, 25, 66, 66, 66, 11, 30, 31, 32, 67, 11, + 12, 13, 14, 66, 66, 66, 66, 66, 66, 66, 66, 66, 23, 24, 25, 66, 66, 66, 66, 30, 31, 32, 67, 12, + 13, 14, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 30, 31, 32, 13, 14, 66, + 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 30, 31, 32, 14, 66, 66, 66, 66, 66, + 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 30, 31, 32, 30, 31, 31, 12, 13, 14, 66, 66, 66, 66, + 66, 66, 66, 66, 66, 23, 24, 25, 66, 66, 66, 66, 30, 31, 32, 67, 12, 13, 14, 66, 66, 66, 66, 66, + 66, 66, 66, 66, 66, 24, 25, 66, 66, 66, 66, 30, 31, 32, 67, 12, 13, 14, 66, 66, 66, 66, 66, 66, + 66, 
66, 66, 66, 66, 25, 66, 66, 66, 66, 30, 31, 32, 67, 68, 12, 13, 14, 66, 66, 66, 66, 66, 66, + 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 30, 31, 32, 67, 11, 12, 13, 14, 66, 66, 66, 66, 66, 66, + 20, 21, 22, 23, 24, 25, 66, 66, 66, 11, 30, 31, 32, 67, 11, 12, 13, 14, 66, 66, 66, 66, 66, 66, + 66, 21, 22, 23, 24, 25, 66, 66, 66, 11, 30, 31, 32, 67, 11, 12, 13, 14, 66, 66, 66, 66, 66, 66, + 66, 66, 22, 23, 24, 25, 66, 66, 66, 11, 30, 31, 32, 67, 10, 11, 12, 13, 14, 66, 16, 10, 66, 66, + 66, 20, 21, 22, 23, 24, 25, 66, 66, 66, 11, 30, 31, 32, 67, 10, 11, 12, 13, 14, 66, 66, 10, 66, + 66, 66, 20, 21, 22, 23, 24, 25, 66, 66, 66, 11, 30, 31, 32, 67, 10, 11, 12, 13, 14, 15, 16, 10, + 66, 66, 66, 20, 21, 22, 23, 24, 25, 66, 66, 66, 11, 30, 31, 32, 67, 6, 7, 66, 66, 10, 11, 12, + 13, 14, 15, 16, 10, 17, 66, 19, 20, 21, 22, 23, 24, 25, 66, 66, 66, 29, 30, 31, 32, 7, 6, 66, + 66, 66, 66, 66, 66, 66, 66, 11, 12, 13, 14, 66, 66, 66, 66, 66, 66, 66, 66, 66, 23, 24, 25, 66, + 66, 66, 66, 30, 31, 32, 67, 69, 8, 2, 66, 66, 2, 6, 7, 8, 66, 10, 11, 12, 13, 14, 15, 16, 10, + 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 66, 29, 30, 31, 32, 7, 6, 7, 66, 66, 10, 11, 12, + 13, 14, 15, 16, 10, 17, 18, 19, 20, 21, 22, 23, 24, 25, 66, 66, 66, 29, 30, 31, 32, 7, 26, 27, + 27, 2, 70, 70, 2, 72, 71, 33, 33, 72, 71, 72, 33, 71, 34, 0, 0, ]; -static _use_syllable_machine_index_defaults: [i8; 64] = [ - 0, 0, 6, 38, 38, 60, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 62, 38, 38, 38, 38, 38, 38, 38, - 38, 60, 64, 66, 38, 68, 68, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, - 69, 69, 69, 72, 69, 69, 69, 69, 69, 69, 72, 74, 74, 74, 0, 0, +static _use_syllable_machine_index_defaults: [i8; 63] = [ + 3, 35, 35, 37, 37, 59, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 61, 37, 37, 37, 37, 37, 37, 37, + 37, 59, 63, 65, 37, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, + 66, 66, 66, 66, 66, 66, 66, 66, 66, 70, 71, 71, 71, 0, 0, ]; -static 
_use_syllable_machine_cond_targs: [i8; 78] = [ - 2, 31, 42, 2, 2, 3, 2, 26, 28, 51, 52, 54, 29, 32, 33, 34, 35, 36, 46, 47, 48, 55, 49, 43, 44, - 45, 39, 40, 41, 56, 57, 58, 50, 37, 38, 2, 59, 61, 2, 4, 5, 6, 7, 8, 9, 10, 21, 22, 23, 24, 18, - 19, 20, 13, 14, 15, 25, 11, 12, 2, 2, 16, 2, 17, 2, 27, 2, 30, 2, 2, 0, 1, 2, 53, 2, 60, 0, 0, +static _use_syllable_machine_cond_targs: [i8; 75] = [ + 0, 1, 3, 0, 26, 28, 29, 30, 51, 53, 31, 32, 33, 34, 35, 46, 47, 48, 54, 49, 43, 44, 45, 38, 39, + 40, 55, 56, 57, 50, 36, 37, 0, 58, 60, 0, 2, 0, 4, 5, 6, 7, 8, 9, 10, 21, 22, 23, 24, 18, 19, + 20, 13, 14, 15, 25, 11, 12, 0, 0, 16, 0, 17, 0, 27, 0, 0, 41, 42, 52, 0, 0, 59, 0, 0, ]; -static _use_syllable_machine_cond_actions: [i8; 78] = [ - 1, 2, 2, 0, 5, 0, 6, 0, 0, 0, 0, 2, 0, 2, 2, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, - 2, 0, 0, 7, 0, 0, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, 10, 0, 11, - 0, 12, 0, 13, 0, 14, 15, 0, 0, 16, 0, 17, 0, 0, 0, +static _use_syllable_machine_cond_actions: [i8; 75] = [ + 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 4, 0, 0, 5, 0, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7, 8, 0, 9, 0, + 10, 0, 11, 12, 0, 0, 0, 13, 14, 0, 0, 0, ]; -static _use_syllable_machine_to_state_actions: [i8; 64] = [ - 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +static _use_syllable_machine_to_state_actions: [i8; 63] = [ + 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ]; -static _use_syllable_machine_from_state_actions: [i8; 64] = [ - 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +static _use_syllable_machine_from_state_actions: [i8; 63] = [ + 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ]; -static _use_syllable_machine_eof_trans: [i8; 64] = [ - 1, 1, 4, 39, 39, 61, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 63, 39, 39, 39, 39, 39, 39, 39, - 39, 61, 65, 67, 39, 69, 69, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, - 70, 70, 70, 73, 70, 70, 70, 70, 70, 70, 73, 75, 75, 75, 0, 0, +static _use_syllable_machine_eof_trans: [i8; 63] = [ + 1, 36, 36, 38, 38, 60, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 62, 38, 38, 38, 38, 38, 38, 38, + 38, 60, 64, 66, 38, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, + 67, 67, 67, 67, 67, 67, 67, 67, 67, 71, 72, 72, 72, 0, 0, ]; -static use_syllable_machine_start: i32 = 2; -static use_syllable_machine_first_final: i32 = 2; +static use_syllable_machine_start: i32 = 0; +static use_syllable_machine_first_final: i32 = 0; static use_syllable_machine_error: i32 = -1; -static use_syllable_machine_en_main: i32 = 2; +static use_syllable_machine_en_main: i32 = 0; #[derive(Clone, Copy)] pub enum SyllableType { IndependentCluster, @@ -169,7 +164,7 @@ pub fn find_syllables(buffer: &mut Buffer) { '_resume: while (p != pe || p == eof) { '_again: while (true) { match (_use_syllable_machine_from_state_actions[(cs) as usize]) { - 4 => { + 2 => { ts = p; } @@ -228,102 +223,87 @@ pub fn find_syllables(buffer: &mut Buffer) { if (_use_syllable_machine_cond_actions[(_trans) as usize] != 0) { { match (_use_syllable_machine_cond_actions[(_trans) as usize]) { - 2 => { - te = p + 1; - } - 5 => { - te = p + 1; - { - found_syllable!(SyllableType::IndependentCluster); - } - } - 9 => { + 7 => { te = p + 1; { found_syllable!(SyllableType::StandardCluster); } } - 7 => { + 4 => { te = p + 1; { 
found_syllable!(SyllableType::BrokenCluster); } } - 6 => { + 3 => { te = p + 1; { found_syllable!(SyllableType::NonCluster); } } - 10 => { + 8 => { te = p; p = p - 1; { found_syllable!(SyllableType::ViramaTerminatedCluster); } } - 11 => { + 9 => { te = p; p = p - 1; { found_syllable!(SyllableType::SakotTerminatedCluster); } } - 8 => { + 6 => { te = p; p = p - 1; { found_syllable!(SyllableType::StandardCluster); } } - 13 => { + 11 => { te = p; p = p - 1; { found_syllable!(SyllableType::NumberJoinerTerminatedCluster); } } - 12 => { + 10 => { te = p; p = p - 1; { found_syllable!(SyllableType::NumeralCluster); } } - 14 => { + 5 => { te = p; p = p - 1; { found_syllable!(SyllableType::SymbolCluster); } } - 17 => { + 14 => { te = p; p = p - 1; { found_syllable!(SyllableType::HieroglyphCluster); } } - 15 => { + 12 => { te = p; p = p - 1; { found_syllable!(SyllableType::BrokenCluster); } } - 16 => { + 13 => { te = p; p = p - 1; { found_syllable!(SyllableType::NonCluster); } } - 1 => { - p = (te) - 1; - { - found_syllable!(SyllableType::BrokenCluster); - } - } _ => {} } @@ -333,14 +313,14 @@ pub fn find_syllables(buffer: &mut Buffer) { } if (p == eof) { { - if (cs >= 2) { + if (cs >= 0) { break '_resume; } } } else { { match (_use_syllable_machine_to_state_actions[(cs) as usize]) { - 3 => { + 1 => { ts = p0; } @@ -376,18 +356,18 @@ fn found_syllable( } } -fn not_standard_default_ignorable(i: &GlyphInfo) -> bool { - !(matches!(i.use_category(), category::O | category::RSV) && i.is_default_ignorable()) +fn not_ccs_default_ignorable(i: &GlyphInfo) -> bool { + !(matches!(i.use_category(), category::CGJ | category::RSV) && i.is_default_ignorable()) } fn included(infos: &[Cell], i: usize) -> bool { let glyph = infos[i].get(); - if !not_standard_default_ignorable(&glyph) { + if !not_ccs_default_ignorable(&glyph) { return false; } if glyph.use_category() == category::ZWNJ { for glyph2 in &infos[i + 1..] 
{ - if not_standard_default_ignorable(&glyph2.get()) { + if not_ccs_default_ignorable(&glyph2.get()) { return !glyph2.get().is_unicode_mark(); } } diff --git a/src/complex/universal_table.rs b/src/complex/universal_table.rs index 1e016491..5b77a435 100644 --- a/src/complex/universal_table.rs +++ b/src/complex/universal_table.rs @@ -17,6 +17,9 @@ const USE_TABLE: &[Category] = &[ /* 00C0 */ O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, /* 00D0 */ O, O, O, O, O, O, O, GB, + /* Combining Diacritical Marks */ + O, O, O, O, O, O, O, CGJ, + /* Arabic */ /* 0640 */ B, O, O, O, O, O, O, O, @@ -196,7 +199,7 @@ const USE_TABLE: &[Category] = &[ /* 1780 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, /* 1790 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, /* 17A0 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, - /* 17B0 */ B, B, B, B, O, O, VPST, VABV, VABV, VABV, VABV, VBLW, VBLW, VBLW, VPRE, VPRE, + /* 17B0 */ B, B, B, B, CGJ, CGJ, VPST, VABV, VABV, VABV, VABV, VBLW, VBLW, VBLW, VPRE, VPRE, /* 17C0 */ VPRE, VPRE, VPRE, VPRE, VPRE, VPRE, VMABV, VMPST, VPST, VMABV, VMABV, FMABV, FABV, CMABV, FMABV, VMABV, /* 17D0 */ FMABV, VABV, H, FMABV, O, O, O, O, O, O, O, O, B, FMABV, O, O, /* 17E0 */ B, B, B, B, B, B, B, B, B, B, O, O, O, O, O, O, @@ -204,7 +207,7 @@ const USE_TABLE: &[Category] = &[ /* Mongolian */ - /* 1800 */ B, O, O, O, O, O, O, B, O, O, B, O, O, O, O, O, + /* 1800 */ B, O, O, O, O, O, O, B, O, O, B, CGJ, CGJ, CGJ, O, CGJ, /* 1810 */ O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, /* 1820 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, /* 1830 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, @@ -265,7 +268,7 @@ const USE_TABLE: &[Category] = &[ /* 1B30 */ B, B, B, B, CMABV, VPST, VABV, VABV, VBLW, VBLW, VBLW, VBLW, VABV, VABV, VPRE, VPRE, /* 1B40 */ VPRE, VPRE, VABV, VABV, H, B, B, B, B, B, B, B, B, O, O, O, /* 1B50 */ B, B, B, B, B, B, B, B, B, B, O, GB, GB, O, O, GB, - /* 1B60 */ O, S, GB, S, S, S, S, S, GB, S, S, SMABV, SMBLW, SMABV, SMABV, SMABV, 
+ /* 1B60 */ O, O, GB, O, O, O, O, O, GB, O, O, SMABV, SMBLW, SMABV, SMABV, SMABV, /* 1B70 */ SMABV, SMABV, SMABV, SMABV, O, O, O, O, O, O, O, O, O, O, O, O, /* Sundanese */ @@ -300,7 +303,7 @@ const USE_TABLE: &[Category] = &[ O, O, O, FMABV, O, O, O, O, /* General Punctuation */ - O, O, O, O, ZWNJ, O, O, O, + O, O, O, O, ZWNJ, CGJ, O, O, /* 2010 */ GB, GB, GB, GB, GB, O, O, O, /* Superscripts and Subscripts */ @@ -414,6 +417,18 @@ const USE_TABLE: &[Category] = &[ /* ABE0 */ B, B, B, VPST, VPST, VABV, VPST, VPST, VBLW, VPST, VPST, O, VMPST, VBLW, O, O, /* ABF0 */ B, B, B, B, B, B, B, B, B, B, O, O, O, O, O, O, + /* Variation Selectors */ + + /* FE00 */ CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, + + /* Vithkuqi */ + + /* 10570 */ B, B, B, B, B, B, B, B, B, B, B, O, B, B, B, B, + /* 10580 */ B, B, B, B, B, B, B, B, B, B, B, O, B, B, B, B, + /* 10590 */ B, B, B, O, B, B, O, B, B, B, B, B, B, B, B, B, + /* 105A0 */ B, B, O, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* 105B0 */ B, B, O, B, B, B, B, B, B, B, O, B, B, O, O, O, + /* Kharoshthi */ /* 10A00 */ B, VBLW, VBLW, VBLW, O, VABV, VBLW, O, O, O, O, O, VPST, VMBLW, VMBLW, VMABV, @@ -426,7 +441,7 @@ const USE_TABLE: &[Category] = &[ /* 10AC0 */ B, B, B, B, B, B, B, B, O, B, B, B, B, B, B, B, /* 10AD0 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, - /* 10AE0 */ B, B, B, B, B, CMBLW, CMBLW, O, + /* 10AE0 */ B, B, B, B, B, CMBLW, CMBLW, O, O, O, O, B, B, B, B, B, /* Psalter Pahlavi */ @@ -452,7 +467,15 @@ const USE_TABLE: &[Category] = &[ /* 10F30 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, /* 10F40 */ B, B, B, B, B, B, VMBLW, VMBLW, VMBLW, VMBLW, VMBLW, VMBLW, VMBLW, VMBLW, VMBLW, VMBLW, - /* 10F50 */ VMBLW, B, B, B, B, O, O, O, + /* 10F50 */ VMBLW, B, B, B, B, O, O, O, O, O, O, O, O, O, O, O, + /* 10F60 */ O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, + + /* Old Uyghur */ + + /* 10F70 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* 10F80 */ B, B, CMBLW, CMBLW, 
CMBLW, CMBLW, O, O, O, O, O, O, O, O, O, O, + /* 10F90 */ O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, + /* 10FA0 */ O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, /* Chorasmian */ @@ -468,7 +491,7 @@ const USE_TABLE: &[Category] = &[ /* 11010 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, /* 11020 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, /* 11030 */ B, B, B, B, B, B, B, B, VABV, VABV, VABV, VABV, VBLW, VBLW, VBLW, VBLW, - /* 11040 */ VBLW, VBLW, VABV, VABV, VABV, VABV, HVM, O, O, O, O, O, O, O, O, O, + /* 11040 */ VBLW, VBLW, VABV, VABV, VABV, VABV, H, O, O, O, O, O, O, O, O, O, /* 11050 */ O, O, N, N, N, N, N, N, N, N, N, N, N, N, N, N, /* 11060 */ N, N, N, N, N, N, B, B, B, B, B, B, B, B, B, B, /* 11070 */ VABV, B, B, VABV, VABV, B, O, O, O, O, O, O, O, O, O, HN, @@ -685,77 +708,84 @@ const USE_TABLE: &[Category] = &[ /* Egyptian Hieroglyphs */ - /* 13000 */ G, G, G, G, G, G, G, G, G, G, G, G, G, G, G, G, - /* 13010 */ G, G, G, G, G, G, G, G, G, G, G, G, G, G, G, G, - /* 13020 */ G, G, G, G, G, G, G, G, G, G, G, G, G, G, G, G, - /* 13030 */ G, G, G, G, G, G, G, G, G, G, G, G, G, G, G, G, - /* 13040 */ G, G, G, G, G, G, G, G, G, G, G, G, G, G, G, G, - /* 13050 */ G, G, G, G, G, G, G, G, G, G, G, G, G, G, G, G, - /* 13060 */ G, G, G, G, G, G, G, G, G, G, G, G, G, G, G, G, - /* 13070 */ G, G, G, G, G, G, G, G, G, G, G, G, G, G, G, G, - /* 13080 */ G, G, G, G, G, G, G, G, G, G, G, G, G, G, G, G, - /* 13090 */ G, G, G, G, G, G, G, G, G, G, G, G, G, G, G, G, - /* 130A0 */ G, G, G, G, G, G, G, G, G, G, G, G, G, G, G, G, - /* 130B0 */ G, G, G, G, G, G, G, G, G, G, G, G, G, G, G, G, - /* 130C0 */ G, G, G, G, G, G, G, G, G, G, G, G, G, G, G, G, - /* 130D0 */ G, G, G, G, G, G, G, G, G, G, G, G, G, G, G, G, - /* 130E0 */ G, G, G, G, G, G, G, G, G, G, G, G, G, G, G, G, - /* 130F0 */ G, G, G, G, G, G, G, G, G, G, G, G, G, G, G, G, - /* 13100 */ G, G, G, G, G, G, G, G, G, G, G, G, G, G, G, G, - /* 13110 */ G, G, G, G, G, G, G, G, G, G, G, G, G, G, G, G, - /* 
13120 */ G, G, G, G, G, G, G, G, G, G, G, G, G, G, G, G, - /* 13130 */ G, G, G, G, G, G, G, G, G, G, G, G, G, G, G, G, - /* 13140 */ G, G, G, G, G, G, G, G, G, G, G, G, G, G, G, G, - /* 13150 */ G, G, G, G, G, G, G, G, G, G, G, G, G, G, G, G, - /* 13160 */ G, G, G, G, G, G, G, G, G, G, G, G, G, G, G, G, - /* 13170 */ G, G, G, G, G, G, G, G, G, G, G, G, G, G, G, G, - /* 13180 */ G, G, G, G, G, G, G, G, G, G, G, G, G, G, G, G, - /* 13190 */ G, G, G, G, G, G, G, G, G, G, G, G, G, G, G, G, - /* 131A0 */ G, G, G, G, G, G, G, G, G, G, G, G, G, G, G, G, - /* 131B0 */ G, G, G, G, G, G, G, G, G, G, G, G, G, G, G, G, - /* 131C0 */ G, G, G, G, G, G, G, G, G, G, G, G, G, G, G, G, - /* 131D0 */ G, G, G, G, G, G, G, G, G, G, G, G, G, G, G, G, - /* 131E0 */ G, G, G, G, G, G, G, G, G, G, G, G, G, G, G, G, - /* 131F0 */ G, G, G, G, G, G, G, G, G, G, G, G, G, G, G, G, - /* 13200 */ G, G, G, G, G, G, G, G, G, G, G, G, G, G, G, G, - /* 13210 */ G, G, G, G, G, G, G, G, G, G, G, G, G, G, G, G, - /* 13220 */ G, G, G, G, G, G, G, G, G, G, G, G, G, G, G, G, - /* 13230 */ G, G, G, G, G, G, G, G, G, G, G, G, G, G, G, G, - /* 13240 */ G, G, G, G, G, G, G, G, G, G, G, G, G, G, G, G, - /* 13250 */ G, G, G, G, G, G, G, G, G, G, G, G, G, G, G, G, - /* 13260 */ G, G, G, G, G, G, G, G, G, G, G, G, G, G, G, G, - /* 13270 */ G, G, G, G, G, G, G, G, G, G, G, G, G, G, G, G, - /* 13280 */ G, G, G, G, G, G, G, G, G, G, G, G, G, G, G, G, - /* 13290 */ G, G, G, G, G, G, G, G, G, G, G, G, G, G, G, G, - /* 132A0 */ G, G, G, G, G, G, G, G, G, G, G, G, G, G, G, G, - /* 132B0 */ G, G, G, G, G, G, G, G, G, G, G, G, G, G, G, G, - /* 132C0 */ G, G, G, G, G, G, G, G, G, G, G, G, G, G, G, G, - /* 132D0 */ G, G, G, G, G, G, G, G, G, G, G, G, G, G, G, G, - /* 132E0 */ G, G, G, G, G, G, G, G, G, G, G, G, G, G, G, G, - /* 132F0 */ G, G, G, G, G, G, G, G, G, G, G, G, G, G, G, G, - /* 13300 */ G, G, G, G, G, G, G, G, G, G, G, G, G, G, G, G, - /* 13310 */ G, G, G, G, G, G, G, G, G, G, G, G, G, G, G, G, - /* 13320 */ G, G, 
G, G, G, G, G, G, G, G, G, G, G, G, G, G, - /* 13330 */ G, G, G, G, G, G, G, G, G, G, G, G, G, G, G, G, - /* 13340 */ G, G, G, G, G, G, G, G, G, G, G, G, G, G, G, G, - /* 13350 */ G, G, G, G, G, G, G, G, G, G, G, G, G, G, G, G, - /* 13360 */ G, G, G, G, G, G, G, G, G, G, G, G, G, G, G, G, - /* 13370 */ G, G, G, G, G, G, G, G, G, G, G, G, G, G, G, G, - /* 13380 */ G, G, G, G, G, G, G, G, G, G, G, G, G, G, G, G, - /* 13390 */ G, G, G, G, G, G, G, G, G, G, G, G, G, G, G, G, - /* 133A0 */ G, G, G, G, G, G, G, G, G, G, G, G, G, G, G, G, - /* 133B0 */ G, G, G, G, G, G, G, G, G, G, G, G, G, G, G, G, - /* 133C0 */ G, G, G, G, G, G, G, G, G, G, G, G, G, G, G, G, - /* 133D0 */ G, G, G, G, G, G, G, G, G, G, G, G, G, G, G, G, - /* 133E0 */ G, G, G, G, G, G, G, G, G, G, G, G, G, G, G, G, - /* 133F0 */ G, G, G, G, G, G, G, G, G, G, G, G, G, G, G, G, - /* 13400 */ G, G, G, G, G, G, G, G, G, G, G, G, G, G, G, G, - /* 13410 */ G, G, G, G, G, G, G, G, G, G, G, G, G, G, G, G, - /* 13420 */ G, G, G, G, G, G, G, G, G, G, G, G, G, G, G, O, + /* 13000 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* 13010 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* 13020 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* 13030 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* 13040 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* 13050 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* 13060 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* 13070 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* 13080 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* 13090 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* 130A0 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* 130B0 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* 130C0 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* 130D0 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* 130E0 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* 130F0 */ B, B, B, B, B, B, B, 
B, B, B, B, B, B, B, B, B, + /* 13100 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* 13110 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* 13120 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* 13130 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* 13140 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* 13150 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* 13160 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* 13170 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* 13180 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* 13190 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* 131A0 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* 131B0 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* 131C0 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* 131D0 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* 131E0 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* 131F0 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* 13200 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* 13210 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* 13220 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* 13230 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* 13240 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* 13250 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* 13260 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* 13270 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* 13280 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* 13290 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* 132A0 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* 132B0 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* 132C0 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* 132D0 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* 132E0 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* 132F0 */ B, B, B, B, B, B, B, B, B, B, B, B, 
B, B, B, B, + /* 13300 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* 13310 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* 13320 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* 13330 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* 13340 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* 13350 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* 13360 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* 13370 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* 13380 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* 13390 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* 133A0 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* 133B0 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* 133C0 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* 133D0 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* 133E0 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* 133F0 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* 13400 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* 13410 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* 13420 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, O, /* Egyptian Hieroglyph Format Controls */ - /* 13430 */ J, J, J, J, J, J, J, SB, SE, O, O, O, O, O, O, O, + /* 13430 */ H, H, H, H, H, H, H, B, B, O, O, O, O, O, O, O, + + /* Tangsa */ + + /* 16AC0 */ B, B, B, B, B, B, B, B, B, B, O, O, O, O, O, O, + /* 16AD0 */ O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, + /* 16AE0 */ O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, + /* 16AF0 */ O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, /* Pahawh Hmong */ @@ -835,6 +865,12 @@ const USE_TABLE: &[Category] = &[ /* 1E130 */ VMABV, VMABV, VMABV, VMABV, VMABV, VMABV, VMABV, B, B, B, B, B, B, B, O, O, /* 1E140 */ B, B, B, B, B, B, B, B, B, B, O, O, O, O, B, B, + /* Toto */ + + /* 1E290 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* 1E2A0 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, VMABV, O, + /* 
1E2B0 */ O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, + /* Wancho */ /* 1E2C0 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, @@ -851,54 +887,75 @@ const USE_TABLE: &[Category] = &[ /* 1E940 */ B, B, B, B, CMABV, CMABV, CMABV, CMABV, CMABV, CMABV, CMABV, B, O, O, O, O, /* 1E950 */ B, B, B, B, B, B, B, B, B, B, O, O, O, O, O, O, + /* Variation Selectors Supplement */ + + /* E0100 */ CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, + /* E0110 */ CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, + /* E0120 */ CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, + /* E0130 */ CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, + /* E0140 */ CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, + /* E0150 */ CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, + /* E0160 */ CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, + /* E0170 */ CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, + /* E0180 */ CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, + /* E0190 */ CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, + /* E01A0 */ CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, + /* E01B0 */ CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, + /* E01C0 */ CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, + /* E01D0 */ CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, + /* E01E0 */ CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, + ]; const USE_OFFSET_0X0028: usize = 0; const USE_OFFSET_0X00A0: usize = 24; -const USE_OFFSET_0X0640: usize = 80; -const USE_OFFSET_0X07C8: usize = 88; -const USE_OFFSET_0X0840: usize = 144; -const 
USE_OFFSET_0X0900: usize = 176; -const USE_OFFSET_0X0F00: usize = 1448; -const USE_OFFSET_0X1000: usize = 1648; -const USE_OFFSET_0X1700: usize = 1808; -const USE_OFFSET_0X1900: usize = 2240; -const USE_OFFSET_0X1B00: usize = 2656; -const USE_OFFSET_0X1CD0: usize = 2992; -const USE_OFFSET_0X1DF8: usize = 3040; -const USE_OFFSET_0X2008: usize = 3048; -const USE_OFFSET_0X2070: usize = 3064; -const USE_OFFSET_0X20F0: usize = 3088; -const USE_OFFSET_0X25C8: usize = 3096; -const USE_OFFSET_0X2D30: usize = 3104; -const USE_OFFSET_0XA800: usize = 3184; -const USE_OFFSET_0XABC0: usize = 3944; -const USE_OFFSET_0X10A00: usize = 4008; -const USE_OFFSET_0X10AC0: usize = 4088; -const USE_OFFSET_0X10B80: usize = 4128; -const USE_OFFSET_0X10D00: usize = 4176; -const USE_OFFSET_0X10E80: usize = 4240; -const USE_OFFSET_0X10F30: usize = 4296; -const USE_OFFSET_0X10FB0: usize = 4336; -const USE_OFFSET_0X11100: usize = 4616; -const USE_OFFSET_0X11280: usize = 4936; -const USE_OFFSET_0X11400: usize = 5184; -const USE_OFFSET_0X11580: usize = 5408; -const USE_OFFSET_0X11800: usize = 5864; -const USE_OFFSET_0X11900: usize = 5928; -const USE_OFFSET_0X119A0: usize = 6024; -const USE_OFFSET_0X11C00: usize = 6280; -const USE_OFFSET_0X11D00: usize = 6464; -const USE_OFFSET_0X11EE0: usize = 6640; -const USE_OFFSET_0X13000: usize = 6664; -const USE_OFFSET_0X16B00: usize = 7752; -const USE_OFFSET_0X16F00: usize = 7808; -const USE_OFFSET_0X16FE0: usize = 7960; -const USE_OFFSET_0X18B00: usize = 7968; -const USE_OFFSET_0X1BC00: usize = 8440; -const USE_OFFSET_0X1E100: usize = 8600; -const USE_OFFSET_0X1E2C0: usize = 8680; -const USE_OFFSET_0X1E900: usize = 8744; +const USE_OFFSET_0X0348: usize = 80; +const USE_OFFSET_0X0640: usize = 88; +const USE_OFFSET_0X07C8: usize = 96; +const USE_OFFSET_0X0840: usize = 152; +const USE_OFFSET_0X0900: usize = 184; +const USE_OFFSET_0X0F00: usize = 1456; +const USE_OFFSET_0X1000: usize = 1656; +const USE_OFFSET_0X1700: usize = 1816; +const USE_OFFSET_0X1900: 
usize = 2248; +const USE_OFFSET_0X1B00: usize = 2664; +const USE_OFFSET_0X1CD0: usize = 3000; +const USE_OFFSET_0X1DF8: usize = 3048; +const USE_OFFSET_0X2008: usize = 3056; +const USE_OFFSET_0X2070: usize = 3072; +const USE_OFFSET_0X20F0: usize = 3096; +const USE_OFFSET_0X25C8: usize = 3104; +const USE_OFFSET_0X2D30: usize = 3112; +const USE_OFFSET_0XA800: usize = 3192; +const USE_OFFSET_0XABC0: usize = 3952; +const USE_OFFSET_0XFE00: usize = 4016; +const USE_OFFSET_0X10570: usize = 4032; +const USE_OFFSET_0X10A00: usize = 4112; +const USE_OFFSET_0X10AC0: usize = 4192; +const USE_OFFSET_0X10B80: usize = 4240; +const USE_OFFSET_0X10D00: usize = 4288; +const USE_OFFSET_0X10E80: usize = 4352; +const USE_OFFSET_0X10F30: usize = 4408; +const USE_OFFSET_0X11100: usize = 4816; +const USE_OFFSET_0X11280: usize = 5136; +const USE_OFFSET_0X11400: usize = 5384; +const USE_OFFSET_0X11580: usize = 5608; +const USE_OFFSET_0X11800: usize = 6064; +const USE_OFFSET_0X11900: usize = 6128; +const USE_OFFSET_0X119A0: usize = 6224; +const USE_OFFSET_0X11C00: usize = 6480; +const USE_OFFSET_0X11D00: usize = 6664; +const USE_OFFSET_0X11EE0: usize = 6840; +const USE_OFFSET_0X13000: usize = 6864; +const USE_OFFSET_0X16AC0: usize = 7952; +const USE_OFFSET_0X16F00: usize = 8072; +const USE_OFFSET_0X16FE0: usize = 8224; +const USE_OFFSET_0X18B00: usize = 8232; +const USE_OFFSET_0X1BC00: usize = 8704; +const USE_OFFSET_0X1E100: usize = 8864; +const USE_OFFSET_0X1E290: usize = 8944; +const USE_OFFSET_0X1E900: usize = 9056; +const USE_OFFSET_0XE0100: usize = 9152; #[rustfmt::skip] pub fn get_category(u: u32) -> Category { @@ -906,6 +963,7 @@ pub fn get_category(u: u32) -> Category { 0x0 => { if (0x0028..=0x003F).contains(&u) { return USE_TABLE[u as usize - 0x0028 + USE_OFFSET_0X0028]; } if (0x00A0..=0x00D7).contains(&u) { return USE_TABLE[u as usize - 0x00A0 + USE_OFFSET_0X00A0]; } + if (0x0348..=0x034F).contains(&u) { return USE_TABLE[u as usize - 0x0348 + USE_OFFSET_0X0348]; } if 
(0x0640..=0x0647).contains(&u) { return USE_TABLE[u as usize - 0x0640 + USE_OFFSET_0X0640]; } if (0x07C8..=0x07FF).contains(&u) { return USE_TABLE[u as usize - 0x07C8 + USE_OFFSET_0X07C8]; } if (0x0840..=0x085F).contains(&u) { return USE_TABLE[u as usize - 0x0840 + USE_OFFSET_0X0840]; } @@ -931,17 +989,20 @@ pub fn get_category(u: u32) -> Category { if (0xA800..=0xAAF7).contains(&u) { return USE_TABLE[u as usize - 0xA800 + USE_OFFSET_0XA800]; } if (0xABC0..=0xABFF).contains(&u) { return USE_TABLE[u as usize - 0xABC0 + USE_OFFSET_0XABC0]; } } + 0xF => { + if (0xFE00..=0xFE0F).contains(&u) { return USE_TABLE[u as usize - 0xFE00 + USE_OFFSET_0XFE00]; } + } 0x10 => { + if (0x10570..=0x105BF).contains(&u) { return USE_TABLE[u as usize - 0x10570 + USE_OFFSET_0X10570]; } if (0x10A00..=0x10A4F).contains(&u) { return USE_TABLE[u as usize - 0x10A00 + USE_OFFSET_0X10A00]; } - if (0x10AC0..=0x10AE7).contains(&u) { return USE_TABLE[u as usize - 0x10AC0 + USE_OFFSET_0X10AC0]; } + if (0x10AC0..=0x10AEF).contains(&u) { return USE_TABLE[u as usize - 0x10AC0 + USE_OFFSET_0X10AC0]; } if (0x10B80..=0x10BAF).contains(&u) { return USE_TABLE[u as usize - 0x10B80 + USE_OFFSET_0X10B80]; } if (0x10D00..=0x10D3F).contains(&u) { return USE_TABLE[u as usize - 0x10D00 + USE_OFFSET_0X10D00]; } if (0x10E80..=0x10EB7).contains(&u) { return USE_TABLE[u as usize - 0x10E80 + USE_OFFSET_0X10E80]; } - if (0x10F30..=0x10F57).contains(&u) { return USE_TABLE[u as usize - 0x10F30 + USE_OFFSET_0X10F30]; } - if (0x10FB0..=0x110C7).contains(&u) { return USE_TABLE[u as usize - 0x10FB0 + USE_OFFSET_0X10FB0]; } + if (0x10F30..=0x110C7).contains(&u) { return USE_TABLE[u as usize - 0x10F30 + USE_OFFSET_0X10F30]; } } 0x11 => { - if (0x10FB0..=0x110C7).contains(&u) { return USE_TABLE[u as usize - 0x10FB0 + USE_OFFSET_0X10FB0]; } + if (0x10F30..=0x110C7).contains(&u) { return USE_TABLE[u as usize - 0x10F30 + USE_OFFSET_0X10F30]; } if (0x11100..=0x1123F).contains(&u) { return USE_TABLE[u as usize - 0x11100 + 
USE_OFFSET_0X11100]; } if (0x11280..=0x11377).contains(&u) { return USE_TABLE[u as usize - 0x11280 + USE_OFFSET_0X11280]; } if (0x11400..=0x114DF).contains(&u) { return USE_TABLE[u as usize - 0x11400 + USE_OFFSET_0X11400]; } @@ -957,7 +1018,7 @@ pub fn get_category(u: u32) -> Category { if (0x13000..=0x1343F).contains(&u) { return USE_TABLE[u as usize - 0x13000 + USE_OFFSET_0X13000]; } } 0x16 => { - if (0x16B00..=0x16B37).contains(&u) { return USE_TABLE[u as usize - 0x16B00 + USE_OFFSET_0X16B00]; } + if (0x16AC0..=0x16B37).contains(&u) { return USE_TABLE[u as usize - 0x16AC0 + USE_OFFSET_0X16AC0]; } if (0x16F00..=0x16F97).contains(&u) { return USE_TABLE[u as usize - 0x16F00 + USE_OFFSET_0X16F00]; } if (0x16FE0..=0x16FE7).contains(&u) { return USE_TABLE[u as usize - 0x16FE0 + USE_OFFSET_0X16FE0]; } } @@ -969,9 +1030,12 @@ pub fn get_category(u: u32) -> Category { } 0x1E => { if (0x1E100..=0x1E14F).contains(&u) { return USE_TABLE[u as usize - 0x1E100 + USE_OFFSET_0X1E100]; } - if (0x1E2C0..=0x1E2FF).contains(&u) { return USE_TABLE[u as usize - 0x1E2C0 + USE_OFFSET_0X1E2C0]; } + if (0x1E290..=0x1E2FF).contains(&u) { return USE_TABLE[u as usize - 0x1E290 + USE_OFFSET_0X1E290]; } if (0x1E900..=0x1E95F).contains(&u) { return USE_TABLE[u as usize - 0x1E900 + USE_OFFSET_0X1E900]; } } + 0xE0 => { + if (0xE0100..=0xE01EF).contains(&u) { return USE_TABLE[u as usize - 0xE0100 + USE_OFFSET_0XE0100]; } + } _ => {} } diff --git a/src/plan.rs b/src/plan.rs index a5c6909c..b29fb4b8 100644 --- a/src/plan.rs +++ b/src/plan.rs @@ -289,18 +289,20 @@ impl<'a> ShapePlanner<'a> { let mut apply_kern = false; // Decide who does positioning. GPOS, kerx, kern, or fallback. 
+ let has_kerx = self.face.tables().kerx.is_some(); let has_gsub = self.face.tables().gsub.is_some(); let has_gpos = !disable_gpos && self.face.tables().gpos.is_some(); - if self.face.tables().kerx.is_some() && !(has_gsub && has_gpos) { + // Prefer GPOS over kerx if GSUB is present; + // https://github.com/harfbuzz/harfbuzz/issues/3008 + if has_kerx && !(has_gsub && has_gpos) { apply_kerx = true; - } else if !apply_morx && has_gpos { + } else if has_gpos { apply_gpos = true; } if !apply_kerx && (!has_gpos_kern || !apply_gpos) { - // Apparently Apple applies kerx if GPOS kern was not applied. - if self.face.tables().kerx.is_some() { + if has_kerx { apply_kerx = true; } else if ot::has_kerning(self.face) { apply_kern = true; diff --git a/src/unicode.rs b/src/unicode.rs index 1fbc0426..c8f18ff3 100644 --- a/src/unicode.rs +++ b/src/unicode.rs @@ -651,7 +651,7 @@ impl CharExt for char { /// As such, we make exceptions for those four. /// Also ignoring U+1BCA0..1BCA3. https://github.com/harfbuzz/harfbuzz/issues/503 /// - /// Unicode 7.0: + /// Unicode 14.0: /// $ grep '; Default_Ignorable_Code_Point ' DerivedCoreProperties.txt | sed 's/;.*#/#/' /// 00AD # Cf SOFT HYPHEN /// 034F # Mn COMBINING GRAPHEME JOINER @@ -660,6 +660,7 @@ impl CharExt for char { /// 17B4..17B5 # Mn [2] KHMER VOWEL INHERENT AQ..KHMER VOWEL INHERENT AA /// 180B..180D # Mn [3] MONGOLIAN FREE VARIATION SELECTOR ONE..MONGOLIAN FREE VARIATION SELECTOR THREE /// 180E # Cf MONGOLIAN VOWEL SEPARATOR + /// 180F # Mn MONGOLIAN FREE VARIATION SELECTOR FOUR /// 200B..200F # Cf [5] ZERO WIDTH SPACE..RIGHT-TO-LEFT MARK /// 202A..202E # Cf [5] LEFT-TO-RIGHT EMBEDDING..RIGHT-TO-LEFT OVERRIDE /// 2060..2064 # Cf [5] WORD JOINER..INVISIBLE PLUS @@ -711,8 +712,8 @@ impl CharExt for char { } fn is_variation_selector(self) -> bool { - // U+180B..180D MONGOLIAN FREE VARIATION SELECTORs are handled in the - // Arabic shaper. No need to match them here. 
+ // U+180B..180D, U+180F MONGOLIAN FREE VARIATION SELECTORs are handled in the + // Arabic shaper. No need to match them here. let ch = u32::from(self); (0x0FE00..=0x0FE0F).contains(&ch) || // VARIATION SELECTOR - 1..16 (0xE0100..=0xE01EF).contains(&ch) // VARIATION SELECTOR - 17..256 diff --git a/tests/fonts/in-house/e2b17207c4b7ad78d843e1b0c4d00b09398a1137.ttf b/tests/fonts/in-house/e2b17207c4b7ad78d843e1b0c4d00b09398a1137.ttf new file mode 100644 index 00000000..b499ce07 Binary files /dev/null and b/tests/fonts/in-house/e2b17207c4b7ad78d843e1b0c4d00b09398a1137.ttf differ diff --git a/tests/fonts/in-house/e6185e88b04432fbf373594d5971686bb7dd698d.ttf b/tests/fonts/in-house/e6185e88b04432fbf373594d5971686bb7dd698d.ttf new file mode 100644 index 00000000..5234b061 Binary files /dev/null and b/tests/fonts/in-house/e6185e88b04432fbf373594d5971686bb7dd698d.ttf differ diff --git a/tests/shaping/in_house.rs b/tests/shaping/in_house.rs index 482f390f..8ad60296 100644 --- a/tests/shaping/in_house.rs +++ b/tests/shaping/in_house.rs @@ -18,6 +18,22 @@ fn aat_morx_001() { ); } +#[test] +fn aat_morx_002() { + assert_eq!( + shape( + "tests/fonts/in-house/e6185e88b04432fbf373594d5971686bb7dd698d.ttf", + "\u{0B95}\u{0BCD}\u{0020}\u{0B9A}\u{0BCD}", + "", + ), + "ka-tamil=0+825|\ + pulli-tamil=0@-392,0+0|\ + space=2+260|\ + ca-tamil=3+723|\ + pulli-tamil=3@-320,0+0" + ); +} + #[test] fn aat_trak_001() { assert_eq!( @@ -2371,6 +2387,20 @@ fn indic_syllable_012() { ); } +#[test] +fn indic_syllable_013() { + assert_eq!( + shape( + "tests/fonts/in-house/e2b17207c4b7ad78d843e1b0c4d00b09398a1137.ttf", + "\u{0BAA}\u{0BAA}\u{0BCD}", + "", + ), + "pa-tamil=0+778|\ + pa-tamil.001=1+778|\ + pulli-tamil=1@-385,0+0" + ); +} + #[test] fn indic_vowel_letter_spoofing_001() { assert_eq!( @@ -10627,6 +10657,7 @@ fn use_syllable_022() { "", ), "u11124=0+514|\ + uni25CC=1+547|\ u11127=1+0" ); }