From 98e1a00a030061794900573e3e7bb8b199471b3e Mon Sep 17 00:00:00 2001
From: Laurenz Stampfl <47084093+LaurenzV@users.noreply.github.com>
Date: Wed, 7 Feb 2024 20:38:31 +0100
Subject: [PATCH] Improve formatting and use upper-case USE category keys

---
 scripts/gen-universal-table.py | 204 ++++++++++++++++++++++-----------
 1 file changed, 138 insertions(+), 66 deletions(-)

diff --git a/scripts/gen-universal-table.py b/scripts/gen-universal-table.py
index 1a40fd7a..edb77e28 100755
--- a/scripts/gen-universal-table.py
+++ b/scripts/gen-universal-table.py
@@ -24,7 +24,8 @@
 
 files = [io.open(x, encoding='utf-8') for x in files]
 
-headers = [[f.readline () for i in range (2)] for j,f in enumerate(files) if j != 4]
+headers = [[f.readline() for i in range(2)]
+           for j, f in enumerate(files) if j != 4]
 for j in range(7, 9):
     for line in files[j]:
         line = line.rstrip()
@@ -95,17 +96,18 @@
 data[0][0x111C8] = 'Consonant_Placeholder'
 
 # Merge data into one dict:
-for i,v in enumerate (defaults):
+for i, v in enumerate(defaults):
     values[i][v] = values[i].get (v, 0) + 1
 combined = {}
-for i,d in enumerate (data):
-    for u,v in d.items ():
+for i, d in enumerate(data):
+    for u, v in d.items():
         if not u in combined:
             if i >= 4:
                 continue
             combined[u] = list (defaults)
         combined[u][i] = v
-combined = {k: v for k, v in combined.items() if v[6] not in DISABLED_SCRIPTS}
+combined = {k: v for k, v in combined.items(
+) if v[6] not in DISABLED_SCRIPTS}
 data = combined
 del combined
 
@@ -186,15 +188,20 @@
 class PropertyValue(object):
     def __init__(self, name_):
         self.name = name_
+
     def __str__(self):
         return self.name
+
     def __eq__(self, other):
         return self.name == (other if isinstance(other, str) else other.name)
+
     def __ne__(self, other):
         return not (self == other)
+
     def __hash__(self):
         return hash(str(self))
 
+
 property_values = {}
 
 for name in property_names:
@@ -214,50 +221,87 @@ def is_BASE(U, UISC, UDI, UGC, AJT):
             AJT in [jt_C, jt_D, jt_L, jt_R] and UISC != Joiner or
             (UGC == Lo and UISC in [Avagraha, Bindu, Consonant_Final, Consonant_Medial,
                                     Consonant_Subjoined, Vowel, Vowel_Dependent]))
+
+
 def is_BASE_NUM(U, UISC, UDI, UGC, AJT):
     return UISC == Brahmi_Joining_Number
+
+
 def is_BASE_OTHER(U, UISC, UDI, UGC, AJT):
-    if UISC == Consonant_Placeholder: return True
+    if UISC == Consonant_Placeholder:
+        return True
     return U in [0x2015, 0x2022, 0x25FB, 0x25FC, 0x25FD, 0x25FE]
+
+
 def is_CGJ(U, UISC, UDI, UGC, AJT):
     # Also includes VARIATION_SELECTOR, WJ, and ZWJ
     return U == 0x200D or UDI and UGC in [Mc, Me, Mn]
+
+
 def is_CONS_FINAL(U, UISC, UDI, UGC, AJT):
     return ((UISC == Consonant_Final and UGC != Lo) or
             UISC == Consonant_Succeeding_Repha)
+
+
 def is_CONS_FINAL_MOD(U, UISC, UDI, UGC, AJT):
     return UISC == Syllable_Modifier
+
+
 def is_CONS_MED(U, UISC, UDI, UGC, AJT):
     # Consonant_Initial_Postfixed is new in Unicode 11; not in the spec.
     return (UISC == Consonant_Medial and UGC != Lo or
             UISC == Consonant_Initial_Postfixed)
+
+
 def is_CONS_MOD(U, UISC, UDI, UGC, AJT):
     return (UISC in [Nukta, Gemination_Mark, Consonant_Killer] and
             not is_SYM_MOD(U, UISC, UDI, UGC, AJT))
+
+
 def is_CONS_SUB(U, UISC, UDI, UGC, AJT):
     return UISC == Consonant_Subjoined and UGC != Lo
+
+
 def is_CONS_WITH_STACKER(U, UISC, UDI, UGC, AJT):
     return UISC == Consonant_With_Stacker
+
+
 def is_HALANT(U, UISC, UDI, UGC, AJT):
     return (UISC in [Virama, Invisible_Stacker]
             and not is_HALANT_OR_VOWEL_MODIFIER(U, UISC, UDI, UGC, AJT)
             and not is_SAKOT(U, UISC, UDI, UGC, AJT))
+
+
 def is_HALANT_OR_VOWEL_MODIFIER(U, UISC, UDI, UGC, AJT):
     # Split off of HALANT
     # https://github.com/harfbuzz/harfbuzz/issues/1379
     return U == 0x1134D
+
+
 def is_HALANT_NUM(U, UISC, UDI, UGC, AJT):
     return UISC == Number_Joiner
+
+
 def is_HIEROGLYPH(U, UISC, UDI, UGC, AJT):
     return UISC == Hieroglyph
+
+
 def is_HIEROGLYPH_JOINER(U, UISC, UDI, UGC, AJT):
     return UISC == Hieroglyph_Joiner
+
+
 def is_HIEROGLYPH_SEGMENT_BEGIN(U, UISC, UDI, UGC, AJT):
     return UISC == Hieroglyph_Segment_Begin
+
+
 def is_HIEROGLYPH_SEGMENT_END(U, UISC, UDI, UGC, AJT):
     return UISC == Hieroglyph_Segment_End
+
+
 def is_ZWNJ(U, UISC, UDI, UGC, AJT):
     return UISC == Non_Joiner
+
+
 def is_OTHER(U, UISC, UDI, UGC, AJT):
     # Also includes BASE_IND, Rsv, and SYM
     return ((UGC in [Cn, Po] or UISC in [Consonant_Dead, Joiner, Modifying_Letter, Other])
@@ -266,93 +310,105 @@ def is_OTHER(U, UISC, UDI, UGC, AJT):
             and not is_CGJ(U, UISC, UDI, UGC, AJT)
             and not is_SYM_MOD(U, UISC, UDI, UGC, AJT)
             )
+
+
 def is_REPHA(U, UISC, UDI, UGC, AJT):
     return UISC in [Consonant_Preceding_Repha, Consonant_Prefixed]
+
+
 def is_SAKOT(U, UISC, UDI, UGC, AJT):
     # Split off of HALANT
     return U == 0x1A60
+
+
 def is_SYM_MOD(U, UISC, UDI, UGC, AJT):
     return U in [0x1B6B, 0x1B6C, 0x1B6D, 0x1B6E, 0x1B6F, 0x1B70, 0x1B71, 0x1B72, 0x1B73]
+
+
 def is_VOWEL(U, UISC, UDI, UGC, AJT):
     # https://github.com/harfbuzz/harfbuzz/issues/376
     return (UISC == Pure_Killer or
             (UGC != Lo and UISC in [Vowel, Vowel_Dependent] and U not in [0xAA29]))
+
+
 def is_VOWEL_MOD(U, UISC, UDI, UGC, AJT):
     # https://github.com/harfbuzz/harfbuzz/issues/376
     return (UISC in [Tone_Mark, Cantillation_Mark, Register_Shifter, Visarga] or
             (UGC != Lo and (UISC == Bindu or U in [0xAA29])))
 
+
 use_mapping = {
-    'B':	is_BASE,
-    'N':	is_BASE_NUM,
-    'GB':	is_BASE_OTHER,
-    'CGJ':	is_CGJ,
-    'F':	is_CONS_FINAL,
-    'FM':	is_CONS_FINAL_MOD,
-    'M':	is_CONS_MED,
-    'CM':	is_CONS_MOD,
-    'SUB':	is_CONS_SUB,
-    'CS':	is_CONS_WITH_STACKER,
-    'H':	is_HALANT,
-    'HVM':	is_HALANT_OR_VOWEL_MODIFIER,
-    'HN':	is_HALANT_NUM,
-    'G':	is_HIEROGLYPH,
-    'J':	is_HIEROGLYPH_JOINER,
-    'SB':	is_HIEROGLYPH_SEGMENT_BEGIN,
-    'SE':	is_HIEROGLYPH_SEGMENT_END,
-    'ZWNJ':	is_ZWNJ,
-    'O':	is_OTHER,
-    'R':	is_REPHA,
-    'Sk':	is_SAKOT,
-    'SM':	is_SYM_MOD,
-    'V':	is_VOWEL,
-    'VM':	is_VOWEL_MOD,
+    'B': is_BASE,
+    'N': is_BASE_NUM,
+    'GB': is_BASE_OTHER,
+    'CGJ': is_CGJ,
+    'F': is_CONS_FINAL,
+    'FM': is_CONS_FINAL_MOD,
+    'M': is_CONS_MED,
+    'CM': is_CONS_MOD,
+    'SUB': is_CONS_SUB,
+    'CS': is_CONS_WITH_STACKER,
+    'H': is_HALANT,
+    'HVM': is_HALANT_OR_VOWEL_MODIFIER,
+    'HN': is_HALANT_NUM,
+    'G': is_HIEROGLYPH,
+    'J': is_HIEROGLYPH_JOINER,
+    'SB': is_HIEROGLYPH_SEGMENT_BEGIN,
+    'SE': is_HIEROGLYPH_SEGMENT_END,
+    'ZWNJ': is_ZWNJ,
+    'O': is_OTHER,
+    'R': is_REPHA,
+    'SK': is_SAKOT,
+    'SM': is_SYM_MOD,
+    'V': is_VOWEL,
+    'VM': is_VOWEL_MOD,
 }
 
 use_positions = {
     'F': {
-        'Abv': [Top],
-        'Blw': [Bottom],
-        'Pst': [Right],
+        'ABV': [Top],
+        'BLW': [Bottom],
+        'PST': [Right],
     },
     'M': {
-        'Abv': [Top],
-        'Blw': [Bottom, Bottom_And_Left, Bottom_And_Right],
-        'Pst': [Right],
-        'Pre': [Left, Top_And_Bottom_And_Left],
+        'ABV': [Top],
+        'BLW': [Bottom, Bottom_And_Left, Bottom_And_Right],
+        'PST': [Right],
+        'PRE': [Left, Top_And_Bottom_And_Left],
     },
     'CM': {
-        'Abv': [Top],
-        'Blw': [Bottom, Overstruck],
+        'ABV': [Top],
+        'BLW': [Bottom, Overstruck],
     },
     'V': {
-        'Abv': [Top, Top_And_Bottom, Top_And_Bottom_And_Right, Top_And_Right],
-        'Blw': [Bottom, Overstruck, Bottom_And_Right],
-        'Pst': [Right],
-        'Pre': [Left, Top_And_Left, Top_And_Left_And_Right, Left_And_Right],
+        'ABV': [Top, Top_And_Bottom, Top_And_Bottom_And_Right, Top_And_Right],
+        'BLW': [Bottom, Overstruck, Bottom_And_Right],
+        'PST': [Right],
+        'PRE': [Left, Top_And_Left, Top_And_Left_And_Right, Left_And_Right],
     },
     'VM': {
-        'Abv': [Top],
-        'Blw': [Bottom, Overstruck],
-        'Pst': [Right],
-        'Pre': [Left],
+        'ABV': [Top],
+        'BLW': [Bottom, Overstruck],
+        'PST': [Right],
+        'PRE': [Left],
     },
     'SM': {
-        'Abv': [Top],
-        'Blw': [Bottom],
+        'ABV': [Top],
+        'BLW': [Bottom],
     },
     'H': None,
     'HVM': None,
     'B': None,
     'FM': {
-        'Abv': [Top],
-        'Blw': [Bottom],
-        'Pst': [Not_Applicable],
+        'ABV': [Top],
+        'BLW': [Bottom],
+        'PST': [Not_Applicable],
     },
     'R': None,
     'SUB': None,
 }
 
+
 def map_to_use(data):
     out = {}
     items = use_mapping.items()
@@ -361,42 +417,56 @@ def map_to_use(data):
         # Resolve Indic_Syllabic_Category
 
         # TODO: These don't have UISC assigned in Unicode 13.0.0, but have UIPC
-        if 0x1CE2 <= U <= 0x1CE8: UISC = Cantillation_Mark
+        if 0x1CE2 <= U <= 0x1CE8:
+            UISC = Cantillation_Mark
 
         # Tibetan:
         # TODO: These don't have UISC assigned in Unicode 13.0.0, but have UIPC
-        if 0x0F18 <= U <= 0x0F19 or 0x0F3E <= U <= 0x0F3F: UISC = Vowel_Dependent
+        if 0x0F18 <= U <= 0x0F19 or 0x0F3E <= U <= 0x0F3F:
+            UISC = Vowel_Dependent
 
         # TODO: https://github.com/harfbuzz/harfbuzz/pull/627
-        if 0x1BF2 <= U <= 0x1BF3: UISC = Nukta; UIPC = Bottom
+        if 0x1BF2 <= U <= 0x1BF3:
+            UISC = Nukta
+            UIPC = Bottom
 
         # TODO: U+1CED should only be allowed after some of
         # the nasalization marks, maybe only for U+1CE9..U+1CF1.
-        if U == 0x1CED: UISC = Tone_Mark
+        if U == 0x1CED:
+            UISC = Tone_Mark
 
         # TODO: https://github.com/microsoft/font-tools/issues/1
-        if U == 0xA982: UISC = Consonant_Succeeding_Repha
+        if U == 0xA982:
+            UISC = Consonant_Succeeding_Repha
 
         values = [k for k,v in items if v(U, UISC, UDI, UGC, AJT)]
-        assert len(values) == 1, "%s %s %s %s %s %s" % (hex(U), UISC, UDI, UGC, AJT, values)
+        assert len(values) == 1, "%s %s %s %s %s %s" % (
+            hex(U), UISC, UDI, UGC, AJT, values)
         USE = values[0]
 
         # Resolve Indic_Positional_Category
 
         # TODO: These should die, but have UIPC in Unicode 13.0.0
-        if U in [0x953, 0x954]: UIPC = Not_Applicable
+        if U in [0x953, 0x954]:
+            UIPC = Not_Applicable
 
         # TODO: These are not in USE's override list that we have, nor are they in Unicode 13.0.0
-        if 0xA926 <= U <= 0xA92A: UIPC = Top
+        if 0xA926 <= U <= 0xA92A:
+            UIPC = Top
+
         # TODO: https://github.com/harfbuzz/harfbuzz/pull/1037
         #  and https://github.com/harfbuzz/harfbuzz/issues/1631
-        if U in [0x11302, 0x11303, 0x114C1]: UIPC = Top
-        if 0x1CF8 <= U <= 0x1CF9: UIPC = Top
+        if U in [0x11302, 0x11303, 0x114C1]:
+            UIPC = Top
+        if 0x1CF8 <= U <= 0x1CF9:
+            UIPC = Top
 
         # TODO: https://github.com/harfbuzz/harfbuzz/pull/982
         # also  https://github.com/harfbuzz/harfbuzz/issues/1012
-        if 0x1112A <= U <= 0x1112B: UIPC = Top
-        if 0x11131 <= U <= 0x11132: UIPC = Top
+        if 0x1112A <= U <= 0x1112B:
+            UIPC = Top
+        if 0x11131 <= U <= 0x11132:
+            UIPC = Top
 
         assert (UIPC in [Not_Applicable, Visual_Order_Left] or U == 0x0F7F or
                 USE in use_positions), "%s %s %s %s %s %s %s" % (hex(U), UIPC, USE, UISC, UDI, UGC, AJT)
@@ -404,12 +474,14 @@ def map_to_use(data):
         pos_mapping = use_positions.get(USE, None)
         if pos_mapping:
             values = [k for k,v in pos_mapping.items() if v and UIPC in v]
-            assert len(values) == 1, "%s %s %s %s %s %s %s %s" % (hex(U), UIPC, USE, UISC, UDI, UGC, AJT, values)
+            assert len(values) == 1, "%s %s %s %s %s %s %s %s" % (
+                hex(U), UIPC, USE, UISC, UDI, UGC, AJT, values)
             USE = USE + values[0]
 
         out[U] = (USE, UBlock)
     return out
 
+
 defaults = ('O', 'No_Block')
 data = map_to_use(data)
 
@@ -440,7 +512,7 @@ def print_block(block, start, end, data):
         if u in data:
             num += 1
         d = data.get(u, defaults)
-        print('%6s,' % d[0].upper(), end='')
+        print('%6s,' % d[0], end='')
 
     total += end - start + 1
     used += num