Skip to content

Commit 0621e70

Browse files
authored
Merge pull request #93 from LaurenzV/3.1.1
Sync with 3.1.1
2 parents 0f0b732 + 10f9230 commit 0621e70

18 files changed

+664
-590
lines changed

scripts/gen-universal-table.py

+83-115
Large diffs are not rendered by default.
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,12 @@
11
# Override values For Indic_Positional_Category
22
# Not derivable
33
# Initial version based on Unicode 7.0 by Andrew Glass 2014-03-17
4-
# Updated for Unicode 10.0 by Andrew Glass 2017-07-25
4+
# Updated for Unicode 10.0 by Andrew Glass 2017-07-25
55
# Amended for Unicode 10.0 by Andrew Glass 2018-09-21
6-
# Updated for L2/19-083 by Andrew Glass 2019-05-06
7-
# Updated for Unicode 12.1 by Andrew Glass 2019-05-30
8-
# Updated for Unicode 13.0 by Andrew Glass 2020-07-28
6+
# Updated for L2/19-083 by Andrew Glass 2019-05-06
7+
# Updated for Unicode 12.1 by Andrew Glass 2019-05-30
8+
# Updated for Unicode 13.0 by Andrew Glass 2020-07-28
9+
# Updated for Unicode 14.0 by Andrew Glass 2021-09-28
910

1011
# ================================================
1112
# ================================================
@@ -14,39 +15,39 @@
1415
# ================================================
1516

1617
# Indic_Positional_Category=Bottom
17-
0F72 ; Bottom # Mn TIBETAN VOWEL SIGN I # Not really below, but need to override to fit into Universal model
18-
0F7A..0F7D ; Bottom # Mn [4] TIBETAN VOWEL SIGN E..TIBETAN VOWEL SIGN OO # Not really below, but need to override to fit into Universal model
19-
0F80 ; Bottom # Mn TIBETAN VOWEL SIGN REVERSED I # Not really below, but need to override to fit into Universal model
20-
A9BF ; Bottom # Mc JAVANESE CONSONANT SIGN CAKRA
21-
11127..11129; Bottom # Mn [3] CHAKMA VOWEL SIGN A..CHAKMA VOWEL SIGN II
22-
1112D ; Bottom # Mn CHAKMA VOWEL SIGN AI
23-
11130 ; Bottom # Mn CHAKMA VOWEL SIGN OI
18+
0F72 ; Bottom # Mn TIBETAN VOWEL SIGN I # Not really below, but need to override to fit into Universal model
19+
0F7A..0F7D ; Bottom # Mn [4] TIBETAN VOWEL SIGN E..TIBETAN VOWEL SIGN OO # Not really below, but need to override to fit into Universal model
20+
0F80 ; Bottom # Mn TIBETAN VOWEL SIGN REVERSED I # Not really below, but need to override to fit into Universal model
21+
A9BF ; Bottom # Mc JAVANESE CONSONANT SIGN CAKRA
22+
11127..11129 ; Bottom # Mn [3] CHAKMA VOWEL SIGN A..CHAKMA VOWEL SIGN II
23+
1112D ; Bottom # Mn CHAKMA VOWEL SIGN AI
24+
11130 ; Bottom # Mn CHAKMA VOWEL SIGN OI
2425

2526
# ================================================
2627

2728
# Indic_Positional_Category=Left
28-
1C29 ; Left # Mc LEPCHA VOWEL SIGN OO # Reduced from Top_And_Left
29+
1C29 ; Left # Mc LEPCHA VOWEL SIGN OO # Reduced from Top_And_Left
2930

3031
# ================================================
3132

3233

3334
# Indic_Positional_Category=Right
34-
A9BE ; Right # Mc JAVANESE CONSONANT SIGN PENGKAL # Reduced from Bottom_And_Right
35-
10A0C ; Right # Mn KHAROSHTHI VOWEL LENGTH MARK # Follows vowels and precedes vowel modifiers
36-
11942 ; Right # Mc DIVES AKURU MEDIAL RA # Reduced from Bottom_And_Right
35+
A9BE ; Right # Mc JAVANESE CONSONANT SIGN PENGKAL # Reduced from Bottom_And_Right
36+
10A0C ; Right # Mn KHAROSHTHI VOWEL LENGTH MARK # Follows vowels and precedes vowel modifiers
37+
11942 ; Right # Mc DIVES AKURU MEDIAL RA # Reduced from Bottom_And_Right
3738

3839
# ================================================
3940

4041
# Indic_Positional_Category=Top
41-
0F74 ; Top # Mn TIBETAN VOWEL SIGN U # Not really above, but need to override to fit into Universal model
42-
1A18 ; Top # Mn BUGINESE VOWEL SIGN U # Workaround to allow below to occur before above by treating all below marks as above
43-
AA35   ; Top # Mn       CHAM CONSONANT SIGN
42+
0F74 ; Top # Mn TIBETAN VOWEL SIGN U # Not really above, but need to override to fit into Universal model
43+
1A18 ; Top # Mn BUGINESE VOWEL SIGN U # Workaround to allow below to occur before above by treating all below marks as above
44+
AA35   ; Top # Mn       CHAM CONSONANT SIGN LA
4445

4546
# ================================================
4647

4748
# Indic_Positional_Category=Top_And_Right
48-
0E33 ; Top_And_Right # Lo THAI CHARACTER SARA AM # IMC has Right, which seems to be a mistake.
49-
0EB3 ; Top_And_Right # Lo LAO VOWEL SIGN AM # IMC has Right, which seems to be a mistake.
49+
0E33 ; Top_And_Right # Lo THAI CHARACTER SARA AM # IMC has Right, which seems to be a mistake.
50+
0EB3 ; Top_And_Right # Lo LAO VOWEL SIGN AM # IMC has Right, which seems to be a mistake.
5051

5152
# ================================================
5253
# ================================================
@@ -55,41 +56,46 @@ AA35   ; Top # Mn       CHAM CONSONANT SIGN
5556
# ================================================
5657

5758
# Indic_Positional_Category=Bottom
58-
0859..085B ; Bottom # Mn [3] MANDAIC AFFRICATION MARK..MANDAIC GEMINATION MARK
59-
18A9 ; Bottom # Mn MONGOLIAN LETTER ALI GALI DAGALGA
60-
10AE5 ; Bottom # Mn MANICHAEAN ABBREVIATION MARK ABOVE # Not really bottom, but here for ccc to control
61-
10AE6 ; Bottom # Mn MANICHAEAN ABBREVIATION MARK BELOW
62-
10F46..10F47 ; Bottom # Mn [2] SOGDIAN COMBINING DOT BELOW..SOGDIAN COMBINING TWO DOTS BELOW
63-
10F48..10F4A ; Bottom # Mn [3] SOGDIAN COMBINING DOT ABOVE..SOGDIAN COMBINING CURVE ABOVE # Overriden to below because ccc-based Normalization controls order
64-
10F4B ; Bottom # Mn SOGDIAN COMBINING CURVE BELOW
65-
10F4C ; Bottom # Mn SOGDIAN COMBINING HOOK ABOVE # Overriden to below because ccc-based Normalization controls order
66-
10F4D..10F50 ; Bottom # Mn [4] SOGDIAN COMBINING HOOK BELOW..SOGDIAN COMBINING STROKE BELOW
67-
16F4F ; Bottom # Mn MIAO SIGN CONSONANT MODIFIER BAR
68-
16F51..16F87 ; Bottom # Mc [55] MIAO SIGN ASPIRATION..MIAO VOWEL SIGN UI
69-
16F8F..16F92 ; Bottom # Mn [4] MIAO TONE RIGHT..MIAO TONE BELOW
59+
0859..085B ; Bottom # Mn [3] MANDAIC AFFRICATION MARK..MANDAIC GEMINATION MARK
60+
18A9 ; Bottom # Mn MONGOLIAN LETTER ALI GALI DAGALGA
61+
10AE5 ; Bottom # Mn MANICHAEAN ABBREVIATION MARK ABOVE # Overridden, ccc controls order
62+
10AE6 ; Bottom # Mn MANICHAEAN ABBREVIATION MARK BELOW
63+
10F46..10F47 ; Bottom # Mn [2] SOGDIAN COMBINING DOT BELOW..SOGDIAN COMBINING TWO DOTS BELOW
64+
10F48..10F4A ; Bottom # Mn [3] SOGDIAN COMBINING DOT ABOVE..SOGDIAN COMBINING CURVE ABOVE # Overridden, ccc controls order
65+
10F4B ; Bottom # Mn SOGDIAN COMBINING CURVE BELOW
66+
10F4C ; Bottom # Mn SOGDIAN COMBINING HOOK ABOVE # Overridden, ccc controls order
67+
10F4D..10F50 ; Bottom # Mn [4] SOGDIAN COMBINING HOOK BELOW..SOGDIAN COMBINING STROKE BELOW
68+
10F82 ; Bottom # Mn OLD UYGHUR COMBINING DOT ABOVE # Overridden, ccc controls order
69+
10F83 ; Bottom # Mn OLD UYGHUR COMBINING DOT BELOW
70+
10F84 ; Bottom # Mn OLD UYGHUR COMBINING TWO DOTS ABOVE # Overridden, ccc controls order
71+
10F85 ; Bottom # Mn OLD UYGHUR COMBINING TWO DOTS BELOW
72+
16F4F ; Bottom # Mn MIAO SIGN CONSONANT MODIFIER BAR
73+
16F51..16F87 ; Bottom # Mc [55] MIAO SIGN ASPIRATION..MIAO VOWEL SIGN UI
74+
16F8F..16F92 ; Bottom # Mn [4] MIAO TONE RIGHT..MIAO TONE BELOW
7075

7176
# ================================================
7277

7378
# Indic_Positional_Category=Left
74-
103C ; Left # Mc MYANMAR CONSONANT SIGN MEDIAL RA
79+
103C ; Left # Mc MYANMAR CONSONANT SIGN MEDIAL RA
7580

7681
# ================================================
7782

7883
# Indic_Positional_Category=Top
79-
07EB..07F3 ; Top # Mn [9] NKO COMBINING SHORT HIGH TONE..NKO COMBINING DOUBLE DOT ABOVE
80-
07FD ; Top # Mn NKO DANTAYALAN # Not really top, but assigned here to allow ccc to control mark order
81-
1885..1886 ; Top # Mn [2] MONGOLIAN LETTER ALI GALI BALUDA..MONGOLIAN LETTER ALI GALI THREE BALUDA
82-
10EAB..10EAC ; Top # Mn [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK
83-
1E944..1E94A ; Top # Mn [7] ADLAM ALIF LENGTHENER..ADLAM NUKTA
84-
10D24..10D27 ; Top # Mn [4] HANIFI ROHINGYA SIGN HARBAHAY..HANIFI ROHINGYA SIGN TASSI
85-
16B30..16B36 ; Top # Mn [7] PAHAWH HMONG MARK CIM TUB..PAHAWH HMONG MARK CIM TAUM
86-
1E130..1E136 ; Top # Mn [7] NYIAKENG PUACHUE HMONG TONE-B..NYIAKENG PUACHUE HMONG TONE-D
87-
1E2EC..1E2EF ; Top # Mn [4] WANCHO TONE TUP..WANCHO TONE KOINI
84+
07EB..07F3 ; Top # Mn [9] NKO COMBINING SHORT HIGH TONE..NKO COMBINING DOUBLE DOT ABOVE
85+
07FD ; Top # Mn NKO DANTAYALAN # Not really top, but assigned here to allow ccc to control mark order
86+
1885..1886 ; Top # Mn [2] MONGOLIAN LETTER ALI GALI BALUDA..MONGOLIAN LETTER ALI GALI THREE BALUDA
87+
10D24..10D27 ; Top # Mn [4] HANIFI ROHINGYA SIGN HARBAHAY..HANIFI ROHINGYA SIGN TASSI
88+
10EAB..10EAC ; Top # Mn [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK
89+
16B30..16B36 ; Top # Mn [7] PAHAWH HMONG MARK CIM TUB..PAHAWH HMONG MARK CIM TAUM
90+
1E130..1E136 ; Top # Mn [7] NYIAKENG PUACHUE HMONG TONE-B..NYIAKENG PUACHUE HMONG TONE-D
91+
1E2AE ; Top # Mn TOTO SIGN RISING TONE
92+
1E2EC..1E2EF ; Top # Mn [4] WANCHO TONE TUP..WANCHO TONE KOINI
93+
1E944..1E94A ; Top # Mn [7] ADLAM ALIF LENGTHENER..ADLAM NUKTA
8894

8995
# ================================================
9096

9197
# Indic_Positional_Category=Overstruck
92-
1BC9D..1BC9E ; Overstruck # Mn [2] DUPLOYAN THICK LETTER SELECTOR..DUPLOYAN DOUBLE MARK
98+
1BC9D..1BC9E ; Overstruck # Mn [2] DUPLOYAN THICK LETTER SELECTOR..DUPLOYAN DOUBLE MARK
9399

94100
# ================================================
95101
# ================================================
@@ -98,5 +104,6 @@ AA35   ; Top # Mn       CHAM CONSONANT SIGN
98104
# ================================================
99105

100106
# Indic_Positional_Category=NA
101-
180B..180D ; NA # Mn [3] MONGOLIAN FREE VARIATION SELECTOR ONE..MONGOLIAN FREE VARIATION SELECTOR THREE
102-
2D7F ; NA # Mn TIFINAGH CONSONANT JOINER
107+
180B..180D ; NA # Mn [3] MONGOLIAN FREE VARIATION SELECTOR ONE..MONGOLIAN FREE VARIATION SELECTOR THREE
108+
180F ; NA # Mn MONGOLIAN FREE VARIATION SELECTOR FOUR
109+
2D7F ; NA # Mn TIFINAGH CONSONANT JOINER

0 commit comments

Comments
 (0)