Skip to content

Commit 8556e9b

Browse files
committed
add Zzzz
1 parent d85295e commit 8556e9b

File tree

4 files changed

+6
-4
lines changed

4 files changed

+6
-4
lines changed

GlotScript/GlotScript.py

+3-2
Original file line numberDiff line numberDiff line change
@@ -178,7 +178,8 @@
178178
'Nagm': [(124112, 124153)], # Nag_Mundari
179179
'Mend': [(124928, 125124), (125127, 125142)], # Mende_Kikakui
180180
'Adlm': [(125184, 125259), (125264, 125273), (125278, 125279)], # Adlam
181-
'Zyyy': [(0, 64), (91, 96), (123, 169), (171, 185), (187, 191), (215, 215), (247, 247), (697, 735), (741, 745), (748, 767), (884, 884), (894, 894), (901, 901), (903, 903), (1541, 1541), (1548, 1548), (1563, 1563), (1567, 1567), (1600, 1600), (1757, 1757), (2274, 2274), (2404, 2405), (3647, 3647), (4053, 4056), (4347, 4347), (5867, 5869), (5941, 5942), (6146, 6147), (6149, 6149), (7379, 7379), (7393, 7393), (7401, 7404), (7406, 7411), (7413, 7415), (7418, 7418), (8192, 8203), (8206, 8292), (8294, 8304), (8308, 8318), (8320, 8334), (8352, 8384), (8448, 8485), (8487, 8489), (8492, 8497), (8499, 8525), (8527, 8543), (8585, 8587), (8592, 9254), (9280, 9290), (9312, 10239), (10496, 11123), (11126, 11157), (11159, 11263), (11776, 11869), (12272, 12283), (12288, 12292), (12294, 12294), (12296, 12320), (12336, 12343), (12348, 12351), (12443, 12444), (12448, 12448), (12539, 12540), (12688, 12703), (12736, 12771), (12832, 12895), (12927, 13007), (13055, 13055), (13144, 13311), (19904, 19967), (42752, 42785), (42888, 42890), (43056, 43065), (43310, 43310), (43471, 43471), (43867, 43867), (43882, 43883), (64830, 64831), (65040, 65049), (65072, 65106), (65108, 65126), (65128, 65131), (65279, 65279), (65281, 65312), (65339, 65344), (65371, 65381), (65392, 65392), (65438, 65439), (65504, 65510), (65512, 65518), (65529, 65533), (65792, 65794), (65799, 65843), (65847, 65855), (65936, 65948), (66000, 66044), (66273, 66299), (113824, 113827), (118608, 118723), (118784, 119029), (119040, 119078), (119081, 119142), (119146, 119162), (119171, 119172), (119180, 119209), (119214, 119274), (119488, 119507), (119520, 119539), (119552, 119638), (119648, 119672), (119808, 119892), (119894, 119964), (119966, 119967), (119970, 119970), (119973, 119974), (119977, 119980), (119982, 119993), (119995, 119995), (119997, 120003), (120005, 120069), (120071, 120074), (120077, 120084), (120086, 120092), (120094, 120121), (120123, 120126), (120128, 120132), (120134, 120134), (120138, 120144), (120146, 
120485), (120488, 120779), (120782, 120831), (126065, 126132), (126209, 126269), (126976, 127019), (127024, 127123), (127136, 127150), (127153, 127167), (127169, 127183), (127185, 127221), (127232, 127405), (127462, 127487), (127489, 127490), (127504, 127547), (127552, 127560), (127568, 127569), (127584, 127589), (127744, 128727), (128732, 128748), (128752, 128764), (128768, 128886), (128891, 128985), (128992, 129003), (129008, 129008), (129024, 129035), (129040, 129095), (129104, 129113), (129120, 129159), (129168, 129197), (129200, 129201), (129280, 129619), (129632, 129645), (129648, 129660), (129664, 129672), (129680, 129725), (129727, 129733), (129742, 129755), (129760, 129768), (129776, 129784), (129792, 129938), (129940, 129994), (130032, 130041), (917505, 917505), (917536, 917631)], # Common
181+
'Zyyy': [(0, 64), (91, 96), (123, 169), (171, 185), (187, 191), (215, 215), (247, 247), (697, 735), (741, 745), (748, 767), (884, 884), (894, 894), (901, 901), (903, 903), (1541, 1541), (1548, 1548), (1563, 1563), (1567, 1567), (1600, 1600), (1757, 1757), (2274, 2274), (2404, 2405), (3647, 3647), (4053, 4056), (4347, 4347), (5867, 5869), (5941, 5942), (6146, 6147), (6149, 6149), (7379, 7379), (7393, 7393), (7401, 7404), (7406, 7411), (7413, 7415), (7418, 7418), (8192, 8203), (8206, 8292), (8294, 8304), (8308, 8318), (8320, 8334), (8352, 8384), (8448, 8485), (8487, 8489), (8492, 8497), (8499, 8525), (8527, 8543), (8585, 8587), (8592, 9254), (9280, 9290), (9312, 10239), (10496, 11123), (11126, 11157), (11159, 11263), (11776, 11869), (12272, 12283), (12288, 12292), (12294, 12294), (12296, 12320), (12336, 12343), (12348, 12351), (12443, 12444), (12448, 12448), (12539, 12540), (12688, 12703), (12736, 12771), (12832, 12895), (12927, 13007), (13055, 13055), (13144, 13311), (19904, 19967), (42752, 42785), (42888, 42890), (43056, 43065), (43310, 43310), (43471, 43471), (43867, 43867), (43882, 43883), (64830, 64831), (65040, 65049), (65072, 65106), (65108, 65126), (65128, 65131), (65279, 65279), (65281, 65312), (65339, 65344), (65371, 65381), (65392, 65392), (65438, 65439), (65504, 65510), (65512, 65518), (65529, 65532), (65792, 65794), (65799, 65843), (65847, 65855), (65936, 65948), (66000, 66044), (66273, 66299), (113824, 113827), (118608, 118723), (118784, 119029), (119040, 119078), (119081, 119142), (119146, 119162), (119171, 119172), (119180, 119209), (119214, 119274), (119488, 119507), (119520, 119539), (119552, 119638), (119648, 119672), (119808, 119892), (119894, 119964), (119966, 119967), (119970, 119970), (119973, 119974), (119977, 119980), (119982, 119993), (119995, 119995), (119997, 120003), (120005, 120069), (120071, 120074), (120077, 120084), (120086, 120092), (120094, 120121), (120123, 120126), (120128, 120132), (120134, 120134), (120138, 120144), (120146, 
120485), (120488, 120779), (120782, 120831), (126065, 126132), (126209, 126269), (126976, 127019), (127024, 127123), (127136, 127150), (127153, 127167), (127169, 127183), (127185, 127221), (127232, 127405), (127462, 127487), (127489, 127490), (127504, 127547), (127552, 127560), (127568, 127569), (127584, 127589), (127744, 128727), (128732, 128748), (128752, 128764), (128768, 128886), (128891, 128985), (128992, 129003), (129008, 129008), (129024, 129035), (129040, 129095), (129104, 129113), (129120, 129159), (129168, 129197), (129200, 129201), (129280, 129619), (129632, 129645), (129648, 129660), (129664, 129672), (129680, 129725), (129727, 129733), (129742, 129755), (129760, 129768), (129776, 129784), (129792, 129938), (129940, 129994), (130032, 130041), (917505, 917505), (917536, 917631)], # Common
182+
'Zzzz': [(65533, 65533)]
182183
}
183184

184185

@@ -209,7 +210,7 @@ def predict_script(sent: str) -> ScoredScript:
209210

210211
for char, count in char_counts.items():
211212
ordinal = ord(char)
212-
for script_name in hist_map.get(ordinal, []):
213+
for script_name in hist_map.get(ordinal, {'Zzzz'}):
213214
script_count[script_name] += count
214215

215216

GlotScript/__init__.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
11
from .GlotScript import get_script_predictor
22

3-
__version__ = '1.0'
3+
__version__ = '1.1'

README.md

+1
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ Detect the script (writing system) of text based on ISO 15924.
77
## Special codes
88
- `Zinh` code is the Unicode script property value of characters that may be used with multiple scripts, and that inherit their script from a preceding base character. In some cases, we opted to integrate parts of the Zinh code (e.g. ARABIC FATHATAN..ARABIC HAMZA BELOW, ARABIC LETTER SUPERSCRIPT ALEF) into a different block.
99
- `Zyyy` code is the Unicode script for "Common" characters.
10+
- `Zzzz` code is the Unicode script code for "uncoded" script, i.e. characters that are not assigned to any other script.
1011

1112
## Install from pip
1213
```bash

setup.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55

66
setup(
77
name="GlotScript",
8-
version="1.0",
8+
version="1.1",
99
author="Amir Hossein Kargaran",
1010
author_email="[email protected]",
1111
description="A package for detecting the script (writing system) of given text.",

0 commit comments

Comments
 (0)