|
| 1 | +import re |
| 2 | +from unicodedata import name |
| 3 | + |
# Character classes, each a list of single-character strings in ascending
# code point order. The scan below fills the classes that can be derived from
# Unicode character names; the remaining lists are only initialised in this
# section.
APOSTROPHE = []
BRACKET = []
COLON = []
COMMA = []
CURRENCY = []
CURRENCY_RAW = []
EXCLAMATION_MARK_RAW = []
FULL_STOP = []
HASHTAG = []
HASHTAG_RAW = []
MENTION = []
MENTION_RAW = []
PAREN = []
QUESTION_MARK_RAW = []
QUOTE = []
SENTENCE_END = []
URL = []
URL_RAW = []
WORD_DELIM = []

# chr() only accepts 0..0x10FFFF, so this is the full range worth scanning.
_UNICODE_CODEPOINT_COUNT = 0x110000

# Characters whose name contains "APOSTROPHE" but that are deliberately left
# out: U+0149 (LATIN SMALL LETTER N PRECEDED BY APOSTROPHE) and U+E0027
# (TAG APOSTROPHE). U+0149 is written as an escape so the entry cannot be
# silently decomposed into two characters by an editor or normaliser.
_APOSTROPHE_EXCLUDED = frozenset({"\u0149", "\U000e0027"})

# U+20A1 is the colon *currency* sign, not a punctuation colon.
_COLON_CURRENCY_SIGN = 0x20A1


def _collect_named_punctuation():
    """Fill the module-level character lists from Unicode character names.

    Every code point is visited once. Code points without a name are skipped,
    so they never reach any of the name tests. A character may be appended to
    more than one list when its name satisfies several tests.
    """
    for codepoint in range(_UNICODE_CODEPOINT_COUNT):
        char = chr(codepoint)
        # The default avoids raising ValueError for unnamed code points.
        char_name = name(char, "")
        if not char_name:
            continue

        if "APOSTROPHE" in char_name and char not in _APOSTROPHE_EXCLUDED:
            APOSTROPHE.append(char)
        if (
            "BRACKET" in char_name
            and "IDEOGRAPH" not in char_name
            and "TORTOISE SHELL BRACKETED LATIN CAPITAL LETTER S" not in char_name
        ):
            BRACKET.append(char)
        if "COLON" in char_name and codepoint != _COLON_CURRENCY_SIGN:
            COLON.append(char)
        if "COMMA" in char_name and not char_name.startswith(
            ("LATIN SMALL LETTER", "LATIN CAPITAL LETTER", "DIGIT")
        ):
            # Skips letters with a comma diacritic and "DIGIT ... COMMA" forms.
            COMMA.append(char)
        if "EXCLAMATION" in char_name:
            EXCLAMATION_MARK_RAW.append(char)
        if "FULL STOP" in char_name and not char_name.startswith(
            ("DIGIT", "NUMBER")
        ):
            # Skips enumerators such as "DIGIT ONE FULL STOP".
            FULL_STOP.append(char)
        if "QUOT" in char_name and char_name != "YI SYLLABLE QUOT":
            QUOTE.append(char)
        if "CURRENC" in char_name:
            CURRENCY.append(char)
        if "PAREN" in char_name and not char_name.startswith("PARENTHESIZED"):
            PAREN.append(char)
        if "QUESTION" in char_name and "IDEOGRAPH" not in char_name:
            QUESTION_MARK_RAW.append(char)


_collect_named_punctuation()
0 commit comments