Skip to content

Commit 4f8930b

Browse files
[Py] add missing unicode categories in python library (#3872)
* add missing unicode categories in python library
* update changelogs
---------
Co-authored-by: Maxime Mangel <[email protected]>
1 parent 79a2610 commit 4f8930b

File tree

4 files changed

+72
-1
lines changed

4 files changed

+72
-1
lines changed

src/Fable.Cli/CHANGELOG.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
1515
* [All] Updated Fable-FCS to latest F# 9.0 (by @ncave)
1616
* [All] Updated metadata to latest .NET 9.0 (by @ncave)
1717

18+
### Fixed
19+
20+
* [Py] Add missing unicode categories in python library (by @joprice)
21+
1822
## 5.0.0-alpha.5 - 2025-01-09
1923

2024
### Added

src/Fable.Compiler/CHANGELOG.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
1515
* [All] Updated Fable-FCS to latest F# 9.0 (by @ncave)
1616
* [All] Updated metadata to latest .NET 9.0 (by @ncave)
1717

18+
### Fixed
19+
20+
* [Py] Add missing unicode categories in python library (by @joprice)
21+
1822
## 5.0.0-alpha.5 - 2025-01-09
1923

2024
### Added

src/fable-library-py/fable_library/char.py

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,19 @@ class UnicodeCategory(IntEnum):
6060
"Sk": UnicodeCategory.ModifierSymbol,
6161
"Mn": UnicodeCategory.NonSpacingMark,
6262
"Lo": UnicodeCategory.OtherLetter,
63-
"No": UnicodeCategory.OtherLetter,
63+
"No": UnicodeCategory.OtherNumber,
64+
"Lt": UnicodeCategory.TitlecaseLetter,
65+
"Cn": UnicodeCategory.OtherNotAssigned,
66+
"Co": UnicodeCategory.PrivateUse,
67+
"Cs": UnicodeCategory.Surrogate,
68+
"Zp": UnicodeCategory.ParagraphSeparator,
69+
"Lm": UnicodeCategory.ModifierLetter,
70+
"Mc": UnicodeCategory.SpacingCombiningMark,
71+
"Me": UnicodeCategory.EnclosingMark,
72+
"Pe": UnicodeCategory.ClosePunctuation,
73+
"Pf": UnicodeCategory.FinalQuotePunctuation,
74+
"Ps": UnicodeCategory.OpenPunctuation,
75+
"So": UnicodeCategory.OtherSymbol,
6476
}
6577

6678

tests/Python/TestString.fs

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -991,3 +991,54 @@ let ``test calling ToString(CultureInfo.InvariantCulture) works`` () =
991991
(1).ToString(CultureInfo.InvariantCulture) |> equal "1"
992992
(7923209L).ToString(CultureInfo.InvariantCulture) |> equal "7923209"
993993
(7923209UL).ToString(CultureInfo.InvariantCulture) |> equal "7923209"
994+
995+
996+
#if FABLE_COMPILER
997+
open Fable.Core
998+
999+
[<Import("category", "unicodedata")>]
1000+
let unicodeCategory: char -> string = nativeOnly
1001+
1002+
[<Fact>]
1003+
let ``test unicode categories`` () =
1004+
let chars = [
1005+
"\x00", "Cc"
1006+
" ", "Zs"
1007+
"!", "Po"
1008+
"$", "Sc"
1009+
"(", "Ps"
1010+
")", "Pe"
1011+
"+", "Sm"
1012+
"-", "Pd"
1013+
"0", "Nd"
1014+
"A", "Lu"
1015+
"^", "Sk"
1016+
"_", "Pc"
1017+
"a", "Ll"
1018+
"¦", "So"
1019+
"ª", "Lo"
1020+
"«", "Pi"
1021+
"\xad", "Cf"
1022+
"²", "No"
1023+
"»", "Pf"
1024+
"Dž", "Lt"
1025+
"ʰ", "Lm"
1026+
"\u0300", "Mn"
1027+
"\u0378", "Cn"
1028+
"\u0488", "Me"
1029+
"\u0903", "Mc"
1030+
"\u16ee", "Nl"
1031+
"\u2028", "Zl"
1032+
"\u2029", "Zp"
1033+
//TODO: this fails with error EXCEPTION: Unable to translate Unicode character \\uD800 at index 116 to specified code page.
1034+
//"\ud800" , "Cs"
1035+
"\ue000", "Co"
1036+
]
1037+
for (s, cat) in chars do
1038+
s
1039+
|> String.iter (fun c ->
1040+
// this ensures that the character is from the expected category
1041+
cat |> equal (unicodeCategory c)
1042+
Char.IsLetterOrDigit c |> ignore
1043+
)
1044+
#endif

0 commit comments

Comments
 (0)