diff --git a/unicodetools/data/ucd/dev/DerivedAge.txt b/unicodetools/data/ucd/dev/DerivedAge.txt index 18d53a3f0..07688f424 100644 --- a/unicodetools/data/ucd/dev/DerivedAge.txt +++ b/unicodetools/data/ucd/dev/DerivedAge.txt @@ -1,5 +1,5 @@ # DerivedAge-17.0.0.txt -# Date: 2025-01-27, 18:09:08 GMT +# Date: 2025-04-15, 14:53:10 GMT # © 2025 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -2093,7 +2093,7 @@ FDC8..FDCE ; 17.0 # [7] ARABIC LIGATURE RAHIMAHU ALLAAH TAAALAA..ARABIC LIG 16EBB..16ED3 ; 17.0 # [25] BERIA ERFE SMALL LETTER ARKAB..BERIA ERFE SMALL LETTER AY 16FF2..16FF6 ; 17.0 # [5] CHINESE SMALL SIMPLIFIED ER..YANGQIN SIGN SLOW TWO BEATS 187F8..187FF ; 17.0 # [8] TANGUT IDEOGRAPH-187F8..TANGUT IDEOGRAPH-187FF -18D09..18D1E ; 17.0 # [22] TANGUT IDEOGRAPH-18D09..TANGUT IDEOGRAPH-18D1E +18D09..18D1F ; 17.0 # [23] TANGUT IDEOGRAPH-18D09..TANGUT IDEOGRAPH-18D1F 18D80..18DF2 ; 17.0 # [115] TANGUT COMPONENT-769..TANGUT COMPONENT-883 1CCFA..1CCFC ; 17.0 # [3] SNAKE SYMBOL..NOSE SYMBOL 1CEBA..1CED0 ; 17.0 # [23] FRAGILE SYMBOL..LEUKOTHEA @@ -2116,6 +2116,6 @@ FDC8..FDCE ; 17.0 # [7] ARABIC LIGATURE RAHIMAHU ALLAAH TAAALAA..ARABIC LIG 2B73A..2B73E ; 17.0 # [5] CJK UNIFIED IDEOGRAPH-2B73A..CJK UNIFIED IDEOGRAPH-2B73E 323B0..33479 ; 17.0 # [4298] CJK UNIFIED IDEOGRAPH-323B0..CJK UNIFIED IDEOGRAPH-33479 -# Total code points: 4836 +# Total code points: 4837 # EOF diff --git a/unicodetools/data/ucd/dev/DerivedCoreProperties.txt b/unicodetools/data/ucd/dev/DerivedCoreProperties.txt index c5df66f61..eb15863db 100644 --- a/unicodetools/data/ucd/dev/DerivedCoreProperties.txt +++ b/unicodetools/data/ucd/dev/DerivedCoreProperties.txt @@ -1,5 +1,5 @@ # DerivedCoreProperties-17.0.0.txt -# Date: 2025-02-14, 00:13:14 GMT +# Date: 2025-04-15, 14:53:27 GMT # © 2025 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1340,7 +1340,7 @@ FFDA..FFDC ; Alphabetic # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANG 16FF2..16FF3 ; Alphabetic # Lm [2] CHINESE SMALL SIMPLIFIED ER..CHINESE SMALL TRADITIONAL ER 16FF4..16FF6 ; Alphabetic # Nl [3] YANGQIN SIGN SLOW ONE BEAT..YANGQIN SIGN SLOW TWO BEATS 17000..18CD5 ; Alphabetic # Lo [7382] TANGUT IDEOGRAPH-17000..KHITAN SMALL SCRIPT CHARACTER-18CD5 -18CFF..18D1E ; Alphabetic # Lo [32] KHITAN SMALL SCRIPT CHARACTER-18CFF..TANGUT IDEOGRAPH-18D1E +18CFF..18D1F ; Alphabetic # Lo [33] KHITAN SMALL SCRIPT CHARACTER-18CFF..TANGUT IDEOGRAPH-18D1F 18D80..18DF2 ; Alphabetic # Lo [115] TANGUT COMPONENT-769..TANGUT COMPONENT-883 1AFF0..1AFF3 ; Alphabetic # Lm [4] KATAKANA LETTER MINNAN TONE-2..KATAKANA LETTER MINNAN TONE-5 1AFF5..1AFFB ; Alphabetic # Lm [7] KATAKANA LETTER MINNAN TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-5 @@ -1471,7 +1471,7 @@ FFDA..FFDC ; Alphabetic # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANG 30000..3134A ; Alphabetic # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..33479 ; Alphabetic # Lo [8490] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-33479 -# Total code points: 147441 +# Total code points: 147442 # ================================================ @@ -6928,7 +6928,7 @@ FFDA..FFDC ; ID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 16FF2..16FF3 ; ID_Start # Lm [2] CHINESE SMALL SIMPLIFIED ER..CHINESE SMALL TRADITIONAL ER 16FF4..16FF6 ; ID_Start # Nl [3] YANGQIN SIGN SLOW ONE BEAT..YANGQIN SIGN SLOW TWO BEATS 17000..18CD5 ; ID_Start # Lo [7382] TANGUT IDEOGRAPH-17000..KHITAN SMALL SCRIPT CHARACTER-18CD5 -18CFF..18D1E ; ID_Start # Lo [32] KHITAN SMALL SCRIPT CHARACTER-18CFF..TANGUT IDEOGRAPH-18D1E +18CFF..18D1F ; ID_Start # Lo [33] KHITAN SMALL SCRIPT CHARACTER-18CFF..TANGUT IDEOGRAPH-18D1F 18D80..18DF2 ; ID_Start # Lo [115] TANGUT COMPONENT-769..TANGUT COMPONENT-883 1AFF0..1AFF3 ; ID_Start # Lm [4] KATAKANA LETTER MINNAN TONE-2..KATAKANA LETTER MINNAN TONE-5 1AFF5..1AFFB ; ID_Start # Lm [7] KATAKANA LETTER MINNAN TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-5 @@ -7044,7 +7044,7 @@ FFDA..FFDC ; ID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 30000..3134A ; ID_Start # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..33479 ; ID_Start # Lo [8490] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-33479 -# Total code points: 145935 +# Total code points: 145936 # ================================================ @@ -8326,7 +8326,7 @@ FFDA..FFDC ; ID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HAN 16FF2..16FF3 ; ID_Continue # Lm [2] CHINESE SMALL SIMPLIFIED ER..CHINESE SMALL TRADITIONAL ER 16FF4..16FF6 ; ID_Continue # Nl [3] YANGQIN SIGN SLOW ONE BEAT..YANGQIN SIGN SLOW TWO BEATS 17000..18CD5 ; ID_Continue # Lo [7382] TANGUT IDEOGRAPH-17000..KHITAN SMALL SCRIPT CHARACTER-18CD5 -18CFF..18D1E ; ID_Continue # Lo [32] KHITAN SMALL SCRIPT CHARACTER-18CFF..TANGUT IDEOGRAPH-18D1E +18CFF..18D1F ; ID_Continue # Lo [33] KHITAN SMALL SCRIPT CHARACTER-18CFF..TANGUT IDEOGRAPH-18D1F 18D80..18DF2 ; ID_Continue # Lo [115] TANGUT COMPONENT-769..TANGUT COMPONENT-883 1AFF0..1AFF3 ; ID_Continue # Lm [4] KATAKANA LETTER MINNAN TONE-2..KATAKANA LETTER MINNAN TONE-5 1AFF5..1AFFB ; ID_Continue # Lm [7] KATAKANA LETTER MINNAN TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-5 @@ -8484,7 +8484,7 @@ FFDA..FFDC ; ID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HAN 31350..33479 ; ID_Continue # Lo [8490] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-33479 E0100..E01EF ; ID_Continue # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 149273 +# Total code points: 149274 # ================================================ @@ -9163,7 +9163,7 @@ FFDA..FFDC ; XID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGU 16FF2..16FF3 ; XID_Start # Lm [2] CHINESE SMALL SIMPLIFIED ER..CHINESE SMALL TRADITIONAL ER 16FF4..16FF6 ; XID_Start # Nl [3] YANGQIN SIGN SLOW ONE BEAT..YANGQIN SIGN SLOW TWO BEATS 17000..18CD5 ; XID_Start # Lo [7382] TANGUT IDEOGRAPH-17000..KHITAN SMALL SCRIPT CHARACTER-18CD5 -18CFF..18D1E ; XID_Start # Lo [32] KHITAN SMALL SCRIPT CHARACTER-18CFF..TANGUT IDEOGRAPH-18D1E +18CFF..18D1F ; XID_Start # Lo [33] KHITAN SMALL SCRIPT CHARACTER-18CFF..TANGUT IDEOGRAPH-18D1F 18D80..18DF2 ; XID_Start # Lo [115] TANGUT COMPONENT-769..TANGUT COMPONENT-883 1AFF0..1AFF3 ; XID_Start # Lm [4] KATAKANA LETTER MINNAN TONE-2..KATAKANA LETTER MINNAN TONE-5 1AFF5..1AFFB ; XID_Start # Lm [7] KATAKANA LETTER MINNAN TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-5 @@ -9279,7 +9279,7 @@ FFDA..FFDC ; XID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGU 30000..3134A ; XID_Start # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..33479 ; XID_Start # Lo [8490] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-33479 -# Total code points: 145912 +# Total code points: 145913 # ================================================ @@ -10562,7 +10562,7 @@ FFDA..FFDC ; XID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HA 16FF2..16FF3 ; XID_Continue # Lm [2] CHINESE SMALL SIMPLIFIED ER..CHINESE SMALL TRADITIONAL ER 16FF4..16FF6 ; XID_Continue # Nl [3] YANGQIN SIGN SLOW ONE BEAT..YANGQIN SIGN SLOW TWO BEATS 17000..18CD5 ; XID_Continue # Lo [7382] TANGUT IDEOGRAPH-17000..KHITAN SMALL SCRIPT CHARACTER-18CD5 -18CFF..18D1E ; XID_Continue # Lo [32] KHITAN SMALL SCRIPT CHARACTER-18CFF..TANGUT IDEOGRAPH-18D1E +18CFF..18D1F ; XID_Continue # Lo [33] KHITAN SMALL SCRIPT CHARACTER-18CFF..TANGUT IDEOGRAPH-18D1F 18D80..18DF2 ; XID_Continue # Lo [115] TANGUT COMPONENT-769..TANGUT COMPONENT-883 1AFF0..1AFF3 ; XID_Continue # Lm [4] KATAKANA LETTER MINNAN TONE-2..KATAKANA LETTER MINNAN TONE-5 1AFF5..1AFFB ; XID_Continue # Lm [7] KATAKANA LETTER MINNAN TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-5 @@ -10720,7 +10720,7 @@ FFDA..FFDC ; XID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HA 31350..33479 ; XID_Continue # Lo [8490] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-33479 E0100..E01EF ; XID_Continue # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 149254 +# Total code points: 149255 # ================================================ @@ -12799,7 +12799,7 @@ FFFC..FFFD ; Grapheme_Base # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEME 16FF2..16FF3 ; Grapheme_Base # Lm [2] CHINESE SMALL SIMPLIFIED ER..CHINESE SMALL TRADITIONAL ER 16FF4..16FF6 ; Grapheme_Base # Nl [3] YANGQIN SIGN SLOW ONE BEAT..YANGQIN SIGN SLOW TWO BEATS 17000..18CD5 ; Grapheme_Base # Lo [7382] TANGUT IDEOGRAPH-17000..KHITAN SMALL SCRIPT CHARACTER-18CD5 -18CFF..18D1E ; Grapheme_Base # Lo [32] KHITAN SMALL SCRIPT CHARACTER-18CFF..TANGUT IDEOGRAPH-18D1E +18CFF..18D1F ; Grapheme_Base # Lo [33] KHITAN SMALL SCRIPT CHARACTER-18CFF..TANGUT IDEOGRAPH-18D1F 18D80..18DF2 ; Grapheme_Base # Lo [115] TANGUT COMPONENT-769..TANGUT COMPONENT-883 1AFF0..1AFF3 ; Grapheme_Base # Lm [4] KATAKANA LETTER MINNAN TONE-2..KATAKANA LETTER MINNAN TONE-5 1AFF5..1AFFB ; Grapheme_Base # Lm [7] KATAKANA LETTER MINNAN TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-5 @@ -13016,7 +13016,7 @@ FFFC..FFFD ; Grapheme_Base # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEME 30000..3134A ; Grapheme_Base # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..33479 ; Grapheme_Base # Lo [8490] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-33479 -# Total code points: 157523 +# Total code points: 157524 # ================================================ diff --git a/unicodetools/data/ucd/dev/EastAsianWidth.txt b/unicodetools/data/ucd/dev/EastAsianWidth.txt index a5c270b19..a447a1cc6 100644 --- a/unicodetools/data/ucd/dev/EastAsianWidth.txt +++ b/unicodetools/data/ucd/dev/EastAsianWidth.txt @@ -1,5 +1,5 @@ # EastAsianWidth-17.0.0.txt -# Date: 2025-01-27, 18:09:15 GMT +# Date: 2025-04-15, 14:53:31 GMT # © 2025 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -2388,7 +2388,7 @@ FFFD ; A # So REPLACEMENT CHARACTER 18800..18AFF ; W # Lo [768] TANGUT COMPONENT-001..TANGUT COMPONENT-768 18B00..18CD5 ; W # Lo [470] KHITAN SMALL SCRIPT CHARACTER-18B00..KHITAN SMALL SCRIPT CHARACTER-18CD5 18CFF ; W # Lo KHITAN SMALL SCRIPT CHARACTER-18CFF -18D00..18D1E ; W # Lo [31] TANGUT IDEOGRAPH-18D00..TANGUT IDEOGRAPH-18D1E +18D00..18D1F ; W # Lo [32] TANGUT IDEOGRAPH-18D00..TANGUT IDEOGRAPH-18D1F 18D80..18DF2 ; W # Lo [115] TANGUT COMPONENT-769..TANGUT COMPONENT-883 1AFF0..1AFF3 ; W # Lm [4] KATAKANA LETTER MINNAN TONE-2..KATAKANA LETTER MINNAN TONE-5 1AFF5..1AFFB ; W # Lm [7] KATAKANA LETTER MINNAN TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-5 diff --git a/unicodetools/data/ucd/dev/LineBreak.txt b/unicodetools/data/ucd/dev/LineBreak.txt index b7e7fae28..4332df081 100644 --- a/unicodetools/data/ucd/dev/LineBreak.txt +++ b/unicodetools/data/ucd/dev/LineBreak.txt @@ -1,5 +1,5 @@ # LineBreak-17.0.0.txt -# Date: 2025-02-14, 15:13:07 GMT +# Date: 2025-04-15, 14:53:32 GMT # © 2025 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -3297,7 +3297,7 @@ FFFD ; AI # So REPLACEMENT CHARACTER 18800..18AFF ; ID # Lo [768] TANGUT COMPONENT-001..TANGUT COMPONENT-768 18B00..18CD5 ; AL # Lo [470] KHITAN SMALL SCRIPT CHARACTER-18B00..KHITAN SMALL SCRIPT CHARACTER-18CD5 18CFF ; AL # Lo KHITAN SMALL SCRIPT CHARACTER-18CFF -18D00..18D1E ; ID # Lo [31] TANGUT IDEOGRAPH-18D00..TANGUT IDEOGRAPH-18D1E +18D00..18D1F ; ID # Lo [32] TANGUT IDEOGRAPH-18D00..TANGUT IDEOGRAPH-18D1F 18D80..18DF2 ; ID # Lo [115] TANGUT COMPONENT-769..TANGUT COMPONENT-883 1AFF0..1AFF3 ; AL # Lm [4] KATAKANA LETTER MINNAN TONE-2..KATAKANA LETTER MINNAN TONE-5 1AFF5..1AFFB ; AL # Lm [7] KATAKANA LETTER MINNAN TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-5 diff --git a/unicodetools/data/ucd/dev/PropList.txt b/unicodetools/data/ucd/dev/PropList.txt index 878a4b104..a251a704f 100644 --- a/unicodetools/data/ucd/dev/PropList.txt +++ b/unicodetools/data/ucd/dev/PropList.txt @@ -1,5 +1,5 @@ # PropList-17.0.0.txt -# Date: 2025-02-18, 12:46:41 GMT +# Date: 2025-04-15, 14:53:39 GMT # © 2025 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -885,7 +885,7 @@ FA70..FAD9 ; Ideographic # Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COM 16FF2..16FF3 ; Ideographic # Lm [2] CHINESE SMALL SIMPLIFIED ER..CHINESE SMALL TRADITIONAL ER 16FF4..16FF6 ; Ideographic # Nl [3] YANGQIN SIGN SLOW ONE BEAT..YANGQIN SIGN SLOW TWO BEATS 17000..18CD5 ; Ideographic # Lo [7382] TANGUT IDEOGRAPH-17000..KHITAN SMALL SCRIPT CHARACTER-18CD5 -18CFF..18D1E ; Ideographic # Lo [32] KHITAN SMALL SCRIPT CHARACTER-18CFF..TANGUT IDEOGRAPH-18D1E +18CFF..18D1F ; Ideographic # Lo [33] KHITAN SMALL SCRIPT CHARACTER-18CFF..TANGUT IDEOGRAPH-18D1F 18D80..18DF2 ; Ideographic # Lo [115] TANGUT COMPONENT-769..TANGUT COMPONENT-883 1B170..1B2FB ; Ideographic # Lo [396] NUSHU CHARACTER-1B170..NUSHU CHARACTER-1B2FB 20000..2A6DF ; Ideographic # Lo [42720] CJK UNIFIED IDEOGRAPH-20000..CJK UNIFIED IDEOGRAPH-2A6DF @@ -898,7 +898,7 @@ FA70..FAD9 ; Ideographic # Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COM 30000..3134A ; Ideographic # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..33479 ; Ideographic # Lo [8490] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-33479 -# Total code points: 110930 +# Total code points: 110931 # ================================================ diff --git a/unicodetools/data/ucd/dev/Scripts.txt b/unicodetools/data/ucd/dev/Scripts.txt index 139c00537..c271dc94f 100644 --- a/unicodetools/data/ucd/dev/Scripts.txt +++ b/unicodetools/data/ucd/dev/Scripts.txt @@ -1,5 +1,5 @@ # Scripts-17.0.0.txt -# Date: 2025-01-27, 18:09:39 GMT +# Date: 2025-04-15, 14:53:51 GMT # © 2025 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -2775,10 +2775,10 @@ ABF0..ABF9 ; Meetei_Mayek # Nd [10] MEETEI MAYEK DIGIT ZERO..MEETEI MAYEK DI 16FE0 ; Tangut # Lm TANGUT ITERATION MARK 17000..18AFF ; Tangut # Lo [6912] TANGUT IDEOGRAPH-17000..TANGUT COMPONENT-768 -18D00..18D1E ; Tangut # Lo [31] TANGUT IDEOGRAPH-18D00..TANGUT IDEOGRAPH-18D1E +18D00..18D1F ; Tangut # Lo [32] TANGUT IDEOGRAPH-18D00..TANGUT IDEOGRAPH-18D1F 18D80..18DF2 ; Tangut # Lo [115] TANGUT COMPONENT-769..TANGUT COMPONENT-883 -# Total code points: 7059 +# Total code points: 7060 # ================================================ diff --git a/unicodetools/data/ucd/dev/TangutSources.txt b/unicodetools/data/ucd/dev/TangutSources.txt index 48b054501..d3bb5b0ec 100644 --- a/unicodetools/data/ucd/dev/TangutSources.txt +++ b/unicodetools/data/ucd/dev/TangutSources.txt @@ -12406,5 +12406,7 @@ U+18D1D kTGT_MergedSrc H2021-309801 U+18D1D kRSTUnicode 106.13 U+18D1E kTGT_MergedSrc H2021-834001 U+18D1E kRSTUnicode 579.15 +U+18D1F kTGT_MergedSrc H2021-509604 +U+18D1F kRSTUnicode 210.14 # EOF diff --git a/unicodetools/data/ucd/dev/UnicodeData.txt b/unicodetools/data/ucd/dev/UnicodeData.txt index 26b78592a..3e689f5cb 100644 --- a/unicodetools/data/ucd/dev/UnicodeData.txt +++ b/unicodetools/data/ucd/dev/UnicodeData.txt @@ -31786,7 +31786,7 @@ FFFD;REPLACEMENT CHARACTER;So;0;ON;;;;;N;;;;; 18CD5;KHITAN SMALL SCRIPT CHARACTER-18CD5;Lo;0;L;;;;;N;;;;; 18CFF;KHITAN SMALL SCRIPT CHARACTER-18CFF;Lo;0;L;;;;;N;;;;; 18D00;;Lo;0;L;;;;;N;;;;; -18D1E;;Lo;0;L;;;;;N;;;;; +18D1F;;Lo;0;L;;;;;N;;;;; 18D80;TANGUT COMPONENT-769;Lo;0;L;;;;;N;;;;; 18D81;TANGUT COMPONENT-770;Lo;0;L;;;;;N;;;;; 18D82;TANGUT COMPONENT-771;Lo;0;L;;;;;N;;;;; diff --git a/unicodetools/data/ucd/dev/VerticalOrientation.txt b/unicodetools/data/ucd/dev/VerticalOrientation.txt index 71770a13d..83d17e240 100644 --- a/unicodetools/data/ucd/dev/VerticalOrientation.txt +++ b/unicodetools/data/ucd/dev/VerticalOrientation.txt @@ -1,5 +1,5 @@ # VerticalOrientation-17.0.0.txt -# Date: 2025-01-29 +# Date: 2025-04-15, 14:53:53 GMT # © 2025 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -2226,8 +2226,8 @@ FFFC..FFFD ; U # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARA 18B00..18CD5 ; U # Lo [470] KHITAN SMALL SCRIPT CHARACTER-18B00..KHITAN SMALL SCRIPT CHARACTER-18CD5 18CD6..18CFE ; U # Cn [41] .. 18CFF ; U # Lo KHITAN SMALL SCRIPT CHARACTER-18CFF -18D00..18D1E ; U # Lo [31] TANGUT IDEOGRAPH-18D00..TANGUT IDEOGRAPH-18D1E -18D1F..18D7F ; U # Cn [97] .. +18D00..18D1F ; U # Lo [32] TANGUT IDEOGRAPH-18D00..TANGUT IDEOGRAPH-18D1F +18D20..18D7F ; U # Cn [96] .. 18D80..18DF2 ; U # Lo [115] TANGUT COMPONENT-769..TANGUT COMPONENT-883 18DF3..18DFF ; U # Cn [13] .. 1AFF0..1AFF3 ; U # Lm [4] KATAKANA LETTER MINNAN TONE-2..KATAKANA LETTER MINNAN TONE-5 diff --git a/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt b/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt index 0e5dc2e11..510fefce2 100644 --- a/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt +++ b/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt @@ -1,5 +1,5 @@ # SentenceBreakProperty-17.0.0.txt -# Date: 2025-01-27, 18:09:39 GMT +# Date: 2025-04-15, 14:53:51 GMT # © 2025 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -2541,7 +2541,7 @@ FFDA..FFDC ; OLetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 16FF2..16FF3 ; OLetter # Lm [2] CHINESE SMALL SIMPLIFIED ER..CHINESE SMALL TRADITIONAL ER 16FF4..16FF6 ; OLetter # Nl [3] YANGQIN SIGN SLOW ONE BEAT..YANGQIN SIGN SLOW TWO BEATS 17000..18CD5 ; OLetter # Lo [7382] TANGUT IDEOGRAPH-17000..KHITAN SMALL SCRIPT CHARACTER-18CD5 -18CFF..18D1E ; OLetter # Lo [32] KHITAN SMALL SCRIPT CHARACTER-18CFF..TANGUT IDEOGRAPH-18D1E +18CFF..18D1F ; OLetter # Lo [33] KHITAN SMALL SCRIPT CHARACTER-18CFF..TANGUT IDEOGRAPH-18D1F 18D80..18DF2 ; OLetter # Lo [115] TANGUT COMPONENT-769..TANGUT COMPONENT-883 1AFF0..1AFF3 ; OLetter # Lm [4] KATAKANA LETTER MINNAN TONE-2..KATAKANA LETTER MINNAN TONE-5 1AFF5..1AFFB ; OLetter # Lm [7] KATAKANA LETTER MINNAN TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-5 @@ -2622,7 +2622,7 @@ FFDA..FFDC ; OLetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 30000..3134A ; OLetter # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..33479 ; OLetter # Lo [8490] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-33479 -# Total code points: 141520 +# Total code points: 141521 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt b/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt index a4dc4250a..c43c60d02 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt @@ -1,5 +1,5 @@ # DerivedBidiClass-17.0.0.txt -# Date: 2025-01-27, 18:09:10 GMT +# Date: 2025-04-15, 14:53:25 GMT # © 2025 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1117,7 +1117,7 @@ FFDA..FFDC ; L # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER 16FF2..16FF3 ; L # Lm [2] CHINESE SMALL SIMPLIFIED ER..CHINESE SMALL TRADITIONAL ER 16FF4..16FF6 ; L # Nl [3] YANGQIN SIGN SLOW ONE BEAT..YANGQIN SIGN SLOW TWO BEATS 17000..18CD5 ; L # Lo [7382] TANGUT IDEOGRAPH-17000..KHITAN SMALL SCRIPT CHARACTER-18CD5 -18CFF..18D1E ; L # Lo [32] KHITAN SMALL SCRIPT CHARACTER-18CFF..TANGUT IDEOGRAPH-18D1E +18CFF..18D1F ; L # Lo [33] KHITAN SMALL SCRIPT CHARACTER-18CFF..TANGUT IDEOGRAPH-18D1F 18D80..18DF2 ; L # Lo [115] TANGUT COMPONENT-769..TANGUT COMPONENT-883 1AFF0..1AFF3 ; L # Lm [4] KATAKANA LETTER MINNAN TONE-2..KATAKANA LETTER MINNAN TONE-5 1AFF5..1AFFB ; L # Lm [7] KATAKANA LETTER MINNAN TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-5 @@ -1234,7 +1234,7 @@ FFDA..FFDC ; L # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER F0000..FFFFD ; L # Co [65534] .. 100000..10FFFD; L # Co [65534] .. -# The above property value applies to 810584 code points not listed here. +# The above property value applies to 810583 code points not listed here. # Total code points: 1095402 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt b/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt index 3a10fc1e4..fb710342f 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt @@ -1,5 +1,5 @@ # DerivedCombiningClass-17.0.0.txt -# Date: 2025-01-27, 18:09:10 GMT +# Date: 2025-04-15, 14:53:26 GMT # © 2025 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1862,7 +1862,7 @@ FFFC..FFFD ; 0 # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARACTER 16FF2..16FF3 ; 0 # Lm [2] CHINESE SMALL SIMPLIFIED ER..CHINESE SMALL TRADITIONAL ER 16FF4..16FF6 ; 0 # Nl [3] YANGQIN SIGN SLOW ONE BEAT..YANGQIN SIGN SLOW TWO BEATS 17000..18CD5 ; 0 # Lo [7382] TANGUT IDEOGRAPH-17000..KHITAN SMALL SCRIPT CHARACTER-18CD5 -18CFF..18D1E ; 0 # Lo [32] KHITAN SMALL SCRIPT CHARACTER-18CFF..TANGUT IDEOGRAPH-18D1E +18CFF..18D1F ; 0 # Lo [33] KHITAN SMALL SCRIPT CHARACTER-18CFF..TANGUT IDEOGRAPH-18D1F 18D80..18DF2 ; 0 # Lo [115] TANGUT COMPONENT-769..TANGUT COMPONENT-883 1AFF0..1AFF3 ; 0 # Lm [4] KATAKANA LETTER MINNAN TONE-2..KATAKANA LETTER MINNAN TONE-5 1AFF5..1AFFB ; 0 # Lm [7] KATAKANA LETTER MINNAN TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-5 @@ -2095,7 +2095,7 @@ E0100..E01EF ; 0 # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 F0000..FFFFD ; 0 # Co [65534] .. 100000..10FFFD; 0 # Co [65534] .. -# The above property value applies to 816745 code points not listed here. +# The above property value applies to 816744 code points not listed here. # Total code points: 1113143 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt b/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt index c3d0bb02a..2f019d70b 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt @@ -1,5 +1,5 @@ # DerivedEastAsianWidth-17.0.0.txt -# Date: 2025-01-27, 18:09:12 GMT +# Date: 2025-04-15, 14:53:28 GMT # © 2025 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -2144,8 +2144,8 @@ FFFC ; N # So OBJECT REPLACEMENT CHARACTER E0001 ; N # Cf LANGUAGE TAG E0020..E007F ; N # Cf [96] TAG SPACE..CANCEL TAG -# The above property value applies to 760566 code points not listed here. -# Total code points: 792267 +# The above property value applies to 760565 code points not listed here. +# Total code points: 792266 # ================================================ @@ -2550,7 +2550,7 @@ FE6A..FE6B ; W # Po [2] SMALL PERCENT SIGN..SMALL COMMERCIAL AT 16FE4 ; W # Mn KHITAN SMALL SCRIPT FILLER 16FF0..16FF1 ; W # Mc [2] VIETNAMESE ALTERNATE READING MARK CA..VIETNAMESE ALTERNATE READING MARK NHAY 17000..18CD5 ; W # Lo [7382] TANGUT IDEOGRAPH-17000..KHITAN SMALL SCRIPT CHARACTER-18CD5 -18CFF..18D1E ; W # Lo [32] KHITAN SMALL SCRIPT CHARACTER-18CFF..TANGUT IDEOGRAPH-18D1E +18CFF..18D1F ; W # Lo [33] KHITAN SMALL SCRIPT CHARACTER-18CFF..TANGUT IDEOGRAPH-18D1F 18D80..18DF2 ; W # Lo [115] TANGUT COMPONENT-769..TANGUT COMPONENT-883 1AFF0..1AFF3 ; W # Lm [4] KATAKANA LETTER MINNAN TONE-2..KATAKANA LETTER MINNAN TONE-5 1AFF5..1AFFB ; W # Lm [7] KATAKANA LETTER MINNAN TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-5 @@ -2622,7 +2622,7 @@ FE6A..FE6B ; W # Po [2] SMALL PERCENT SIGN..SMALL COMMERCIAL AT 31350..33479 ; W # Lo [8490] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-33479 # The above property value applies to 56179 code points not listed here. -# Total code points: 182768 +# Total code points: 182769 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt b/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt index 22b9a85f6..52ec8ac4d 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt @@ -1,5 +1,5 @@ # DerivedGeneralCategory-17.0.0.txt -# Date: 2025-01-27, 18:09:13 GMT +# Date: 2025-04-15, 14:53:28 GMT # © 2025 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -581,7 +581,7 @@ FFFE..FFFF ; Cn # [2] .. 16FE5..16FEF ; Cn # [11] .. 16FF7..16FFF ; Cn # [9] .. 18CD6..18CFE ; Cn # [41] .. -18D1F..18D7F ; Cn # [97] .. +18D20..18D7F ; Cn # [96] .. 18DF3..1AFEF ; Cn # [8701] .. 1AFF4 ; Cn # 1AFFC ; Cn # @@ -754,7 +754,7 @@ E01F0..EFFFF ; Cn # [65040] .. FFFFE..FFFFF ; Cn # [2] .. 10FFFE..10FFFF; Cn # [2] .. -# Total code points: 814697 +# Total code points: 814696 # ================================================ @@ -2664,7 +2664,7 @@ FFDA..FFDC ; Lo # [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER I 16F00..16F4A ; Lo # [75] MIAO LETTER PA..MIAO LETTER RTE 16F50 ; Lo # MIAO LETTER NASALIZATION 17000..18CD5 ; Lo # [7382] TANGUT IDEOGRAPH-17000..KHITAN SMALL SCRIPT CHARACTER-18CD5 -18CFF..18D1E ; Lo # [32] KHITAN SMALL SCRIPT CHARACTER-18CFF..TANGUT IDEOGRAPH-18D1E +18CFF..18D1F ; Lo # [33] KHITAN SMALL SCRIPT CHARACTER-18CFF..TANGUT IDEOGRAPH-18D1F 18D80..18DF2 ; Lo # [115] TANGUT COMPONENT-769..TANGUT COMPONENT-883 1B000..1B122 ; Lo # [291] KATAKANA LETTER ARCHAIC E..KATAKANA LETTER ARCHAIC WU 1B132 ; Lo # HIRAGANA LETTER SMALL KO @@ -2738,7 +2738,7 @@ FFDA..FFDC ; Lo # [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER I 30000..3134A ; Lo # [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..33479 ; Lo # [8490] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-33479 -# Total code points: 141081 +# Total code points: 141082 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt b/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt index 010826221..d2261ea9a 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt @@ -1,5 +1,5 @@ # DerivedLineBreak-17.0.0.txt -# Date: 2025-02-14, 17:30:22 GMT +# Date: 2025-04-15, 14:53:29 GMT # © 2025 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -70,8 +70,8 @@ E000..F8FF ; XX # Co [6400] .. F0000..FFFFD ; XX # Co [65534] .. 100000..10FFFD; XX # Co [65534] .. -# The above property value applies to 757136 code points not listed here. -# Total code points: 894604 +# The above property value applies to 757135 code points not listed here. +# Total code points: 894603 # ================================================ @@ -1792,7 +1792,7 @@ FFE4 ; ID # So FULLWIDTH BROKEN BAR 11F45..11F4F ; ID # Po [11] KAWI PUNCTUATION SECTION MARKER..KAWI PUNCTUATION CLOSING SPIRAL 16FF4..16FF6 ; ID # Nl [3] YANGQIN SIGN SLOW ONE BEAT..YANGQIN SIGN SLOW TWO BEATS 17000..18AFF ; ID # Lo [6912] TANGUT IDEOGRAPH-17000..TANGUT COMPONENT-768 -18D00..18D1E ; ID # Lo [31] TANGUT IDEOGRAPH-18D00..TANGUT IDEOGRAPH-18D1E +18D00..18D1F ; ID # Lo [32] TANGUT IDEOGRAPH-18D00..TANGUT IDEOGRAPH-18D1F 18D80..18DF2 ; ID # Lo [115] TANGUT COMPONENT-769..TANGUT COMPONENT-883 1B000..1B122 ; ID # Lo [291] KATAKANA LETTER ARCHAIC E..KATAKANA LETTER ARCHAIC WU 1B170..1B2FB ; ID # Lo [396] NUSHU CHARACTER-1B170..NUSHU CHARACTER-1B2FB @@ -1884,7 +1884,7 @@ FFE4 ; ID # So FULLWIDTH BROKEN BAR 31350..33479 ; ID # Lo [8490] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-33479 # The above property value applies to 57546 code points not listed here. -# Total code points: 172561 +# Total code points: 172562 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedName.txt b/unicodetools/data/ucd/dev/extracted/DerivedName.txt index 5876213a1..6064bf121 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedName.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedName.txt @@ -1,5 +1,5 @@ # DerivedName-17.0.0.txt -# Date: 2025-01-27, 18:09:14 GMT +# Date: 2025-04-15, 14:53:29 GMT # © 2025 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -37986,7 +37986,7 @@ FFFD ; REPLACEMENT CHARACTER 18AFF ; TANGUT COMPONENT-768 18B00..18CD5 ; KHITAN SMALL SCRIPT CHARACTER-* 18CFF ; KHITAN SMALL SCRIPT CHARACTER-* -18D00..18D1E ; TANGUT IDEOGRAPH-* +18D00..18D1F ; TANGUT IDEOGRAPH-* 18D80 ; TANGUT COMPONENT-769 18D81 ; TANGUT COMPONENT-770 18D82 ; TANGUT COMPONENT-771 @@ -45870,6 +45870,6 @@ E01ED ; VARIATION SELECTOR-254 E01EE ; VARIATION SELECTOR-255 E01EF ; VARIATION SELECTOR-256 -# Total code points: 159834 +# Total code points: 159835 # EOF diff --git a/unicodetools/src/main/java/org/unicode/text/UCD/UCD.java b/unicodetools/src/main/java/org/unicode/text/UCD/UCD.java index 7470ab0f2..6ef4333ea 100644 --- a/unicodetools/src/main/java/org/unicode/text/UCD/UCD.java +++ b/unicodetools/src/main/java/org/unicode/text/UCD/UCD.java @@ -1320,6 +1320,11 @@ public static int mapToRepresentative(int ch, int rCompositeVersion) { // Unicode 17 added TANGUT IDEOGRAPH-18D1D..TANGUT IDEOGRAPH-18D1E. return TANGUT_SUP_BASE; } + // REMOVE BEFORE FLIGHT: Fix version. + if (ch <= 0x18D1F && rCompositeVersion >= 0x110000) { + // Unicode ? added TANGUT IDEOGRAPH-18D1F. + return TANGUT_SUP_BASE; + } } // 20000..2A6DF; CJK Unified Ideographs Extension B diff --git a/unicodetools/src/main/resources/org/unicode/text/UCD/AdditionComparisons/205.txt b/unicodetools/src/main/resources/org/unicode/text/UCD/AdditionComparisons/205.txt new file mode 100644 index 000000000..203282033 --- /dev/null +++ b/unicodetools/src/main/resources/org/unicode/text/UCD/AdditionComparisons/205.txt @@ -0,0 +1,17 @@ +# Tangut: 18D1F TANGUT IDEOGRAPH-18D1F +# https://github.com/unicode-org/utc-release-management/issues/205 + +# Names always differ. +# Age always differs since these tests are comparing additions to pre-existing characters. +Ignoring Name Age: + +# Ignore the security and IDNA properties, as these are not yet included for provisionally assigned characters. +Ignoring Confusable_MA Identifier_Status Identifier_Type Idn_Status Idn_Mapping Idn_2008: + +Ignoring kRSTUnicode kTGT_MergedSrc: +Propertywise [\x{18D00}\x{18D1F}] AreAlike +end Ignoring; + +end Ignoring; + +end Ignoring; \ No newline at end of file