Skip to content

Commit dff73ee

Browse files
committed
case bug fixes
1 parent 1e8a426 commit dff73ee

File tree

7 files changed

+594
-107
lines changed

7 files changed

+594
-107
lines changed

build_src/column.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -182,6 +182,7 @@ pub fn map_char_to_int<'a>(descriptions: &'a Vec<CodePointDescription>, op: impl
182182
if let Some(c) = op(&descriptions[cp]) {
183183
assert!(*c != '\0');
184184
r[cp] = *c as u32;
185+
assert!(r[cp] != 0x1ffff);
185186
} else {
186187
r[cp] = 0;
187188
}

build_src/generators.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -422,7 +422,7 @@ pub fn generate_char_table(
422422
let upper_name = name.to_case(Case::Constant);
423423
let camel_name = name.to_case(Case::Pascal);
424424

425-
let column_bytes = column::compress(column, 1);
425+
let column_bytes = column::compress(column, column_bits);
426426
let index_bytes = column::compress(index, index_bits);
427427

428428
// These are the number of bytes to read to read a value in a single read instruction.
@@ -508,7 +508,7 @@ pub fn generate_char_table(
508508
write!(fd, "/// bool value\n")?;
509509
write!(
510510
fd,
511-
"#[must_use] pub const fn get_{}(code_point: char) -> Optional<char>\n",
511+
"#[must_use] pub const fn get_{}(code_point: char) -> Option<char>\n",
512512
name
513513
)?;
514514
write!(fd, "{{\n")?;

build_src/parsers.rs

Lines changed: 63 additions & 63 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ pub enum Error {
1818
pub fn parse_single_column<'a>(
1919
url: &str,
2020
path: &std::path::Path,
21-
code_point_descriptions: &mut Vec<CodePointDescription>,
21+
descriptions: &mut Vec<CodePointDescription>,
2222
op : impl Fn(&mut CodePointDescription) -> &mut String
2323
)
2424
-> Result<(), Error>
@@ -38,23 +38,23 @@ pub fn parse_single_column<'a>(
3838
let last_cp = usize::from_str_radix(&cap[2], 16)?;
3939

4040
for cp in first_cp..=last_cp {
41-
if op(&mut code_point_descriptions[cp]).is_empty() {
42-
*op(&mut code_point_descriptions[cp]) = cap[3].to_string();
41+
if op(&mut descriptions[cp]).is_empty() {
42+
*op(&mut descriptions[cp]) = cap[3].to_string();
4343
}
4444
}
4545

4646
} else if let Some(cap) = single_re.captures(&line) {
4747
// Use integers directly, char do not allow surrogates.
4848
let cp = usize::from_str_radix(&cap[1], 16)?;
49-
*op(&mut code_point_descriptions[cp]) = cap[2].to_string();
49+
*op(&mut descriptions[cp]) = cap[2].to_string();
5050

5151
} else if let Some(cap) = range_re.captures(&line) {
5252
// Use integers directly, char do not allow surrogates.
5353
let first_cp = usize::from_str_radix(&cap[1], 16)?;
5454
let last_cp = usize::from_str_radix(&cap[2], 16)?;
5555

5656
for cp in first_cp..=last_cp {
57-
*op(&mut code_point_descriptions[cp]) = cap[3].to_string();
57+
*op(&mut descriptions[cp]) = cap[3].to_string();
5858
}
5959
}
6060
}
@@ -66,7 +66,7 @@ pub fn parse_single_column<'a>(
6666
pub fn parse_existance_column<'a>(
6767
url: &str,
6868
path: &std::path::Path,
69-
code_point_descriptions: &mut Vec<CodePointDescription>,
69+
descriptions: &mut Vec<CodePointDescription>,
7070
op : impl Fn(&mut CodePointDescription) -> &mut bool
7171
)
7272
-> Result<(), Error>
@@ -82,15 +82,15 @@ pub fn parse_existance_column<'a>(
8282
if let Some(cap) = single_re.captures(&line) {
8383
// Use integers directly, char do not allow surrogates.
8484
let cp = usize::from_str_radix(&cap[1], 16)?;
85-
*op(&mut code_point_descriptions[cp]) = true;
85+
*op(&mut descriptions[cp]) = true;
8686

8787
} else if let Some(cap) = range_re.captures(&line) {
8888
// Use integers directly, char do not allow surrogates.
8989
let first_cp = usize::from_str_radix(&cap[1], 16)?;
9090
let last_cp = usize::from_str_radix(&cap[2], 16)?;
9191

9292
for cp in first_cp..=last_cp {
93-
*op(&mut code_point_descriptions[cp]) = true;
93+
*op(&mut descriptions[cp]) = true;
9494
}
9595
}
9696
}
@@ -101,7 +101,7 @@ pub fn parse_existance_column<'a>(
101101
pub fn parse_prop_list_columns<'a>(
102102
url: &str,
103103
path: &std::path::Path,
104-
code_point_descriptions: &mut Vec<CodePointDescription>
104+
descriptions: &mut Vec<CodePointDescription>
105105
)
106106
-> Result<(), Error>
107107
{
@@ -135,44 +135,44 @@ pub fn parse_prop_list_columns<'a>(
135135

136136
for cp in first_cp..=last_cp {
137137
match property_name.as_str() {
138-
"White_Space" => code_point_descriptions[cp].white_space = true,
139-
"Bidi_Control" => code_point_descriptions[cp].bidi_control = true,
140-
"Join_Control" => code_point_descriptions[cp].join_control = true,
141-
"Dash" => code_point_descriptions[cp].dash = true,
142-
"Hyphen" => code_point_descriptions[cp].hyphen = true,
143-
"Quotation_Mark" => code_point_descriptions[cp].quotation_mark = true,
144-
"Terminal_Punctuation" => code_point_descriptions[cp].terminal_punctuation = true,
145-
"Other_Math" => code_point_descriptions[cp].other_math = true,
146-
"Hex_Digit" => code_point_descriptions[cp].hex_digit = true,
147-
"ASCII_Hex_Digit" => code_point_descriptions[cp].ascii_hex_digit = true,
148-
"Other_Alphabetic" => code_point_descriptions[cp].other_alphabetic = true,
149-
"Ideographic" => code_point_descriptions[cp].ideographic = true,
150-
"Diacritic" => code_point_descriptions[cp].diacritic = true,
151-
"Extender" => code_point_descriptions[cp].extender = true,
152-
"Other_Lowercase" => code_point_descriptions[cp].other_lowercase = true,
153-
"Other_Uppercase" => code_point_descriptions[cp].other_uppercase = true,
154-
"Noncharacter_Code_Point" => code_point_descriptions[cp].noncharacter_code_point = true,
155-
"Other_Grapheme_Extend" => code_point_descriptions[cp].other_grapheme_extend = true,
156-
"IDS_Unary_Operator" => code_point_descriptions[cp].ids_unary_operator = true,
157-
"IDS_Binary_Operator" => code_point_descriptions[cp].ids_binary_operator = true,
158-
"IDS_Trinary_Operator" => code_point_descriptions[cp].ids_trinary_operator = true,
159-
"Radical" => code_point_descriptions[cp].radical = true,
160-
"Unified_Ideograph" => code_point_descriptions[cp].unified_ideograph = true,
161-
"Other_Default_Ignorable_Code_Point" => code_point_descriptions[cp].other_default_ignorable_code_point = true,
162-
"Deprecated" => code_point_descriptions[cp].deprecated = true,
163-
"Soft_Dotted" => code_point_descriptions[cp].soft_dotted = true,
164-
"Logical_Order_Exception" => code_point_descriptions[cp].logical_order_exception = true,
165-
"Other_ID_Start" => code_point_descriptions[cp].other_id_start = true,
166-
"Other_ID_Continue" => code_point_descriptions[cp].other_id_continue = true,
167-
"ID_Compat_Math_Continue" => code_point_descriptions[cp].id_compat_math_continue = true,
168-
"ID_Compat_Math_Start" => code_point_descriptions[cp].id_compat_math_start = true,
169-
"Sentence_Terminal" => code_point_descriptions[cp].sentence_terminal = true,
170-
"Variation_Selector" => code_point_descriptions[cp].variation_selector = true,
171-
"Pattern_White_Space" => code_point_descriptions[cp].pattern_white_space = true,
172-
"Pattern_Syntax" => code_point_descriptions[cp].pattern_syntax = true,
173-
"Prepended_Concatenation_Mark" => code_point_descriptions[cp].prepended_concatenation_mark = true,
174-
"Regional_Indicator" => code_point_descriptions[cp].regional_indicator = true,
175-
"Modifier_Combining_Mark" => code_point_descriptions[cp].modifier_combining_mark = true,
138+
"White_Space" => descriptions[cp].white_space = true,
139+
"Bidi_Control" => descriptions[cp].bidi_control = true,
140+
"Join_Control" => descriptions[cp].join_control = true,
141+
"Dash" => descriptions[cp].dash = true,
142+
"Hyphen" => descriptions[cp].hyphen = true,
143+
"Quotation_Mark" => descriptions[cp].quotation_mark = true,
144+
"Terminal_Punctuation" => descriptions[cp].terminal_punctuation = true,
145+
"Other_Math" => descriptions[cp].other_math = true,
146+
"Hex_Digit" => descriptions[cp].hex_digit = true,
147+
"ASCII_Hex_Digit" => descriptions[cp].ascii_hex_digit = true,
148+
"Other_Alphabetic" => descriptions[cp].other_alphabetic = true,
149+
"Ideographic" => descriptions[cp].ideographic = true,
150+
"Diacritic" => descriptions[cp].diacritic = true,
151+
"Extender" => descriptions[cp].extender = true,
152+
"Other_Lowercase" => descriptions[cp].other_lowercase = true,
153+
"Other_Uppercase" => descriptions[cp].other_uppercase = true,
154+
"Noncharacter_Code_Point" => descriptions[cp].noncharacter_code_point = true,
155+
"Other_Grapheme_Extend" => descriptions[cp].other_grapheme_extend = true,
156+
"IDS_Unary_Operator" => descriptions[cp].ids_unary_operator = true,
157+
"IDS_Binary_Operator" => descriptions[cp].ids_binary_operator = true,
158+
"IDS_Trinary_Operator" => descriptions[cp].ids_trinary_operator = true,
159+
"Radical" => descriptions[cp].radical = true,
160+
"Unified_Ideograph" => descriptions[cp].unified_ideograph = true,
161+
"Other_Default_Ignorable_Code_Point" => descriptions[cp].other_default_ignorable_code_point = true,
162+
"Deprecated" => descriptions[cp].deprecated = true,
163+
"Soft_Dotted" => descriptions[cp].soft_dotted = true,
164+
"Logical_Order_Exception" => descriptions[cp].logical_order_exception = true,
165+
"Other_ID_Start" => descriptions[cp].other_id_start = true,
166+
"Other_ID_Continue" => descriptions[cp].other_id_continue = true,
167+
"ID_Compat_Math_Continue" => descriptions[cp].id_compat_math_continue = true,
168+
"ID_Compat_Math_Start" => descriptions[cp].id_compat_math_start = true,
169+
"Sentence_Terminal" => descriptions[cp].sentence_terminal = true,
170+
"Variation_Selector" => descriptions[cp].variation_selector = true,
171+
"Pattern_White_Space" => descriptions[cp].pattern_white_space = true,
172+
"Pattern_Syntax" => descriptions[cp].pattern_syntax = true,
173+
"Prepended_Concatenation_Mark" => descriptions[cp].prepended_concatenation_mark = true,
174+
"Regional_Indicator" => descriptions[cp].regional_indicator = true,
175+
"Modifier_Combining_Mark" => descriptions[cp].modifier_combining_mark = true,
176176
_ => panic!("Unknown property {}", property_name),
177177
}
178178
}
@@ -184,7 +184,7 @@ pub fn parse_prop_list_columns<'a>(
184184
pub fn parse_unicode_data_columns<'a>(
185185
url: &str,
186186
path: &std::path::Path,
187-
code_point_descriptions: &mut Vec<CodePointDescription>
187+
descriptions: &mut Vec<CodePointDescription>
188188
)
189189
-> Result<(), Error>
190190
{
@@ -218,34 +218,34 @@ pub fn parse_unicode_data_columns<'a>(
218218
first_code_value_of_range = code_value;
219219
}
220220

221-
code_point_descriptions[code_value].general_category = cap[2].to_string();
222-
code_point_descriptions[code_value].canonical_combining_class = u8::from_str_radix(&cap[3], 10).unwrap();
223-
code_point_descriptions[code_value].bidi_class = cap[4].to_string();
224-
code_point_descriptions[code_value].decomposition_type = "canonical".to_string();
225-
code_point_descriptions[code_value].decomposition_mapping = String::new();
221+
descriptions[code_value].general_category = cap[2].to_string();
222+
descriptions[code_value].canonical_combining_class = u8::from_str_radix(&cap[3], 10).unwrap();
223+
descriptions[code_value].bidi_class = cap[4].to_string();
224+
descriptions[code_value].decomposition_type = "canonical".to_string();
225+
descriptions[code_value].decomposition_mapping = String::new();
226226

227227
if &cap[6] != "" {
228228
let v = u32::from_str_radix(&cap[6], 16)?;
229229
let c = char::from_u32(v).unwrap();
230-
code_point_descriptions[code_value].upper_case_mapping = Some(c);
230+
descriptions[code_value].upper_case_mapping = Some(c);
231231
}
232232
if &cap[7] != "" {
233233
let v = u32::from_str_radix(&cap[7], 16)?;
234234
let c = char::from_u32(v).unwrap();
235-
code_point_descriptions[code_value].lower_case_mapping = Some(c);
235+
descriptions[code_value].lower_case_mapping = Some(c);
236236
}
237237
if &cap[8] != "" {
238238
let v = u32::from_str_radix(&cap[8], 16)?;
239239
let c = char::from_u32(v).unwrap();
240-
code_point_descriptions[code_value].title_case_mapping = Some(c);
240+
descriptions[code_value].title_case_mapping = Some(c);
241241
}
242242

243243
let mut decomposition = cap[5].to_string();
244244
while !decomposition.is_empty() {
245245
if decomposition.starts_with("<") {
246246
let end = decomposition.find('>').unwrap();
247247
let sub = &decomposition[1..end-1];
248-
code_point_descriptions[code_value].decomposition_type = sub.to_string();
248+
descriptions[code_value].decomposition_type = sub.to_string();
249249

250250
decomposition = String::from(&decomposition[end+1..]);
251251

@@ -257,19 +257,19 @@ pub fn parse_unicode_data_columns<'a>(
257257
let sub = &decomposition[0..end];
258258
let decomposition_code_value = u32::from_str_radix(&sub, 16)?;
259259
let decomposition_cp = char::from_u32(decomposition_code_value).unwrap();
260-
code_point_descriptions[code_value].decomposition_mapping.push(decomposition_cp);
260+
descriptions[code_value].decomposition_mapping.push(decomposition_cp);
261261

262262
decomposition = String::from(&decomposition[end..]);
263263
}
264264
}
265265

266266
if line.contains("Last>:") {
267267
for i in first_code_value_of_range..code_value {
268-
code_point_descriptions[i].general_category = code_point_descriptions[code_value].general_category.clone();
269-
code_point_descriptions[i].canonical_combining_class = code_point_descriptions[code_value].canonical_combining_class;
270-
code_point_descriptions[i].bidi_class = code_point_descriptions[code_value].bidi_class.clone();
271-
code_point_descriptions[i].decomposition_type = code_point_descriptions[code_value].decomposition_type.clone();
272-
code_point_descriptions[i].decomposition_mapping = code_point_descriptions[code_value].decomposition_mapping.clone();
268+
descriptions[i].general_category = descriptions[code_value].general_category.clone();
269+
descriptions[i].canonical_combining_class = descriptions[code_value].canonical_combining_class;
270+
descriptions[i].bidi_class = descriptions[code_value].bidi_class.clone();
271+
descriptions[i].decomposition_type = descriptions[code_value].decomposition_type.clone();
272+
descriptions[i].decomposition_mapping = descriptions[code_value].decomposition_mapping.clone();
273273
}
274274
}
275275

src/lib.rs

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,9 @@ pub mod pattern_syntax;
4444
pub mod prepended_concatenation_mark;
4545
pub mod regional_indicator;
4646
pub mod modifier_combining_mark;
47+
pub mod lower_case_mapping;
48+
pub mod upper_case_mapping;
49+
pub mod title_case_mapping;
4750

4851
pub use east_asian_width::EastAsianWidth;
4952
pub use east_asian_width::get_east_asian_width;
@@ -96,6 +99,9 @@ pub use pattern_syntax::get_pattern_syntax;
9699
pub use prepended_concatenation_mark::get_prepended_concatenation_mark;
97100
pub use regional_indicator::get_regional_indicator;
98101
pub use modifier_combining_mark::get_modifier_combining_mark;
102+
pub use lower_case_mapping::get_lower_case_mapping;
103+
pub use upper_case_mapping::get_upper_case_mapping;
104+
pub use title_case_mapping::get_title_case_mapping;
99105

100106
#[cfg(test)]
101107
mod tests {
@@ -107,4 +113,10 @@ mod tests {
107113
assert_eq!(get_east_asian_width('a'), EastAsianWidth::Na);
108114
assert_eq!(get_east_asian_width('あ'), EastAsianWidth::W);
109115
}
116+
117+
#[test]
118+
fn lower_case_mapping()
119+
{
120+
assert_eq!(get_lower_case_mapping('B'), Some('b'));
121+
}
110122
}

0 commit comments

Comments
 (0)