Skip to content

Commit 6f9caa3

Browse files
committed
faster
1 parent 939b145 commit 6f9caa3

File tree

3 files changed

+273
-267
lines changed

3 files changed

+273
-267
lines changed

build/ucd_generator/column.rs

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -149,15 +149,13 @@ fn compress_insert_value(bytes: &mut Vec<u8>, offset : usize, mut value : usize)
149149

150150
/// Compress an integer table into tightly packed bytes.
151151
///
152-
/// The byte array will end in 7 zero bytes, to be able to
153-
/// do unaligned reads without running beyond the bounds.
154152
pub fn compress(input: &Vec<usize>, num_bits: usize) -> Vec<u8>
155153
{
156154
let total_num_bits = num_bits * input.len();
157155
let total_num_bytes = (total_num_bits + 7) / 8;
158156

159-
let mut r = Vec::<u8>::with_capacity(total_num_bytes + 7);
160-
r.resize(total_num_bytes + 7, 0);
157+
let mut r = Vec::<u8>::with_capacity(total_num_bytes);
158+
r.resize(total_num_bytes, 0);
161159

162160
let mut offset : usize = 0;
163161
for x in input {
@@ -167,3 +165,4 @@ pub fn compress(input: &Vec<usize>, num_bits: usize) -> Vec<u8>
167165

168166
return r;
169167
}
168+

build/ucd_generator/generators.rs

Lines changed: 13 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,10 @@ pub fn generate_enum_table(code_dir : &std::path::Path, name : &str, enum_values
2525
let column_bytes = column::compress(column, column_bits);
2626
let index_bytes = column::compress(index, index_bits);
2727

28+
// The number of bytes to read so that a value can be loaded with a single read instruction.
29+
let index_bytes_to_read = ((index_bits + 7) / 8 + 1).next_power_of_two();
30+
let column_bytes_to_read = ((column_bits + 7) / 8 + 1).next_power_of_two();
31+
2832
let code_path = code_dir.join(format!("{}.rs", &name));
2933
let mut fd = std::fs::File::create(&code_path)?;
3034

@@ -36,21 +40,26 @@ pub fn generate_enum_table(code_dir : &std::path::Path, name : &str, enum_values
3640
write!(fd, "const {}_INDEX_BITS : usize = {};\n\n", upper_name, index_bits)?;
3741
write!(fd, "const {}_INDEX_BYTE_OFFSET : usize = {};\n\n", upper_name, column_bytes.len())?;
3842

39-
write!(fd, "const {}_DATA: [u8; {}] = [\n", upper_name, column_bytes.len() + index_bytes.len())?;
43+
let data_bytes_len = column_bytes.len() + index_bytes.len() + index_bytes_to_read - 1;
44+
write!(fd, "const {}_DATA: [u8; {}] = [\n", upper_name, data_bytes_len)?;
4045
write!(fd, " // Column table")?;
4146
for (i, v) in column_bytes.iter().enumerate() {
4247
if i % 32 == 0 {
43-
write!(fd, "\n ")?;
48+
write!(fd, "\n ")?;
4449
}
4550
write!(fd, "{:3},", v)?;
4651
}
4752
write!(fd, "\n // Index table")?;
4853
for (i, v) in index_bytes.iter().enumerate() {
4954
if i % 32 == 0 {
50-
write!(fd, "\n ")?;
55+
write!(fd, "\n ")?;
5156
}
5257
write!(fd, "{:3},", v)?;
5358
}
59+
write!(fd, "\n // Padding to handle unaligned word reads.\n ")?;
60+
for _ in 1..index_bytes_to_read {
61+
write!(fd, "{:3},", 0)?;
62+
}
5463
write!(fd, "\n];\n\n")?;
5564

5665
write!(fd, "/// The {} attribute for Unicode code-points.\n", camel_name)?;
@@ -84,7 +93,6 @@ pub fn generate_enum_table(code_dir : &std::path::Path, name : &str, enum_values
8493
write!(fd, " let index_byte_offset = index_offset / 8;\n")?;
8594
write!(fd, " let index_bit_offset = index_offset % 8;\n")?;
8695
write!(fd, " let mut index: usize = 0;\n")?;
87-
let index_bytes_to_read = (index_bits + 7) / 8 + 1;
8896
for i in (0..index_bytes_to_read).rev() {
8997
write!(fd, " index |= ({}_DATA[{}_INDEX_BYTE_OFFSET + index_byte_offset + {}] as usize) << {};\n", upper_name, upper_name, i, i * 8)?;
9098
}
@@ -96,7 +104,6 @@ pub fn generate_enum_table(code_dir : &std::path::Path, name : &str, enum_values
96104
write!(fd, " let column_bit_offset = column_offset % 8;\n\n")?;
97105

98106
write!(fd, " let mut value: usize = 0;\n")?;
99-
let column_bytes_to_read = (column_bits + 7) / 8 + 1;
100107
for i in (0..column_bytes_to_read).rev() {
101108
write!(fd, " value |= ({}_DATA[column_byte_offset + {}] as usize) << {};\n", upper_name, i, i * 8)?;
102109
}
@@ -107,7 +114,7 @@ pub fn generate_enum_table(code_dir : &std::path::Path, name : &str, enum_values
107114
for (i, v) in enum_values.iter().enumerate() {
108115
write!(fd, " {} => {}::{},\n", i, camel_name, v)?;
109116
}
110-
write!(fd, " _ => panic!(\"Invalid value.\")\n")?;
117+
write!(fd, " _ => {}::{},\n", camel_name, enum_values[0])?;
111118
write!(fd, " }};\n")?;
112119
write!(fd, "}}\n\n")?;
113120

0 commit comments

Comments
 (0)