|
| 1 | + |
| 2 | +use crate::ucd_generator::column; |
| 3 | +use std::io::Write; |
| 4 | +use thiserror::Error; |
| 5 | +use convert_case::{Case, Casing}; |
| 6 | + |
| 7 | +#[derive(Error, Debug)] |
| 8 | +pub enum Error { |
| 9 | + #[error("Failed to write")] |
| 10 | + IO(#[from] std::io::Error), |
| 11 | + #[error("Failed to format")] |
| 12 | + Formatting(#[from] std::fmt::Error), |
| 13 | +} |
| 14 | + |
| 15 | +pub fn generate_enum_table(code_dir : &std::path::Path, name : &str, enum_values: &Vec<String>, column: &Vec<usize>, index: &Vec<usize>, chunk_size: usize) -> Result<(), Error> |
| 16 | +{ |
| 17 | + let upper_name = name.to_case(Case::Constant); |
| 18 | + let camel_name = name.to_case(Case::Pascal); |
| 19 | + |
| 20 | + let index_max_value = *index.iter().max().unwrap_or(&0); |
| 21 | + |
| 22 | + let index_bits = (index_max_value + 1).next_power_of_two().trailing_zeros() as usize; |
| 23 | + let column_bits = enum_values.len().next_power_of_two().trailing_zeros() as usize; |
| 24 | + |
| 25 | + let column_bytes = column::compress(column, column_bits); |
| 26 | + let index_bytes = column::compress(index, index_bits); |
| 27 | + |
| 28 | + let code_path = code_dir.join(format!("{}.rs", &name)); |
| 29 | + let mut fd = std::fs::File::create(&code_path)?; |
| 30 | + |
| 31 | + write!(fd, "const {}_CHUNK_SIZE : usize = {};\n", upper_name, chunk_size)?; |
| 32 | + write!(fd, "const {}_COLUMN_BITS : usize = {};\n", upper_name, column_bits)?; |
| 33 | + write!(fd, "const {}_INDEX_LEN : usize = {};\n\n", upper_name, index.len())?; |
| 34 | + write!(fd, "const {}_INDEX_BITS : usize = {};\n\n", upper_name, index_bits)?; |
| 35 | + |
| 36 | + write!(fd, "const {}_COLUMN: [u8; {}] = [", upper_name, column_bytes.len())?; |
| 37 | + for (i, v) in column_bytes.iter().enumerate() { |
| 38 | + if i % 32 == 0 { |
| 39 | + write!(fd, "\n ")?; |
| 40 | + } |
| 41 | + write!(fd, "{:3},", v)?; |
| 42 | + } |
| 43 | + write!(fd, "\n];\n\n")?; |
| 44 | + |
| 45 | + write!(fd, "const {}_INDEX: [u8; {}] = [", upper_name, index_bytes.len())?; |
| 46 | + for (i, v) in index_bytes.iter().enumerate() { |
| 47 | + if i % 32 == 0 { |
| 48 | + write!(fd, "\n ")?; |
| 49 | + } |
| 50 | + write!(fd, "{:3},", v)?; |
| 51 | + } |
| 52 | + write!(fd, "\n];\n\n")?; |
| 53 | + |
| 54 | + write!(fd, "#[derive(Debug,Clone,Copy,PartialEq)]\n")?; |
| 55 | + |
| 56 | + write!(fd, "pub enum {} {{\n", camel_name)?; |
| 57 | + for (i, v) in enum_values.iter().enumerate() { |
| 58 | + write!(fd, " {} = {},\n", v, i)?; |
| 59 | + } |
| 60 | + write!(fd, "}}\n\n")?; |
| 61 | + |
| 62 | + write!(fd, "pub const fn get_{}(code_point: char) -> {}\n", name, camel_name)?; |
| 63 | + write!(fd, "{{\n")?; |
| 64 | + write!(fd, " const INDEX_MASK : usize = (1 << {}_INDEX_BITS) - 1;\n", upper_name)?; |
| 65 | + write!(fd, " const COLUMN_MASK : usize = (1 << {}_COLUMN_BITS) - 1;\n", upper_name)?; |
| 66 | + |
| 67 | + write!(fd, " let code_point_value = code_point as usize;\n")?; |
| 68 | + write!(fd, " let code_point_lo = code_point_value & {}_CHUNK_SIZE;\n", upper_name)?; |
| 69 | + write!(fd, " let mut code_point_hi = code_point_value / {}_CHUNK_SIZE;\n", upper_name)?; |
| 70 | + write!(fd, " if code_point_hi > {}_INDEX_LEN - 1 {{\n", upper_name)?; |
| 71 | + write!(fd, " code_point_hi = {}_INDEX_LEN - 1;\n", upper_name)?; |
| 72 | + write!(fd, " }}\n\n")?; |
| 73 | + |
| 74 | + write!(fd, " let index_offset = code_point_hi * {}_INDEX_BITS;\n", upper_name)?; |
| 75 | + write!(fd, " let index_byte_offset = index_offset / 8;\n")?; |
| 76 | + write!(fd, " let index_bit_offset = index_offset % 8;\n")?; |
| 77 | + write!(fd, " let mut index: usize = 0;\n")?; |
| 78 | + let index_bytes_to_read = (index_bits + 7) / 8 + 1; |
| 79 | + for i in 0..index_bytes_to_read { |
| 80 | + write!(fd, " index |= ({}_INDEX[index_byte_offset + {}] as usize) << {};\n", upper_name, i, i * 8)?; |
| 81 | + } |
| 82 | + write!(fd, " index >>= index_bit_offset;\n")?; |
| 83 | + write!(fd, " index &= INDEX_MASK;\n\n")?; |
| 84 | + |
| 85 | + write!(fd, " let column_offset = (index * {}_CHUNK_SIZE + code_point_lo) * {}_COLUMN_BITS;\n", upper_name, upper_name)?; |
| 86 | + write!(fd, " let column_byte_offset = column_offset / 8;\n")?; |
| 87 | + write!(fd, " let column_bit_offset = column_offset % 8;\n")?; |
| 88 | + write!(fd, " let mut value: usize = 0;\n")?; |
| 89 | + let column_bytes_to_read = (column_bits + 7) / 8 + 1; |
| 90 | + for i in 0..column_bytes_to_read { |
| 91 | + write!(fd, " value |= ({}_COLUMN[column_byte_offset + {}] as usize) << {};\n", upper_name, i, i * 8)?; |
| 92 | + } |
| 93 | + write!(fd, " value >>= column_bit_offset;\n")?; |
| 94 | + write!(fd, " value &= COLUMN_MASK;\n\n")?; |
| 95 | + |
| 96 | + write!(fd, " return match value {{\n")?; |
| 97 | + for (i, v) in enum_values.iter().enumerate() { |
| 98 | + write!(fd, " {} => {}::{},\n", i, camel_name, v)?; |
| 99 | + } |
| 100 | + write!(fd, " _ => panic!(\"Invalid value.\")\n")?; |
| 101 | + write!(fd, " }};\n")?; |
| 102 | + write!(fd, "}}\n\n")?; |
| 103 | + |
| 104 | + return Ok(()); |
| 105 | +} |
0 commit comments