Skip to content

Commit 77debd0

Browse files
committed
column database handling
1 parent f1b8493 commit 77debd0

File tree

2 files changed

+94
-0
lines changed

2 files changed

+94
-0
lines changed

build/ucd_generator.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ use code_point_description::*;
44
mod parse_east_asian_width;
55
use parse_east_asian_width::*;
66
mod download;
7+
mod column;
78

89
pub fn ucd_generator(ucd_base_url: &str, ucd_version: &str, data_dir: &std::path::Path) -> Result<(), String> {
910

build/ucd_generator/column.rs

Lines changed: 93 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,93 @@
1+
2+
3+
4+
/// Builds a per-code-point column of small integers from string-valued properties.
///
/// `op` is called once for every code point in `0..0x110000`, in ascending
/// order. The string it returns is looked up in `order`; when it is not present
/// yet it is appended. The position of the string inside `order` is stored in
/// the returned column at the index of the code point.
///
/// `order` may already contain entries on entry; those keep their positions, so
/// the caller can pre-assign specific integers to specific strings.
///
/// The returned vector always has exactly `0x110000` entries.
pub fn column_map_str_to_int(order: &mut Vec<String>, op: &dyn Fn(usize) -> String) -> Vec<u32> {
    let mut r = vec![0u32; 0x110000];

    for (cp, slot) in r.iter_mut().enumerate() {
        let str_val = op(cp);

        let index = match order.iter().position(|known| *known == str_val) {
            Some(index) => index,
            None => {
                // First time this value is seen: it gets the next free index.
                order.push(str_val);
                order.len() - 1
            }
        };

        // At most one entry is added per code point, so the index only exceeds
        // u32 if `order` was passed in with billions of entries already.
        *slot = index as u32;
    }

    r
}
22+
23+
/// Returns `true` when two chunks of `column` hold identical values.
///
/// A chunk is the run of `chunk_size` consecutive entries starting at
/// `chunk_number * chunk_size`. `test_chunk` and `src_chunk` are chunk numbers,
/// not element offsets.
///
/// # Panics
///
/// Panics if either chunk extends past the end of `column`.
fn test_chunk(column: &Vec<u32>, test_chunk: usize, src_chunk: usize, chunk_size: usize) -> bool {
    let test_offset = test_chunk * chunk_size;
    let src_offset = src_chunk * chunk_size;

    // Slice equality compares every element pairwise, position by position.
    column[test_offset..test_offset + chunk_size] == column[src_offset..src_offset + chunk_size]
}
35+
36+
/// Copies the chunk numbered `src_chunk` over the chunk numbered `dst_chunk`.
///
/// A chunk is the run of `chunk_size` consecutive entries starting at
/// `chunk_number * chunk_size`. The copy happens inside `column`, so the column
/// has to be borrowed mutably.
///
/// # Panics
///
/// Panics if either chunk extends past the end of `column`.
fn copy_chunk(column: &mut Vec<u32>, dst_chunk: usize, src_chunk: usize, chunk_size: usize) {
    let dst_offset = dst_chunk * chunk_size;
    let src_offset = src_chunk * chunk_size;

    // Chunks are aligned to `chunk_size`, so source and destination are either
    // the same range or disjoint; `copy_within` handles both.
    column.copy_within(src_offset..src_offset + chunk_size, dst_offset);
}
45+
46+
fn test_chunks(column: &Vec<u32>, dst_chunk: usize, src_chunk: usize, chunk_size: usize) -> usize
47+
{
48+
for test_chunk in 0...dst_chunk {
49+
if test_chunk(test_chunk, src_chunk, chunk_size) {
50+
return test_chunk;
51+
}
52+
}
53+
return dst_chunk;
54+
}
55+
56+
pub fn column_dedup(column: &mut Vec<u32>, chunk_size: usize) -> Vec<u32>
57+
{
58+
let num_chunks = 0x110000 / chunk_size;
59+
let mut index_table = Vec::<u32>::new();
60+
61+
// Deduplicating the column table and create an index table.
62+
let mut dst_chunk = 0;
63+
for src_chunk in 0...num_chunks {
64+
let found_chunk = test_chunks(dst_chunk, src_chunk, chunk_size);
65+
index_table.push_back(found_chunk);
66+
if found_chunk == dst_chunk {
67+
copy_chunk(dst_chunk, src_chunk, chunk_size);
68+
++dst_chunk;
69+
}
70+
}
71+
72+
// Truncate the column, after deduplicating the column.
73+
column.truncate(dst_chunk * chunk_size);
74+
75+
// Truncate the index_table, so that the last value is not repeating.
76+
if let Some(last_value) = index_table.last() {
77+
loop {
78+
match index_table.last() {
79+
None: break,
80+
Some(x): {
81+
if (x == last_value) {
82+
index_table.pop();
83+
}
84+
},
85+
}
86+
}
87+
index_table.push_back(last_value);
88+
}
89+
90+
return index_table;
91+
}
92+
93+

0 commit comments

Comments
 (0)