@@ -9,49 +9,50 @@ def __init__(self, bytes_, offset, word_size):
99 self .offset = offset
1010
def get_word_info(self, word_id):
    """Decode the record for ``word_id`` and return it as a ``WordInfo``.

    The record is read sequentially from ``self.bytes``, starting at the
    offset returned by ``word_id_to_offset``: surface string, head word
    length, 2-byte little-endian POS id, normalized form, 4-byte signed
    little-endian dictionary-form word id, reading form, and three int
    arrays (A-unit split, B-unit split, word structure).

    An empty normalized form falls back to the surface.  When the
    dictionary-form word id is non-negative and differs from ``word_id``,
    the dictionary form is the surface of that other record, obtained by
    a recursive call; otherwise it is this record's own surface.

    The buffer position found on entry is restored before returning,
    including when decoding raises, so a failed lookup does not leave the
    shared read cursor in the middle of a record.

    :param word_id: id of the word whose record should be decoded.
    :return: ``wordinfo.WordInfo`` built from the decoded fields.
    """
    buf = self.bytes
    orig_pos = buf.tell()
    try:
        buf.seek(self.word_id_to_offset(word_id))

        surface = self.buffer_to_string()
        head_word_length = self.buffer_to_string_length()
        pos_id = int.from_bytes(buf.read(2), 'little')
        normalized_form = self.buffer_to_string()
        if not normalized_form:
            normalized_form = surface
        dictionary_form_word_id = int.from_bytes(buf.read(4), 'little', signed=True)
        reading_form = self.buffer_to_string()
        a_unit_split = self.buffer_to_int_array()
        b_unit_split = self.buffer_to_int_array()
        word_structure = self.buffer_to_int_array()

        dictionary_form = surface
        if dictionary_form_word_id >= 0 and dictionary_form_word_id != word_id:
            # The nested call moves the cursor but puts it back itself;
            # every field of this record has already been read by now.
            dictionary_form = self.get_word_info(dictionary_form_word_id).surface
    finally:
        # Always hand the cursor back where the caller left it.
        buf.seek(orig_pos)

    return wordinfo.WordInfo(surface, head_word_length, pos_id, normalized_form,
                             dictionary_form_word_id, dictionary_form, reading_form,
                             a_unit_split, b_unit_split, word_structure)
4237
def word_id_to_offset(self, word_id):
    """Return the record offset stored in the slot for ``word_id``.

    Slots are 4 bytes wide and start at ``self.offset``.  The slot is
    obtained by slicing ``self.bytes`` and interpreted as an unsigned
    little-endian integer.

    :param word_id: index of the slot to look up.
    :return: the integer stored in that slot.
    """
    start = self.offset + 4 * word_id
    raw = self.bytes[start:start + 4]
    return int.from_bytes(raw, 'little', signed=False)
41+
def buffer_to_string_length(self):
    """Read a one- or two-byte length from the current buffer position.

    A first byte below 128 is the length itself.  Otherwise a second byte
    is consumed and the result is the low 7 bits of the first byte
    shifted left by 8, OR-ed with the second byte.

    :return: the decoded length as an int.
    """
    first = self.bytes.read_byte()
    if first >= 128:
        # Two-byte form: drop the marker bit, then append the low byte.
        second = self.bytes.read_byte()
        return ((first & 0x7F) << 8) | second
    return first
48+
def buffer_to_string(self):
    """Read a length-prefixed UTF-16-LE string from the current position.

    The length comes from ``buffer_to_string_length`` and counts 2-byte
    units, so twice that many bytes are read and decoded.

    :return: the decoded ``str``.
    """
    unit_count = self.buffer_to_string_length()
    raw = self.bytes.read(2 * unit_count)
    return raw.decode('utf-16-le')
52+
def buffer_to_int_array(self):
    """Read a count-prefixed array of integers from the current position.

    One byte gives the element count; each element is then read as a
    4-byte signed little-endian integer.

    :return: list of the decoded ints, empty when the count is 0.
    """
    stream = self.bytes
    count = stream.read_byte()
    return [
        int.from_bytes(stream.read(4), 'little', signed=True)
        for _ in range(count)
    ]
0 commit comments