1
+ import struct
2
+ import numpy as np
3
+
4
+
5
class GNT:
    """Decoder for GNT files.

    Iterating over an instance opens the archive member, decodes its records
    one after another and yields an ``(img, tag_code)`` pair per record.
    The member is opened afresh on every iteration and closed when the
    iteration ends, so an instance can be iterated more than once.
    """

    def __init__(self, Z, set_name):
        """Remember where the data lives; nothing is read until iteration.

        Args:
            Z: Archive-like object whose ``open(name)`` returns a binary file
                object usable as a context manager (e.g. ``zipfile.ZipFile``).
            set_name: Name of the GNT member inside ``Z``.
        """
        self.Z = Z
        self.set_name = set_name  # dataset name (archive member to decode)

    def __iter__(self):
        """Yield ``(img, tag_code)`` for every record in the member.

        Each record is read as: a 4-byte unsigned sample size, a 2-byte tag
        code decoded as GB18030, unsigned 16-bit ``width`` and ``height``,
        then ``width * height`` bytes of pixel data.

        All integers are read as little-endian regardless of the host; the
        previous native-order formats only decoded correctly on
        little-endian machines.

        Yields:
            tuple: ``(img, tag_code)`` where ``img`` is a ``numpy.uint8``
            array of shape ``(height, width)`` and ``tag_code`` is the
            decoded label string.

        Raises:
            struct.error: If the file ends in the middle of a record header.
            ValueError: If fewer than ``width * height`` pixel bytes remain.
        """
        with self.Z.open(self.set_name) as fp:
            while True:
                head = fp.read(4)
                if not head:  # clean end of file: no further records
                    break
                sample_size = struct.unpack('<I', head)[0]
                tag_code = fp.read(2).decode('gb18030')
                width, height = struct.unpack('<2H', fp.read(4))
                bitmap = np.frombuffer(fp.read(width * height), np.uint8)
                yield bitmap.reshape((height, width)), tag_code
                # Kept from the original loop condition: a record that
                # declares a sample size of zero is still yielded, but
                # decoding stops right after it.
                if not sample_size:
                    break
24
+
25
+
26
class POT:
    """Decoder for POT files.

    Iterating over an instance decodes the whole archive member and yields a
    single ``(tag, sizes)`` pair describing every character record in it.
    The member is opened on each iteration and closed as soon as decoding
    finishes, so no file handle is leaked and an instance can be iterated
    more than once.
    """

    # Sentinel coordinate pairs that delimit strokes and characters.
    _STROKE_END = (-1, 0)
    _CHAR_END = (-1, -1)

    def __init__(self, Z, set_name):
        """Remember where the data lives; nothing is read until iteration.

        Args:
            Z: Archive-like object whose ``open(name)`` returns a binary file
                object usable as a context manager (e.g. ``zipfile.ZipFile``).
            set_name: Name of the POT member inside ``Z``.
        """
        self.Z = Z
        self.set_name = set_name

    def __iter__(self):
        """Decode every character record and yield the collected result once.

        Each record is read as: an unsigned 16-bit sample size, a 4-byte tag
        code (decoded as GB18030, with NUL/space padding stripped), an
        unsigned 16-bit stroke count, then signed 16-bit ``(x, y)`` pairs.
        The pair ``(-1, 0)`` closes the current stroke and ``(-1, -1)``
        closes the character. Decoding stops at end of file or at a sample
        size of zero. All integers are read as little-endian regardless of
        the host.

        Yields:
            tuple: ``(tag, sizes)``. ``tag`` maps a running record index to
            ``{tag_code: strokes}``, where ``strokes`` maps each stroke index
            ``0 .. stroke_num - 1`` to its list of ``(x, y)`` points.
            ``sizes`` lists the sample-size field of every record, in file
            order. An empty member yields ``({}, [])``.

        Raises:
            struct.error: If the file ends in the middle of a record.
            KeyError: If a point follows the last declared stroke.
            ValueError: If a record closes more strokes than it declares.
        """
        tag = {}  # record index -> {character: strokes}
        sizes = []
        with self.Z.open(self.set_name) as fp:
            while True:
                raw_size = fp.read(2)
                if not raw_size:  # end of file: decoding is complete
                    break
                size = struct.unpack('<H', raw_size)[0]  # sample size
                if not size:  # a zero sample size also ends decoding
                    break
                sizes.append(size)
                tag_code = fp.read(4).decode(
                    'gb18030').strip('\x00 ')  # decode the character label
                stroke_num = struct.unpack('<H', fp.read(2))[0]  # stroke count
                strokes = {index: [] for index in range(stroke_num)}
                current = 0
                while True:
                    xy = struct.unpack('<2h', fp.read(4))
                    if xy == self._CHAR_END:
                        break
                    if xy == self._STROKE_END:
                        current += 1
                        if current > stroke_num:
                            # Continuing here would re-parse stroke data as
                            # the next record header and corrupt the output.
                            raise ValueError(
                                'POT record closes more strokes than the '
                                'declared stroke count')
                    else:
                        strokes[current].append(xy)  # record the pen point
                tag[len(tag)] = {tag_code: strokes}
        yield tag, sizes
0 commit comments