Skip to content

Commit 5b625fb

Browse files
committed
添加对 POT 与 GNT 的数据处理 API
new file: casia/image.py
1 parent d3f27aa commit 5b625fb

File tree

1 file changed

+59
-0
lines changed

1 file changed

+59
-0
lines changed

casia/image.py

+59
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
import struct
2+
import numpy as np
3+
4+
5+
class GNT:
    """Decoder for GNT sample files.

    Iterating an instance streams ``(img, tag_code)`` pairs read from the
    member ``set_name`` of the container ``Z``.

    Record layout, as consumed by ``__iter__``:

    * 4 bytes -- unsigned sample size
    * 2 bytes -- tag code, decoded as GB18030
    * 2 + 2 bytes -- unsigned 16-bit width and height
    * ``width * height`` bytes -- 8-bit pixels, one image row after another
    """

    def __init__(self, Z, set_name):
        """Remember where the data lives; nothing is opened here.

        Args:
            Z: Container exposing ``open(name)`` that returns a binary
                file object usable as a context manager (presumably a
                ``zipfile.ZipFile`` -- confirm against the caller).
            set_name: Name of the member inside ``Z`` to decode.
        """
        self.Z = Z
        self.set_name = set_name  # dataset (member) name

    def __iter__(self):
        """Yield ``(img, tag_code)`` for every record in the member.

        ``img`` is a ``numpy.uint8`` array of shape ``(height, width)`` and
        ``tag_code`` is the decoded label with NUL padding removed, the
        same way the POT decoder treats its tag codes.

        Raises:
            struct.error: If a header field is cut short.
            ValueError: If the bitmap is shorter than ``width * height``.
        """
        with self.Z.open(self.set_name) as fp:
            while True:
                head = fp.read(4)
                if not head:  # reached the end of the member
                    break
                # '<' pins little-endian, standard-size fields so decoding no
                # longer depends on the byte order of the host; it matches
                # what the native formats produced on little-endian machines.
                sample_size = struct.unpack('<I', head)[0]
                tag_code = fp.read(2).decode('gb18030').strip('\x00')
                width, height = struct.unpack('<2H', fp.read(4))
                bitmap = np.frombuffer(fp.read(width * height), np.uint8)
                yield bitmap.reshape((height, width)), tag_code
                if not sample_size:
                    # Kept from the original loop condition: a record whose
                    # size field is zero is still yielded, then decoding stops.
                    break
24+
25+
26+
class POT:
    """Decoder for POT stroke files.

    Iterating an instance yields exactly one ``(tag, sizes)`` tuple covering
    every character record found in the member ``set_name`` of ``Z``.

    Record layout, as consumed by ``_decode``:

    * 2 bytes -- unsigned sample size (a zero size stops decoding)
    * 4 bytes -- tag code, decoded as GB18030 with NUL padding stripped
    * 2 bytes -- unsigned stroke count
    * signed 16-bit ``(x, y)`` pairs; ``(-1, 0)`` closes the current stroke
      and ``(-1, -1)`` closes the character
    """

    def __init__(self, Z, set_name):
        """Remember where the data lives.

        The member is opened lazily by ``__iter__`` (as the GNT decoder
        does) so that no file handle is left open and the object can be
        iterated more than once. A missing member is therefore reported
        when iteration starts rather than at construction.

        Args:
            Z: Container exposing ``open(name)`` that returns a binary
                file object usable as a context manager (presumably a
                ``zipfile.ZipFile`` -- confirm against the caller).
            set_name: Name of the member inside ``Z`` to decode.
        """
        self.Z = Z
        self.set_name = set_name  # dataset (member) name

    def __iter__(self):
        """Decode the whole member and yield a single ``(tag, sizes)`` pair.

        ``tag`` maps a running character index to
        ``{tag_code: {stroke_index: [(x, y), ...]}}`` and ``sizes`` lists the
        sample-size field of each record in file order. An empty member
        yields ``({}, [])``.

        Raises:
            struct.error: If a record is cut short.
            KeyError: If a record holds more strokes than it declares.
        """
        with self.Z.open(self.set_name) as fp:
            tag, sizes = self._decode(fp)
        # Yield only after the handle has been closed.
        yield tag, sizes

    @staticmethod
    def _decode(fp):
        """Parse every character record from the binary stream ``fp``."""
        tag = {}  # character index -> {tag code: strokes}
        sizes = []
        while True:
            raw_size = fp.read(2)
            if not raw_size:  # reached the end of the member
                break
            # '<' pins little-endian, standard-size fields so decoding does
            # not depend on the byte order of the host.
            size = struct.unpack('<H', raw_size)[0]  # sample size
            if not size:  # a zero size field ends decoding
                break
            sizes.append(size)
            tag_code = fp.read(4).decode('gb18030').strip('\x00')
            stroke_num = struct.unpack('<H', fp.read(2))[0]  # stroke count
            strokes = {k: [] for k in range(stroke_num)}
            k = 0
            while True:
                xy = struct.unpack('<2h', fp.read(4))
                if xy == (-1, -1):  # end of this character
                    break
                if xy == (-1, 0):  # end of the current stroke
                    k += 1
                else:
                    strokes[k].append(xy)  # record a pen coordinate
            tag[len(tag)] = {tag_code: strokes}
        return tag, sizes

0 commit comments

Comments
 (0)