@@ -10,6 +10,7 @@ class Reading:
1010 lemma : str = ''
1111 tags : List [str ] = field (default_factory = list )
1212 subreading : Optional ['Reading' ] = None
13+ deleted : bool = False
1314
1415@dataclass
1516class Cohort :
@@ -29,9 +30,15 @@ class Window:
2930 rem_vars : List [str ] = field (default_factory = list )
3031 text : str = ''
3132 text_post : str = ''
32- flush_after : bool = False
3333 dep_has_spanned : bool = False
3434
@dataclass
class Packet:
    '''One unit read from a binary stream, as yielded by
    parse_binary_stream() when `windows_only` is False.'''
    # Kind of packet; parse_binary_stream() sets this to 'window',
    # 'command' or 'text'.
    type: str = ''
    # The parsed Window() for 'window' packets; left as None otherwise.
    window: Optional[Window] = None
    # Command name for 'command' packets: 'FLUSH', 'EXIT', 'IGNORE'
    # or 'RESUME'; left empty otherwise.
    command: str = ''
    # UTF-8 decoded payload for 'text' packets; left empty otherwise.
    text: str = ''
41+
3542def parse_binary_window (buf ):
3643 '''Given a bytestring `buf` containing a single window
3744 (not including the length header), parse and return a Window()
@@ -56,8 +63,6 @@ def read_str():
5663 window = Window ()
5764 window_flags = read_u16 ()
5865 if window_flags & 1 :
59- window .flush_after = True
60- if window_flags & 2 :
6166 window .dep_has_spanned = True
6267 tag_count = read_u16 ()
6368 tags = [read_str () for i in range (tag_count )]
@@ -109,13 +114,17 @@ def read_tags():
109114 prev .subreading = reading
110115 else :
111116 cohort .readings .append (reading )
117+ if reading_flags & 2 :
118+ reading .deleted = True
112119 prev = reading
113120 window .cohorts .append (cohort )
114121 return window
115122
116- def parse_binary_stream (fin ):
117- '''Given a file `fin`, yield a series of Window() objects.
118- raises ValueError if stream header is missing or invalid.'''
123+ def parse_binary_stream (fin , windows_only = False ):
124+ '''Given a file `fin`, yield a series of Packet() objects.
125+ raises ValueError if stream header is missing or invalid.
126+ If `windows_only` is True, packets containing commands or text
127+ are skipped and Window() objects are returned instead.'''
119128
120129 header = fin .read (8 )
121130 label , version = struct .unpack ('<4sI' , header )
@@ -124,11 +133,45 @@ def parse_binary_stream(fin):
124133 if version != 1 :
125134 raise ValueError ('Unknown binary format version!' )
126135 while True :
127- spec = fin .read (4 )
128- if len (spec ) != 4 :
129- break ;
130- block_len = struct .unpack ('<I' , spec )[0 ]
131- block = fin .read (block_len )
132- if len (block ) != block_len :
136+ ptype = fin .read (1 )
137+ if len (ptype ) != 1 :
133138 break
134- yield parse_binary_window (block )
139+ if ptype [0 ] == 1 :
140+ spec = fin .read (4 )
141+ if len (spec ) != 4 :
142+ break ;
143+ block_len = struct .unpack ('<I' , spec )[0 ]
144+ block = fin .read (block_len )
145+ if len (block ) != block_len :
146+ break
147+ window = parse_binary_window (block )
148+ if windows_only :
149+ yield window
150+ else :
151+ yield Packet (type = 'window' , window = window )
152+ elif ptype [0 ] == 2 :
153+ cmd = fin .read (1 )
154+ if len (cmd ) != 1 :
155+ break
156+ if windows_only :
157+ continue
158+ pack = Packet (type = 'command' )
159+ if cmd [0 ] == 1 :
160+ pack .command = 'FLUSH'
161+ elif cmd [0 ] == 2 :
162+ pack .command = 'EXIT'
163+ elif cmd [0 ] == 3 :
164+ pack .command = 'IGNORE'
165+ elif cmd [0 ] == 4 :
166+ pack .command = 'RESUME'
167+ else :
168+ continue
169+ elif ptype [0 ] == 3 :
170+ lbuf = fin .read (2 )
171+ ln = struct .unpack ('<I' , lbuf )[0 ]
172+ pack = Packet (type = 'text' )
173+ pack .text = fin .read (ln ).decode ('utf-8' )
174+ if not windows_only :
175+ yield pack
176+ else :
177+ continue
0 commit comments