1- from brukeropus .file .labels import get_block_type_label , get_data_key
2- from brukeropus .file .parse import ( parse_header ,
3- parse_directory ,
1+ from brukeropus .file .labels import get_data_key
2+ from brukeropus .file .constants import TYPE_CODE_LABELS
3+ from brukeropus . file . parse import ( parse_directory ,
44 parse_params ,
55 parse_data ,
66 parse_data_series ,
7- parse_text )
7+ parse_text ,
8+ parse_report )
89
910
1011__docformat__ = "google"
1112
1213
class BlockType(tuple):
    '''Six-integer tuple representing the category (type) of block within an OPUS file.

    Each block in an OPUS file is categorized with six integers, for example (3, 1, 1, 2, 0, 0). This class stores the
    integers as a `tuple`, but extends the `tuple` class to provide a few useful functions/attributes.

    Args:
        iterable: six integers found in the OPUS file directory that describe the block type.

    Attributes:
        label: human-readable label that describes the block category

    Raises:
        ValueError: if `iterable` does not yield exactly six values whose type is exactly `int`.
    '''

    def __new__(cls, iterable):
        # `tuple` is immutable, so the contents must be supplied (and validated) in `__new__` rather than `__init__`.
        instance = super().__new__(cls, iterable)
        # Exact type comparison (not `isinstance`) is deliberate: it keeps rejecting `bool` and other `int` subclasses.
        if len(instance) != 6 or any(type(i) is not int for i in instance):
            raise ValueError('BlockType input must be a 6-integer iterable, but a value of:' + str(iterable) + ' was given')
        return instance

    def __init__(self, iterable):
        super().__init__()
        # Computed once at construction; the tuple contents can never change afterwards.
        self.label = self.get_label()

    def get_label(self):
        '''Converts the six-integer block type into a human readable label.

        Only positions holding a value greater than zero contribute to the label, and empty sub-labels are dropped.
        In the event a type code label is not known, the corresponding part of the label reads "Unknown 0 4" where the
        first number is the position index, and the second is the unknown value integer.

        Returns:
            label (str): human-readable string label (sub-labels joined by a single space)
        '''
        # Look each sub-label up exactly once, then discard the empty ones.
        sub_labels = (self._get_sub_type_label(idx) for idx, code in enumerate(self) if code > 0)
        return ' '.join(label for label in sub_labels if label != '')

    def _get_sub_type_label(self, pos_idx: int):
        '''Returns the sub-type label of a file block given the position index and value of the type code.

        Args:
            pos_idx: positional index of the type code (0 - 5)

        Returns:
            label (str): human-readable string label that describes the type code at that index.
        '''
        try:
            return TYPE_CODE_LABELS[pos_idx][self[pos_idx]]
        except KeyError:
            return 'Unknown ' + str(pos_idx) + ' ' + str(self[pos_idx])

    def get_aligned_tuple_str(self, pad=1):
        '''Returns the six integers as a string using a fixed minimum width per position.

        Args:
            pad: currently unused; retained so existing callers that pass it keep working.

        Returns:
            aligned (str): the six values right-aligned to minimum widths of 1, 2, 3, 3, 2 and 2 characters.
        '''
        return f'{self[0]}{self[1]:2}{self[2]:3}{self[3]:3}{self[4]:2}{self[5]:2}'

    def __repr__(self):
        return 'BlockType((' + ', '.join([str(i) for i in self]) + '))'

    def __str__(self):
        return self.get_aligned_tuple_str() + ' ' + self.get_label()
76+
77+
1378class FileBlock :
1479 '''Generic OPUS file block.
1580
16- This class initializes with the most basic file block info from the file directory: type, size, and start location
17- as well as the raw bytes from the file (which can subsequently be parsed).
81+ This class initializes from the block info stored in the file directory ( type, size, and start location) as well as
82+ the raw bytes from the file (which can subsequently be parsed).
1883
1984 Args:
2085 filebytes: raw bytes of the file
@@ -31,15 +96,16 @@ class FileBlock:
3196 parser: name of parsing function if parsing was successful
3297 '''
3398
34- __slots__ = ('type' , 'size' , 'start' , 'bytes' , 'data' , 'parser' , 'keys' )
99+ __slots__ = ('type' , 'size' , 'start' , 'bytes' , 'data' , 'parser' , 'parse_error' , ' keys' )
35100
36101 def __init__ (self , filebytes : bytes , block_type : tuple , size : int , start : int ):
37102 self .bytes = filebytes [start : start + size ]
38- self .type = block_type
103+ self .type = BlockType ( block_type )
39104 self .size = size
40105 self .start = start
41106 self .data = None
42107 self .parser = None
108+ self .parse_error = None
43109
44110 def __str__ (self ):
45111 label = self .get_label ()
@@ -52,7 +118,7 @@ def _try_parser(self, parser):
52118 self .keys = list (self .data .keys ())
53119 self ._clear_parsed_bytes (parser = parser )
54120 except Exception as e :
55- self .data = 'Error parsing: ' + str (e )
121+ self .parse_error = 'Error parsing (' + parser . __name__ + ') : ' + str (e )
56122
57123 def _clear_parsed_bytes (self , parser ):
58124 '''Clear raw bytes that have been parsed (and log the parser for reference)'''
@@ -85,6 +151,10 @@ def is_file_log(self):
85151 '''Returns True if `FileBlock` is the file log (aka 'history') block'''
86152 return self .type == (0 , 0 , 0 , 0 , 0 , 5 )
87153
154+ def is_report (self ):
155+ '''Returns True if `FileBlock` is a test report'''
156+ return self .type in [(0 , 0 , 0 , 0 , 0 , 3 ), (0 , 0 , 0 , 0 , 0 , 4 )] or (self .type [2 ] == 0 and self .type [3 ] not in [0 , 13 ] and self .type [5 ] == 5 )
157+
88158 def is_data (self ):
89159 '''Returns True if `FileBlock` is a 1D data block (not a data series)'''
90160 return self .type [2 ] == 0 and self .type [3 ] not in [0 , 13 ] and self .type [5 ] not in [2 , 5 ]
@@ -95,7 +165,7 @@ def is_data_series(self):
95165
96166 def get_label (self ):
97167 '''Returns a friendly string label that describes the block type'''
98- return get_block_type_label ( self .type )
168+ return self .type . label
99169
100170 def get_data_key (self ):
101171 '''If block is a data block, this function will return a shorthand key to reference that data.
@@ -115,6 +185,8 @@ def get_parser(self):
115185 return parse_text
116186 elif self .is_param ():
117187 return parse_params
188+ elif self .is_report ():
189+ return parse_report
118190 elif self .is_data_series ():
119191 return parse_data_series
120192 elif self .is_data ():
@@ -130,34 +202,6 @@ def parse(self):
130202 self ._try_parser (parser )
131203
132204
133- class FileDirectory :
134- '''Contains type and pointer information for all blocks of data in an OPUS file.
135-
136- `FileDirectory` information is decoded from the raw file bytes of an OPUS file. First the header is read which
137- provides the start location of the directory block, number of blocks in file, and maximum number of blocks the file
138- supports. Then it decodes the block pointer information from each entry of the file's directory block to create a
139- `FileBlock` instance, initiates the block parsing, and adds the parsed block to the `blocks` attribute.
140-
141- Args:
142- filebytes: raw bytes from OPUS file. see: `brukeropus.file.parser.read_opus_file_bytes`
143-
144- Attributes:
145- start: pointer to start location of the directory block
146- max_blocks: maximum number of blocks supported by file
147- num_blocks: total number of blocks in the file
148- blocks: list of `FileBlock` from the file. The class parses these blocks upon initilization of the class.
149- '''
150- def __init__ (self , filebytes : bytes ):
151- self .version , self .start , self .max_blocks , self .num_blocks = parse_header (filebytes )
152- size = self .max_blocks * 3 * 4
153- blocks = []
154- for block_type , size , start in parse_directory (filebytes [self .start : self .start + size ]):
155- block = FileBlock (filebytes = filebytes , block_type = block_type , size = size , start = start )
156- block .parse ()
157- blocks .append (block )
158- self .blocks = blocks
159-
160-
161205def is_data_status_type_match (data_block : FileBlock , data_status_block : FileBlock ) -> bool :
162206 '''Checks if data and data status blocks are a match based soley on the block type.
163207
@@ -174,7 +218,7 @@ def is_data_status_val_match(data_block: FileBlock, data_status_block: FileBlock
174218 When multiple spectra of the same type exist in a file, this is used to distinguish if the data and data status
175219 blocks are a good match. This can reduce the number of duplicate matches, but is not generally sufficient to
176220 fully eliminate duplicate matches.
177-
221+
178222 See test file: `Test Vit C_Glass.0000_comp.0`'''
179223 if data_block .is_data ():
180224 try :
@@ -194,7 +238,7 @@ def is_valid_match(data_block: FileBlock, data_status_block: FileBlock) -> bool:
194238 '''Checks that number of points in data status are less than or equal to length of parsed data block.
195239
196240 This does not apply to data series. While rare, it is occasionally necessary to remove these bad matches.
197-
241+
198242 See test file: `unreadable.0000`'''
199243 if data_block .is_data () and len (data_block .data ) < data_status_block .data ['npt' ]:
200244 return False
@@ -241,8 +285,8 @@ def pair_data_and_status_blocks(blocks: list) -> list:
241285 reduced_matches .append ((d , [b for b in matches if b .start not in single_starts ]))
242286 single_matches = single_matches + [(m [0 ], m [1 ][0 ]) for m in reduced_matches if len (m [1 ]) == 1 ]
243287 multi_matches = [match for match in reduced_matches if len (match [1 ]) > 1 ]
244-
288+
245289 single_matches = [match for match in single_matches if is_valid_match (match [0 ], match [1 ])] # remove invalid
246-
290+
247291 single_matches .sort (key = lambda pairs : pairs [0 ].start , reverse = True ) # last spec seems to be OPUS preference
248- return single_matches
292+ return single_matches
0 commit comments