Skip to content

Commit f1b1f7c

Browse files
committed
Merge branch 'add_mev' into brukeropus/master
2 parents 4b4a4d0 + 92f7781 commit f1b1f7c

26 files changed

Lines changed: 8409 additions & 5469 deletions

brukeropus/file/__init__.py

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -383,9 +383,15 @@
383383
`Data`: `brukeropus.file.file.Data`
384384
`DataSeries`: `brukeropus.file.file.DataSeries`
385385
'''
386-
from brukeropus.file.file import *
386+
387387
from brukeropus.file.block import *
388-
from brukeropus.file.parse import *
388+
from brukeropus.file.constants import *
389+
from brukeropus.file.data import *
390+
from brukeropus.file.directory import *
391+
from brukeropus.file.file import *
389392
from brukeropus.file.labels import *
393+
from brukeropus.file.params import *
394+
from brukeropus.file.parse import *
395+
from brukeropus.file.report import *
390396
from brukeropus.file.utils import *
391-
from brukeropus.file.constants import *
397+

brukeropus/file/block.py

Lines changed: 87 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -1,20 +1,85 @@
1-
from brukeropus.file.labels import get_block_type_label, get_data_key
2-
from brukeropus.file.parse import (parse_header,
3-
parse_directory,
1+
from brukeropus.file.labels import get_data_key
2+
from brukeropus.file.constants import TYPE_CODE_LABELS
3+
from brukeropus.file.parse import (parse_directory,
44
parse_params,
55
parse_data,
66
parse_data_series,
7-
parse_text)
7+
parse_text,
8+
parse_report)
89

910

1011
__docformat__ = "google"
1112

1213

14+
class BlockType(tuple):
    '''Six-integer tuple representing the category (type) of block within an OPUS file.

    Each block in an OPUS file is categorized with six integers, for example (3, 1, 1, 2, 0, 0). This class stores the
    integers as a `tuple`, but extends the `tuple` class to provide a few useful functions/attributes.

    Args:
        iterable: six integers found in the OPUS file directory that describe the block type.

    Attributes:
        label: human-readable label that describes the block category

    Raises:
        ValueError: if `iterable` does not contain exactly six integers.
    '''

    def __new__(cls, iterable):
        '''Builds the tuple and validates that it holds exactly six integers.

        Validation lives in `__new__` (rather than `__init__`) because a tuple is immutable and already fully
        constructed by the time `__init__` runs.
        '''
        instance = super().__new__(cls, iterable)
        # bool is a subclass of int, but True/False are not meaningful type codes, so they are rejected explicitly.
        if len(instance) != 6 or any(isinstance(i, bool) or not isinstance(i, int) for i in instance):
            raise ValueError('BlockType input must be a 6-integer iterable, but a value of:' + str(iterable) + ' was given')
        return instance

    def __init__(self, iterable):
        '''Caches the human-readable label on the instance as `label`.'''
        super().__init__()
        self.label = self.get_label()

    def get_label(self):
        '''Converts the six-integer block type into a human readable label.

        Only positions holding a value greater than zero contribute to the label, and empty sub-labels are dropped.
        When a type code is not found in `TYPE_CODE_LABELS`, that position contributes a placeholder such as
        "Unknown 0 4", where the first number is the position index and the second is the unknown value integer.

        Returns:
            label (str): human-readable string label (sub-labels joined by a single space)
        '''
        labels = []
        for idx, value in enumerate(self):
            if value <= 0:
                continue
            sub_label = self._get_sub_type_label(idx)  # looked up once per position
            if sub_label != '':
                labels.append(sub_label)
        return ' '.join(labels)

    def _get_sub_type_label(self, pos_idx: int):
        '''Returns the sub-type label of a file block given the position index of the type code.

        Args:
            pos_idx: positional index of the type code (0 - 5)

        Returns:
            label (str): human-readable string label that describes the type code at that index, or
                "Unknown <pos_idx> <value>" when the value has no entry in `TYPE_CODE_LABELS[pos_idx]`.
        '''
        try:
            return TYPE_CODE_LABELS[pos_idx][self[pos_idx]]
        except KeyError:
            return f'Unknown {pos_idx} {self[pos_idx]}'

    def get_aligned_tuple_str(self, pad=1):
        '''Returns the six integers as a fixed-width string so that several block types line up in columns.

        The first integer is written without padding. Each following integer is right-aligned in a field of its
        nominal digit count (1, 2, 2, 1, 1 for positions 1 - 5) plus `pad` spaces.

        Args:
            pad: number of extra spaces added to each field after the first. The default of 1 gives field widths
                of 2, 3, 3, 2, 2.

        Returns:
            aligned (str): for example "3 1  1  2 0 0" for (3, 1, 1, 2, 0, 0) with the default padding.
        '''
        first, *rest = self
        nominal_digits = (1, 2, 2, 1, 1)
        return f'{first}' + ''.join(f'{value:{digits + pad}}' for value, digits in zip(rest, nominal_digits))

    def __repr__(self):
        '''Returns an unambiguous representation, e.g. "BlockType((3, 1, 1, 2, 0, 0))".'''
        return 'BlockType((' + ', '.join(str(i) for i in self) + '))'

    def __str__(self):
        '''Returns the aligned integers followed by a space and the human-readable label.'''
        return self.get_aligned_tuple_str() + ' ' + self.get_label()
77+
1378
class FileBlock:
1479
'''Generic OPUS file block.
1580
16-
This class initializes with the most basic file block info from the file directory: type, size, and start location
17-
as well as the raw bytes from the file (which can subsequently be parsed).
81+
This class initializes from the block info stored in the file directory (type, size, and start location) as well as
82+
the raw bytes from the file (which can subsequently be parsed).
1883
1984
Args:
2085
filebytes: raw bytes of the file
@@ -31,15 +96,16 @@ class FileBlock:
3196
parser: name of parsing function if parsing was successful
3297
'''
3398

34-
__slots__ = ('type', 'size', 'start', 'bytes', 'data', 'parser', 'keys')
99+
__slots__ = ('type', 'size', 'start', 'bytes', 'data', 'parser', 'parse_error', 'keys')
35100

36101
def __init__(self, filebytes: bytes, block_type: tuple, size: int, start: int):
37102
self.bytes = filebytes[start: start + size]
38-
self.type = block_type
103+
self.type = BlockType(block_type)
39104
self.size = size
40105
self.start = start
41106
self.data = None
42107
self.parser = None
108+
self.parse_error = None
43109

44110
def __str__(self):
45111
label = self.get_label()
@@ -52,7 +118,7 @@ def _try_parser(self, parser):
52118
self.keys = list(self.data.keys())
53119
self._clear_parsed_bytes(parser=parser)
54120
except Exception as e:
55-
self.data = 'Error parsing: ' + str(e)
121+
self.parse_error = 'Error parsing (' + parser.__name__ + '): ' + str(e)
56122

57123
def _clear_parsed_bytes(self, parser):
58124
'''Clear raw bytes that have been parsed (and log the parser for reference)'''
@@ -85,6 +151,10 @@ def is_file_log(self):
85151
'''Returns True if `FileBlock` is the file log (aka 'history') block'''
86152
return self.type == (0, 0, 0, 0, 0, 5)
87153

154+
def is_report(self):
155+
'''Returns True if `FileBlock` is a test report'''
156+
return self.type in [(0, 0, 0, 0, 0, 3), (0, 0, 0, 0, 0, 4)] or (self.type[2] == 0 and self.type[3] not in [0, 13] and self.type[5] == 5)
157+
88158
def is_data(self):
89159
'''Returns True if `FileBlock` is a 1D data block (not a data series)'''
90160
return self.type[2] == 0 and self.type[3] not in [0, 13] and self.type[5] not in [2, 5]
@@ -95,7 +165,7 @@ def is_data_series(self):
95165

96166
def get_label(self):
97167
'''Returns a friendly string label that describes the block type'''
98-
return get_block_type_label(self.type)
168+
return self.type.label
99169

100170
def get_data_key(self):
101171
'''If block is a data block, this function will return a shorthand key to reference that data.
@@ -115,6 +185,8 @@ def get_parser(self):
115185
return parse_text
116186
elif self.is_param():
117187
return parse_params
188+
elif self.is_report():
189+
return parse_report
118190
elif self.is_data_series():
119191
return parse_data_series
120192
elif self.is_data():
@@ -130,34 +202,6 @@ def parse(self):
130202
self._try_parser(parser)
131203

132204

133-
class FileDirectory:
134-
'''Contains type and pointer information for all blocks of data in an OPUS file.
135-
136-
`FileDirectory` information is decoded from the raw file bytes of an OPUS file. First the header is read which
137-
provides the start location of the directory block, number of blocks in file, and maximum number of blocks the file
138-
supports. Then it decodes the block pointer information from each entry of the file's directory block to create a
139-
`FileBlock` instance, initiates the block parsing, and adds the parsed block to the `blocks` attribute.
140-
141-
Args:
142-
filebytes: raw bytes from OPUS file. see: `brukeropus.file.parser.read_opus_file_bytes`
143-
144-
Attributes:
145-
start: pointer to start location of the directory block
146-
max_blocks: maximum number of blocks supported by file
147-
num_blocks: total number of blocks in the file
148-
blocks: list of `FileBlock` from the file. The class parses these blocks upon initilization of the class.
149-
'''
150-
def __init__(self, filebytes: bytes):
151-
self.version, self.start, self.max_blocks, self.num_blocks = parse_header(filebytes)
152-
size = self.max_blocks * 3 * 4
153-
blocks = []
154-
for block_type, size, start in parse_directory(filebytes[self.start: self.start + size]):
155-
block = FileBlock(filebytes=filebytes, block_type=block_type, size=size, start=start)
156-
block.parse()
157-
blocks.append(block)
158-
self.blocks = blocks
159-
160-
161205
def is_data_status_type_match(data_block: FileBlock, data_status_block: FileBlock) -> bool:
162206
'''Checks if data and data status blocks are a match based solely on the block type.
163207
@@ -174,7 +218,7 @@ def is_data_status_val_match(data_block: FileBlock, data_status_block: FileBlock
174218
When multiple spectra of the same type exist in a file, this is used to distinguish if the data and data status
175219
blocks are a good match. This can reduce the number of duplicate matches, but is not generally sufficient to
176220
fully eliminate duplicate matches.
177-
221+
178222
See test file: `Test Vit C_Glass.0000_comp.0`'''
179223
if data_block.is_data():
180224
try:
@@ -194,7 +238,7 @@ def is_valid_match(data_block: FileBlock, data_status_block: FileBlock) -> bool:
194238
'''Checks that number of points in data status are less than or equal to length of parsed data block.
195239
196240
This does not apply to data series. While rare, it is occasionally necessary to remove these bad matches.
197-
241+
198242
See test file: `unreadable.0000`'''
199243
if data_block.is_data() and len(data_block.data) < data_status_block.data['npt']:
200244
return False
@@ -241,8 +285,8 @@ def pair_data_and_status_blocks(blocks: list) -> list:
241285
reduced_matches.append((d, [b for b in matches if b.start not in single_starts]))
242286
single_matches = single_matches + [(m[0], m[1][0]) for m in reduced_matches if len(m[1]) == 1]
243287
multi_matches = [match for match in reduced_matches if len(match[1]) > 1]
244-
288+
245289
single_matches = [match for match in single_matches if is_valid_match(match[0], match[1])] # remove invalid
246-
290+
247291
single_matches.sort(key=lambda pairs: pairs[0].start, reverse=True) # last spec seems to be OPUS preference
248-
return single_matches
292+
return single_matches

brukeropus/file/constants.py

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
import json, importlib.resources
21
import numpy as np
32

43

@@ -71,7 +70,7 @@
7170
2: '(Series)',
7271
3: 'Molecular Structure',
7372
4: 'Macro',
74-
5: 'File Log',
73+
5: 'History/Report',
7574
}
7675

7776
# care must be taken when using 3-letter keys to avoid cross contaminating with 3-char parameter keys
@@ -135,6 +134,16 @@
135134
]
136135

137136

137+
SUBREPORT_TYPE_FMT = {
138+
3: '<i',
139+
4: '<i',
140+
5: '<f',
141+
6: '<d',
142+
7: '<i',
143+
# > 1000: '<s' where length = type int - 1000
144+
}
145+
146+
138147
Y_LABELS = {
139148
'sm': 'Sample Spectrum',
140149
'rf': 'Reference Spectrum',

0 commit comments

Comments
 (0)