Skip to content

Commit 34610d2

Browse files
authored
Merge pull request #15 from SomaLogic/CAN-25
Bump version to 1.2.3, enhance error messaging in metadata concatenation, and add tests for various header data types
2 parents 2a036b1 + bfdbedf commit 34610d2

File tree

4 files changed

+68
-21
lines changed

4 files changed

+68
-21
lines changed

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[tool.poetry]
22
name = "somadata"
3-
version = "1.2.2"
3+
version = "1.2.3"
44
description = "SomaLogic Python Data Input/Output Library"
55
authors = [
66
"Joseph Allison",

somadata/io/adat/file.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -279,10 +279,10 @@ def write_adat(
279279
writer.writerow(['^HEADER'])
280280
for row in adat.header_metadata.items():
281281
# ReportConfig needs special handling because its JSON value contains double quotes
282-
if type(row[1]) == dict:
282+
if row[0] == 'ReportConfig' and type(row[1]) == dict:
283283
f.write(row[0] + '\t' + json.dumps(row[1], separators=(',', ':')) + '\r\n')
284284
else:
285-
f.write(row[0] + '\t' + row[1] + '\r\n')
285+
writer.writerow([x for x in row if x is not None])
286286

287287
# Write COL_DATA section
288288
writer.writerow(['^COL_DATA'])

somadata/tools/adat_concatenation.py

Lines changed: 33 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,13 @@
11
from __future__ import annotations
2+
3+
import copy
4+
import re
5+
from typing import Dict, List
6+
27
from somadata import Adat
38
from somadata.tools.errors import AdatConcatError
4-
from typing import List, Dict
9+
510
from . import adat_concatenation_utils
6-
import copy
7-
import re
811

912

1013
def _set_addition(key, value1, value2):
@@ -17,7 +20,9 @@ def _set_addition(key, value1, value2):
1720

1821
def _exact_match(key, value1, value2):
1922
if value1 != value2:
20-
raise AdatConcatError(f'Header metadata mismatch where exact match is required. Key: {key}, Values: {value1}, {value2}')
23+
raise AdatConcatError(
24+
f'Header metadata mismatch where exact match is required. Key: {key}, Values: {value1}, {value2}'
25+
)
2126
return value1
2227

2328

@@ -69,7 +74,6 @@ def _concat_header_metadata(adats: List[Adat], merge_strategy=None):
6974
# Add the rest of the adats' headers
7075
for adat in adats[1:]:
7176
for key, value in adat.header_metadata.items():
72-
7377
# If key is not in the base_header, add it
7478
if key not in base_header:
7579
base_header[key] = value
@@ -95,11 +99,17 @@ def _concat_column_metadata(adats: List[Adat]) -> Dict(str, List):
9599
for name in adat.columns.names:
96100
values = list(adat.columns.get_level_values(name))
97101
if name == 'ColCheck':
98-
col_checks.append([True if value == 'PASS' else False for value in values])
102+
col_checks.append(
103+
[True if value == 'PASS' else False for value in values]
104+
)
99105
col_metadata['ColCheck'] = []
100106
elif name in col_metadata:
101107
if col_metadata[name] != values:
102-
raise AdatConcatError('Mismatching column metadata in: ' + name)
108+
raise AdatConcatError(
109+
f'Mismatching column metadata in: {name}\n'
110+
f'Existing values: {col_metadata[name]}\n'
111+
f'New values: {values}'
112+
)
103113
else:
104114
col_metadata[name] = values
105115

@@ -122,7 +132,10 @@ def _concat_row_metadata(adats: List[Adat]) -> Dict(str, List):
122132
symmetric_difference = symmetric_difference.union(names ^ set(adat.index.names))
123133
names = names.union(symmetric_difference)
124134
if len(symmetric_difference) > 0:
125-
raise AdatConcatError('Mismatching index name, ensure row metadata columns match. Names: ' + ', '.join(sorted(symmetric_difference)))
135+
raise AdatConcatError(
136+
'Mismatching index name, ensure row metadata columns match. Names: '
137+
+ ', '.join(sorted(symmetric_difference))
138+
)
126139

127140
# Get Row Metadata
128141
row_metadata = {}
@@ -181,12 +194,16 @@ def concatenate_adats(adats: List[Adat], header_merge_strategy: Dict = None) ->
181194
>>> adat = concatenate_adats([adat1, adat2, adat3], header_merge_strategy={'default_action': 'null', 'properties': {'AdatId': 'exact'}})
182195
"""
183196

184-
header_metadata = _concat_header_metadata(adats, merge_strategy=header_merge_strategy)
197+
header_metadata = _concat_header_metadata(
198+
adats, merge_strategy=header_merge_strategy
199+
)
185200
column_metadata = _concat_column_metadata(adats)
186201
row_metadata = _concat_row_metadata(adats)
187202
rfu_matrix = _concat_rfus(adats)
188203

189-
adat = Adat.from_features(rfu_matrix, row_metadata, column_metadata, header_metadata)
204+
adat = Adat.from_features(
205+
rfu_matrix, row_metadata, column_metadata, header_metadata
206+
)
190207
return adat
191208

192209

@@ -200,7 +217,7 @@ def _quick_concat(adats):
200217
data=rfu_matrix,
201218
index=row_multiindex,
202219
columns=adats[0].columns,
203-
header_metadata=adats[0].header_metadata
220+
header_metadata=adats[0].header_metadata,
204221
)
205222

206223

@@ -233,21 +250,23 @@ def smart_adat_concatenation(adats, somamer_source_adat=None):
233250
# About to change the adats' somamer metadata. Make sure their seqids are the same.
234251
if type(somamer_source_adat) == Adat:
235252
adats = adats + [somamer_source_adat]
236-
253+
237254
adats = adat_concatenation_utils.prepare_rfu_matrix_for_inner_merge(adats)
238255

239256
# Unpack & update if we're updating
240257
if type(somamer_source_adat) == Adat:
241258
somamer_source_adat = adats[-1]
242259
adats = adats[0:-1]
243-
adats = adat_concatenation_utils.convert_somamer_metadata_to_source(adats, somamer_source_adat)
260+
adats = adat_concatenation_utils.convert_somamer_metadata_to_source(
261+
adats, somamer_source_adat
262+
)
244263

245264
header_merge_strategy = {
246265
'default_action': 'exact_match',
247266
'properties': {
248267
'AdatId': 'null',
249268
'!AdatId': 'null',
250-
}
269+
},
251270
}
252271

253272
adats = adat_concatenation_utils.robust_merge_adat_headers(adats)

tests/test_adat_writing.py

Lines changed: 32 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,9 @@ def setUp(self):
2626
self.adat_compatibility_mode = somadata.read_adat(
2727
'./tests/data/control_data.adat', compatibility_mode=True
2828
)
29+
self.original_report_config = self.adat.header_metadata.get(
30+
'ReportConfig', None
31+
)
2932

3033
def tearDown(self):
3134
if os.path.exists(self.filename):
@@ -37,6 +40,33 @@ def test_adat_write(self):
3740
self.adat.to_adat(self.filename)
3841
self.assertTrue(os.path.exists(self.filename))
3942

43+
def test_adat_write_header_various_types(self):
44+
# Test various data types in header metadata
45+
self.adat.header_metadata['HeaderString'] = "test_string"
46+
self.adat.header_metadata['HeaderNumeric'] = 123456789
47+
self.adat.header_metadata['HeaderFloat'] = 1.23456789
48+
self.adat.header_metadata['HeaderBoolean'] = True
49+
self.adat.header_metadata['HeaderNone'] = None
50+
self.adat.header_metadata['HeaderList'] = [1, 2, 3]
51+
self.adat.header_metadata['HeaderDict'] = {
52+
('key1', 'key2'): 'value1',
53+
'key3': 'value2',
54+
}
55+
56+
self.adat.to_adat(self.filename)
57+
_, _, _, header_metadata = somadata.parse_file(self.filename)
58+
59+
self.assertEqual(header_metadata['HeaderString'], 'test_string')
60+
self.assertEqual(header_metadata['HeaderNumeric'], '123456789')
61+
self.assertEqual(header_metadata['HeaderFloat'], '1.23456789')
62+
self.assertEqual(header_metadata['HeaderBoolean'], 'True')
63+
self.assertEqual(header_metadata['HeaderNone'], '')
64+
self.assertEqual(header_metadata['HeaderList'], '[1, 2, 3]')
65+
self.assertEqual(
66+
header_metadata['HeaderDict'],
67+
"{('key1', 'key2'): 'value1', 'key3': 'value2'}",
68+
)
69+
4070
@mock.patch('somadata.io.adat.file.version', require_side_effect)
4171
def test_adat_md5(self):
4272
self.adat.to_adat(self.filename)
@@ -48,10 +78,8 @@ def test_adat_md5(self):
4878
@mock.patch('somadata.io.adat.file.version', require_side_effect)
4979
def test_adat_md5_compatibility(self):
5080
self.adat_compatibility_mode.to_adat(self.filename_compatibility_mode)
51-
hash_md5 = hashlib.md5()
52-
with open(self.filename_compatibility_mode, 'rb') as f:
53-
hash_md5.update(f.read())
54-
self.assertEqual(hash_md5.hexdigest(), 'ff0f3b40b210999093d55e129276201e')
81+
_, _, _, header_metadata = somadata.parse_file(self.filename_compatibility_mode)
82+
self.assertEqual(header_metadata['ReportConfig'], self.original_report_config)
5583

5684

5785
def require_side_effect_0_2(*args, **kwargs):

0 commit comments

Comments
 (0)