Skip to content

Commit 9b9a4c3

Browse files
committed
CAN-24: Fix ValueError when reading adats with Empty RFUs
1 parent 3ba61f4 commit 9b9a4c3

File tree

6 files changed

+96
-6
lines changed

6 files changed

+96
-6
lines changed

README.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -53,8 +53,8 @@ pip install -e ./somadata
5353

5454
### Dependencies
5555

56-
`Python >=3.8` is required to install `somadata`. The following package dependencies are installed on a `pip install`:
57-
- `pandas >= 1.1.0`
56+
`Python >=3.9` is required to install `somadata`. The following package dependencies are installed on a `pip install`:
57+
- `pandas >= 1.1.2`
5858
- `numpy >= 1.19.1`
5959

6060
[return to top](#toptoc)

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[tool.poetry]
22
name = "somadata"
3-
version = "1.2.1"
3+
version = "1.2.2"
44
description = "SomaLogic Python Data Input/Output Library"
55
authors = [
66
"Joseph Allison",

somadata/io/adat/file.py

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -46,8 +46,8 @@ def parse_file(
4646
"""
4747
if type(f) == str:
4848
f = open(f, 'r')
49-
elif type(f) != io.TextIOWrapper:
50-
raise AdatReadError('File must be a string or file object.')
49+
elif not hasattr(f, 'read'):
50+
raise AdatReadError('File must be a string or file-like object.')
5151

5252
current_section = None
5353

@@ -161,9 +161,16 @@ def parse_file(
161161
elif matrix_depth > col_metadata_length:
162162
# Store the row metadata in the dictionary
163163
row_metadata_data = line[:row_metadata_offset]
164+
# Check for missing metadata and handle it
165+
if len(row_metadata_data) < len(row_metadata_names):
166+
missing_count = len(row_metadata_names) - len(row_metadata_data)
167+
logging.warning(
168+
f"Row metadata has {missing_count} missing values. "
169+
f"Filling missing entries with empty strings."
170+
)
171+
row_metadata_data = list(row_metadata_data) + [""] * missing_count
164172
for name, data in zip(row_metadata_names, row_metadata_data):
165173
row_metadata[name].append(data)
166-
167174
# Store the RFU data
168175
rfu_row_data = line[row_metadata_offset + 1 :]
169176
converted_rfu_row_data = list(map(float, rfu_row_data))

tests/conftest.py

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
import csv
2+
from pathlib import Path
3+
4+
import numpy as np
5+
import pytest
6+
7+
import somadata as sd
8+
9+
10+
@pytest.fixture(scope="session")
def control_data_path() -> str:
    """Return the path, as a string, of ``data/control_data.adat`` next to this file."""
    data_dir = Path(__file__).parent / 'data'
    return str(data_dir / 'control_data.adat')
13+
14+
15+
@pytest.fixture(scope="session")
def control_data(control_data_path: str) -> sd.Adat:
    """Read the control ADAT file with ``sd.read_adat`` once per test session."""
    adat = sd.read_adat(control_data_path)
    return adat
18+
19+
20+
@pytest.fixture(scope="session")
def missing_rfu_adat_path(control_data_path: str, tmp_path_factory) -> str:
    """Write a copy of the control ADAT whose last row is truncated.

    The control file is read as tab-separated rows, the final row is cut
    down to its first 33 fields, and the result is written to
    ``missing_rfu_test.adat`` in a fresh temporary directory.

    Args:
        control_data_path: Path of the unmodified control ADAT file.
        tmp_path_factory: pytest's session-scoped temporary-directory factory.

    Returns:
        The path of the truncated ADAT copy, as a string.
    """
    # Number of leading fields kept on the last row (spreadsheet columns A-AG).
    kept_columns = 33
    fn = str(tmp_path_factory.mktemp("data") / "missing_rfu_test.adat")

    # Read the ADAT as TSV; newline="" leaves line-ending handling to csv.
    with open(control_data_path, "r", newline="", encoding="utf-8") as f:
        rows = list(csv.reader(f, delimiter="\t"))

    # Truncate only the last row; an empty input yields an empty output file.
    if rows:
        rows[-1] = rows[-1][:kept_columns]

    # Write the rows back out, still tab-delimited.
    with open(fn, "w", newline="", encoding="utf-8") as f:
        csv.writer(f, delimiter="\t").writerows(rows)
    return fn

tests/io/adat/__init__.py

Whitespace-only changes.

tests/io/adat/test_file.py

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
import io
2+
import logging
3+
4+
import pytest
5+
6+
from somadata.io.adat.file import parse_file
7+
8+
9+
def test_parse_file_with_missing_row_metadata(missing_rfu_adat_path: str, caplog):
    """Check the warning and row metadata produced for a truncated ADAT file.

    ``parse_file`` is run on the file from the ``missing_rfu_adat_path``
    fixture; the test then checks the logged warning and two row-metadata
    columns.
    """
    expected_warning = (
        "Row metadata has 3 missing values. Filling missing entries with empty strings."
    )
    # Expected per-column values after parsing; '' marks an empty entry.
    expected_columns = {
        "ANMLFractionUsed_20": [
            '', '', '0.817', '', '', '0.791', '', '', '', '0.832', '',
        ],
        "ANMLFractionUsed_0_5": [
            '', '', '0.836', '', '', '0.829', '', '', '', '0.840', '',
        ],
    }

    # Only the row metadata (second element of the returned tuple) is inspected.
    with caplog.at_level(logging.WARNING):
        _, row_metadata, _, _ = parse_file(missing_rfu_adat_path)

    # The warning about missing row metadata must appear in the captured log.
    logged_messages = [record.message for record in caplog.records]
    assert any(
        expected_warning in message for message in logged_messages
    ), "Expected warning about missing row metadata not found."

    # Each checked column must match its expected list exactly.
    for column_name, expected_values in expected_columns.items():
        assert row_metadata[column_name] == expected_values

0 commit comments

Comments
 (0)