Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -53,8 +53,8 @@ pip install -e ./somadata

### Dependencies

`Python >=3.8` is required to install `somadata`. The following package dependencies are installed on a `pip install`:
- `pandas >= 1.1.0`
`Python >=3.9` is required to install `somadata`. The following package dependencies are installed automatically by `pip install`:
- `pandas >= 1.1.2`
- `numpy >= 1.19.1`

[return to top](#toptoc)
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "somadata"
version = "1.2.1"
version = "1.2.2"
description = "SomaLogic Python Data Input/Output Library"
authors = [
"Joseph Allison",
Expand Down
13 changes: 10 additions & 3 deletions somadata/io/adat/file.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,8 +46,8 @@ def parse_file(
"""
if type(f) == str:
f = open(f, 'r')
elif type(f) != io.TextIOWrapper:
raise AdatReadError('File must be a string or file object.')
elif not hasattr(f, 'read'):
raise AdatReadError('File must be a string or file-like object.')

current_section = None

Expand Down Expand Up @@ -161,9 +161,16 @@ def parse_file(
elif matrix_depth > col_metadata_length:
# Store in row metadata into dictionary
row_metadata_data = line[:row_metadata_offset]
# Check for missing metadata and handle it
if len(row_metadata_data) < len(row_metadata_names):
missing_count = len(row_metadata_names) - len(row_metadata_data)
logging.warning(
f"Row metadata has {missing_count} missing values. "
f"Filling missing entries with empty strings."
)
row_metadata_data = list(row_metadata_data) + [""] * missing_count
for name, data in zip(row_metadata_names, row_metadata_data):
row_metadata[name].append(data)

# Store the RFU data
rfu_row_data = line[row_metadata_offset + 1 :]
converted_rfu_row_data = list(map(float, rfu_row_data))
Expand Down
33 changes: 33 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
import csv
from pathlib import Path

import numpy as np
import pytest

import somadata as sd


@pytest.fixture(scope="session")
def control_data_path() -> str:
    """Return the path of ``data/control_data.adat`` next to this file, as a string."""
    data_dir = Path(__file__).parent / 'data'
    return str(data_dir / 'control_data.adat')


@pytest.fixture(scope="session")
def control_data(control_data_path: str) -> sd.Adat:
    """Load the control ADAT file once per test session via ``sd.read_adat``."""
    adat = sd.read_adat(control_data_path)
    return adat


@pytest.fixture(scope="session")
def missing_rfu_adat_path(control_data_path: str, tmp_path_factory) -> str:
    """Write a copy of the control ADAT whose final row is cut to 33 fields.

    The control file is read as tab-separated text, only its last row is
    truncated, and the rows are written to a file in a fresh temporary
    directory.

    Returns:
        The path of the modified ADAT file, as a string.
    """
    out_path = str(tmp_path_factory.mktemp("data") / "missing_rfu_test.adat")

    # Load every line of the ADAT as a list of tab-separated fields.
    with open(control_data_path, "r", newline="", encoding="utf-8") as src:
        rows = list(csv.reader(src, delimiter="\t"))

    # Shorten only the final row; with no rows there is nothing to truncate.
    if rows:
        last_row = rows[-1]
        rows[-1] = last_row[:33]  # first 33 columns, i.e. up to column AG

    # Emit the rows again using the same tab delimiter.
    with open(out_path, "w", newline="", encoding="utf-8") as dst:
        csv.writer(dst, delimiter="\t").writerows(rows)

    return out_path
Empty file added tests/io/adat/__init__.py
Empty file.
50 changes: 50 additions & 0 deletions tests/io/adat/test_file.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
import io
import logging

import pytest

from somadata.io.adat.file import parse_file


def test_parse_file_with_missing_row_metadata(missing_rfu_adat_path: str, caplog):
    """Check the warning and the row metadata produced for a truncated ADAT row.

    The fixture file has a shortened final row. The test expects a warning
    reporting three missing row-metadata values, and expects the two
    ``ANMLFractionUsed_*`` columns to end with an empty string.
    """
    with caplog.at_level(logging.WARNING):
        parsed = parse_file(missing_rfu_adat_path)
    _rfu_matrix, row_metadata, _column_metadata, _header_metadata = parsed

    # The parser must have reported the missing row metadata.
    expected_warning = (
        "Row metadata has 3 missing values. Filling missing entries with empty strings."
    )
    matching_records = [
        record for record in caplog.records if expected_warning in record.message
    ]
    assert matching_records, "Expected warning about missing row metadata not found."

    # Expected per-sample values; the final entry belongs to the truncated row.
    expected_columns = {
        "ANMLFractionUsed_20": [
            '',
            '',
            '0.817',
            '',
            '',
            '0.791',
            '',
            '',
            '',
            '0.832',
            '',
        ],
        "ANMLFractionUsed_0_5": [
            '',
            '',
            '0.836',
            '',
            '',
            '0.829',
            '',
            '',
            '',
            '0.840',
            '',
        ],
    }
    for column_name, expected_values in expected_columns.items():
        assert row_metadata[column_name] == expected_values