SomaLogic · kyoung73 · Jul 15, 2025 · Jul 14, 2025 · Jul 15, 2025
diff --git a/README.md b/README.md
@@ -1,14 +1,17 @@
 # The Python `SomaData` Package from Somalogic, Inc.
 
-[![License:
-MIT](https://img.shields.io/badge/License-MIT-blue.svg)](https://choosealicense.com/licenses/mit/)
+[![License](https://img.shields.io/badge/license-MIT-blue.svg)](https://choosealicense.com/licenses/mit/)
+[![PyPI](https://img.shields.io/pypi/v/somadata.svg)](https://pypi.org/project/somadata)
 ![PyPI Downloads](https://img.shields.io/pypi/dm/somadata?label=PyPI%20downloads)
+[![Python Versions](https://img.shields.io/pypi/pyversions/somadata.svg)](https://pypi.org/project/somadata/#files)
+[![Actions](https://img.shields.io/github/actions/workflow/status/SomaLogic/Canopy/publish-pypi.yml?event=push)](https://github.com/SomaLogic/Canopy/actions)
+[![GitHub issues](https://img.shields.io/github/issues/SomaLogic/Canopy.svg)](https://github.com/SomaLogic/Canopy/issues)
 
 -----
 
 ## Overview
 
-This document accompanies the Python package `somadata`, which loads the SomaLogic, Inc. structured text data file called an `*.adat`.  The `somadata.Adat` object is an extension of the `pandas.DataFrame` class. The package provides auxiliary functions for extracting relevant information from the ADAT object once in the Python environment. Basic familiarity with the Python environment is assumed, as is the ability to install contributed packages from the Python Package Installer (pip)
+This document accompanies the Python package `somadata`, which loads the SomaLogic, Inc. structured text data file called an ADAT (`*.adat`). The `somadata.Adat` object is an extension of the `pandas.DataFrame` class. The package provides auxiliary functions for extracting relevant information from the ADAT object once in the Python environment. Basic familiarity with the Python environment is assumed, as is the ability to install contributed packages with the Python package installer (pip).
 
 -----
 
@@ -65,9 +68,6 @@ pip install -e ./somadata
 
 Upon installation, load `somadata` as normal:
 
-[return to top](#toptoc)
-
-
 ```python
 import somadata
 ```
@@ -102,10 +102,12 @@ help(somadata)
 
 ### Internal Objects
 
-The `somadata` package comes with one internal object available to users to run canned examples (or analyses). It can be accessed by perform the import:
+The `somadata` package comes with one internal object available to users to run canned examples (or analyses). It can be accessed by performing the import:
 
   - `from somadata.data.example_data import example_data`
 
+[return to top](#toptoc)
+
 ## Main Features (I/O)
 
   - Loading data (Import)
@@ -120,9 +122,6 @@ The `somadata` package comes with one internal object available to users to run
 
 ### Loading an ADAT
 
-
-[return to top](#toptoc)
-
 Loading the sample file from within the somadata library via its path
 
 
@@ -6119,7 +6118,7 @@ adat.to_adat('/tmp/out_file.adat')
 
 <a name="example"></a>
 
-# Typical Analyses
+## Typical Analyses
 Although it is beyond the scope of the `SomaData` package, below are 3
 sample analyses that typical users/clients would perform on SomaLogic data.
 They are not intended to be a definitive guide in statistical
@@ -6285,7 +6284,7 @@ plt.show()
 
 ![png](README_files/output_71_0.png)
 
-
+[return to top](#toptoc)
 
 ## Logistic Regression (Predict Sex)
 
@@ -7211,10 +7210,4 @@ plt.show()
 
 -----
 
-
 [return to top](#toptoc)
-
-
-```python
-
-```
diff --git a/pyproject.toml b/pyproject.toml
@@ -1,41 +1,50 @@
-[tool.poetry]
+[build-system]
+requires = ["poetry-core>=1.0.0"]
+build-backend = "poetry.core.masonry.api"
+
+[project]
 name = "somadata"
-version = "1.2.3"
+version = "1.2.4"
 description = "SomaLogic Python Data Input/Output Library"
 authors = [
-    "Joseph Allison",
-    "Ted Johnson",
-    "Kah Young"
-    ]
-maintainers = [
-    "Standard BioTools <support@somalogic.com>"
+    {name = "SomaLogic", email = "support@somalogic.com"},
+    {name = "Joseph Allison"},
+    {name = "Ted Johnson"},
+    {name = "Kah Young"}
 ]
-license = "MIT"
+keywords = ["somalogic", "bioinformatics", "proteomics", "canopy", "adat"]
+license = {text = "MIT"}
 readme = "README.md"
-include = [
-    { path = "somadata/data/lift.zip" },
-    { path = "*.adat" }
-]
 classifiers = [
+    "Programming Language :: Python :: 3",
+    "Programming Language :: Python :: 3.9",
+    "Programming Language :: Python :: 3.10",
+    "Programming Language :: Python :: 3.11",
+    "Programming Language :: Python :: 3.13",
     "Development Status :: 5 - Production/Stable",
     "Intended Audience :: Science/Research",
     "Operating System :: OS Independent",
     "Topic :: Scientific/Engineering",
 ]
+requires-python = ">=3.9"
+dependencies = [
+    "pandas>=1.1.2",
+    "numpy>=1.19",
+    "openpyxl>=3.0"
+]
+
+[project.urls]
 homepage = "https://www.standardbio.com/"
 repository = "https://github.com/SomaLogic/Canopy"
+issues = "https://github.com/SomaLogic/Canopy/issues"
 
-[tool.poetry.dependencies]
-python = "^3.9"
-pandas = ">=1.1.2"
-numpy = ">=1.19"
-openpyxl = "^3.0"
+[tool.poetry]
+include = [
+    { path = "somadata/data/lift.zip" },
+    { path = "*.adat" }
+]
 
 [tool.poetry.group.dev.dependencies]
 pytest = "^8.0.1"
 codespell = "^2.2.6"
 pytest-cov = "^6.0.0"
-
-[build-system]
-requires = ["poetry-core"]
-build-backend = "poetry.core.masonry.api"
diff --git a/somadata/annotations.py b/somadata/annotations.py
@@ -88,7 +88,10 @@ def update_adat_column_meta(self, adat: Adat) -> Adat:
 
         Examples
         --------
-        >>> updated_adat = Annotations.update_adat_column_meta(adat)
+        >>> import somadata as sd
+        >>> adat = sd.read_adat('path/to/adat_file.adat')
+        >>> ann = sd.read_annotations('path/to/annotations.xlsx')
+        >>> updated_adat = ann.update_adat_column_meta(adat)
         """
 
         xlsx_to_adat_column_map = {
@@ -102,7 +105,16 @@ def update_adat_column_meta(self, adat: Adat) -> Adat:
             'Entrez Gene ID': 'EntrezGeneID',
         }
 
-        seq_ids = self.index.get_level_values('SeqId')
+        # Check if SeqId is in index or columns
+        if 'SeqId' in self.index.names:
+            seq_ids = self.index.get_level_values('SeqId')
+        elif 'SeqId' in self.columns:
+            seq_ids = self['SeqId']
+        else:
+            raise ValueError(
+                "SeqId not found in either index or columns of annotations data"
+            )
+
         mod_adat = adat.copy()
         for xlsx_col, adat_col in xlsx_to_adat_column_map.items():
             if adat_col not in adat.columns.names:
@@ -219,7 +231,16 @@ def lift_adat(self, adat: Adat, lift_to_version: str = None) -> Adat:
             an_lifting_column = f'{matrix} Scalar {signal_space} to {lift_to_version}'
         scalars = self[an_lifting_column].copy().fillna(1.0)
         # I don't want to modify the annotations in case the object is used elsewhere.
-        scalars.index = self['SeqId']
+
+        # Check if SeqId is in index or columns and set scalars index appropriately
+        if 'SeqId' in self.index.names:
+            scalars.index = self.index.get_level_values('SeqId')
+        elif 'SeqId' in self.columns:
+            scalars.index = self['SeqId']
+        else:
+            raise ValueError(
+                "SeqId not found in either index or columns of annotations data"
+            )
 
         # Check if seq ids will broadcast between adat & annotations (symmetric difference)
         sym_diff = set(scalars.index) ^ set(adat.columns.get_level_values('SeqId'))

diff --git a/somadata/io/annotations/file.py b/somadata/io/annotations/file.py
@@ -19,6 +19,8 @@
     [
         '7d92666369d4e33364b11804f2d1f8ce',  # v4 rev 2 as of 2021-08-27
         '5fa46834ed826eb1e8dba88698cf7a76',  # v4.1 rev 2 as of 2021-08-27
+        '26c9f22d083bc3eac871e2dd00586c31',  # v4.1 plasma/serum rev 10 as of 2025-03
+        'd93180bdd291af5d07faafec92823661',  # v5.0 plasma/serum rev 7 as of 2025-03
     ]
 )
 

diff --git a/tests/test_annotation_lifting.py b/tests/test_annotation_lifting.py
@@ -6,19 +6,48 @@
 from somadata import Adat, Annotations
 from somadata.errors import AnnotationsLiftingError
 
+# Test data constants
+ANNOTATION_CSV_DATA = 'SeqId,SomaId,Plasma Scalar v4.0 5K to v4.1 7K\n54321-21,SL054321,0.8\n12345-12,SL012345,1.1\n'
+ANNOTATION_CSV_DATA_NEW_SOMA_IDS = 'SeqId,SomaId,Plasma Scalar v4.0 5K to v4.1 7K\n54321-21,SLNEW1,0.8\n12345-12,SLNEW2,1.1\n'
+ANNOTATION_CSV_DATA_NO_SEQID = (
+    'SomaId,Plasma Scalar v4.0 5K to v4.1 7K\nSL054321,0.8\nSL012345,1.1\n'
+)
+ANNOTATION_CSV_DATA_NO_SEQID_NEW_IDS = (
+    'SomaId,Plasma Scalar v4.0 5K to v4.1 7K\nSLNEW1,0.8\nSLNEW2,1.1\n'
+)
+
+EXPECTED_LIFTED_VALUES = [[1.1, 1.6], [4.4, 4]]
+EXPECTED_NEW_SOMA_IDS = ['SLNEW2', 'SLNEW1']
+
+
+def create_annotations_from_csv(csv_data: str, index_col=False) -> Annotations:
+    """Create an Annotations object from CSV string data.
+
+    Args:
+        csv_data: CSV string data
+        index_col: Column to use as index, or False for default index
+
+    Returns:
+        Annotations object
+    """
+    s = StringIO(csv_data)
+    df = pd.read_csv(s, index_col=index_col)
+    if index_col is False:
+        return Annotations(data=df.values, index=df.index, columns=df.columns)
+    else:
+        return Annotations(df)
+
 
 def build_annotation_example():
-    s = StringIO(
-        'SeqId,SomaId,Plasma Scalar v4.0 5K to v4.1 7K\n54321-21,SL054321,0.8\n12345-12,SL012345,1.1\n'
-    )
-    df = pd.read_csv(s, index_col=False)
-    return Annotations(data=df.values, index=df.index, columns=df.columns)
+    """Build standard annotation example with SeqId as column."""
+    return create_annotations_from_csv(ANNOTATION_CSV_DATA, index_col=False)
 
 
 def build_good_example_adat():
     rfu_data = [[1, 2], [4, 5]]
     col_metadata = {
         'SeqId': ['12345-12', '54321-21'],
+        'SomaId': ['SL012345', 'SL054321'],
         'SeqIdVersion': ['1', '2'],
         'ColCheck': ['PASS', 'FLAG'],
     }
@@ -36,6 +65,7 @@ def build_example_adat_with_extra_somamers():
     rfu_data = [[1, 2, 3], [4, 5, 6]]
     col_metadata = {
         'SeqId': ['12345-12', '54321-21', '23456-78'],
+        'SomaId': ['SL012345', 'SL054321', 'SL023456'],
         'SeqIdVersion': ['1', '2', '3'],
         'ColCheck': ['PASS', 'FLAG', 'FLAG'],
     }
@@ -54,13 +84,26 @@ def setUp(self):
         self.an = build_annotation_example()
         self.adat = build_good_example_adat()
 
-    def test_lifting_correct(self):
-        lifted_adat = self.an.lift_adat(self.adat)
-        correct_lifted_values = [[1.1, 1.6], [4.4, 4]]
-        for correct_row, lifted_row in zip(correct_lifted_values, lifted_adat.values):
+    def _assert_lifting_values_correct(
+        self, lifted_adat, expected_values=EXPECTED_LIFTED_VALUES
+    ):
+        """Helper method to assert lifting calculations are correct."""
+        for correct_row, lifted_row in zip(expected_values, lifted_adat.values):
             for correct_value, lifted_value in zip(correct_row, lifted_row):
                 self.assertAlmostEqual(correct_value, lifted_value)
 
+    def _assert_soma_ids_match(self, adat, expected_ids=EXPECTED_NEW_SOMA_IDS):
+        """Helper method to assert SomaId values match expected."""
+        self.assertEqual(list(adat.columns.get_level_values('SomaId')), expected_ids)
+
+    def _assert_error_contains_message(self, context_manager, expected_message):
+        """Helper method to assert error contains expected message."""
+        self.assertIn(expected_message, str(context_manager.exception))
+
+    def test_lifting_correct(self):
+        lifted_adat = self.an.lift_adat(self.adat)
+        self._assert_lifting_values_correct(lifted_adat)
+
         self.assertEqual(lifted_adat.header_metadata['SignalSpace'], 'v4.1')
         self.assertTrue(
             lifted_adat.header_metadata['!ProcessSteps'].endswith(
@@ -131,3 +174,50 @@ def test_analyte_mismatch(self):
             e_msg,
             'Unable to perform lifting due to analyte mismatch between adat & annotations. Has either file been modified?',
         )
+
+    def test_update_adat_column_meta(self):
+        an = create_annotations_from_csv(
+            ANNOTATION_CSV_DATA_NEW_SOMA_IDS, index_col='SeqId'
+        )
+        new_adat = an.update_adat_column_meta(self.adat)
+        self._assert_soma_ids_match(new_adat)
+
+    def test_update_adat_column_meta_seqid_as_column(self):
+        """Test update_adat_column_meta when SeqId is a regular column (not index)"""
+        an = create_annotations_from_csv(
+            ANNOTATION_CSV_DATA_NEW_SOMA_IDS, index_col=False
+        )
+        new_adat = an.update_adat_column_meta(self.adat)
+        self._assert_soma_ids_match(new_adat)
+
+    def test_update_adat_column_meta_missing_seqid(self):
+        """Test update_adat_column_meta when SeqId is missing entirely"""
+        an = create_annotations_from_csv(
+            ANNOTATION_CSV_DATA_NO_SEQID_NEW_IDS, index_col=False
+        )
+        with self.assertRaises(ValueError) as cm:
+            an.update_adat_column_meta(self.adat)
+        self._assert_error_contains_message(
+            cm, "SeqId not found in either index or columns"
+        )
+
+    def test_lift_adat_seqid_as_index(self):
+        """Test lift_adat when SeqId is in the index"""
+        an = create_annotations_from_csv(ANNOTATION_CSV_DATA, index_col='SeqId')
+        lifted_adat = an.lift_adat(self.adat)
+        self._assert_lifting_values_correct(lifted_adat)
+
+    def test_lift_adat_seqid_as_column(self):
+        """Test lift_adat when SeqId is a regular column (not index)"""
+        an = create_annotations_from_csv(ANNOTATION_CSV_DATA, index_col=False)
+        lifted_adat = an.lift_adat(self.adat)
+        self._assert_lifting_values_correct(lifted_adat)
+
+    def test_lift_adat_missing_seqid(self):
+        """Test lift_adat when SeqId is missing entirely"""
+        an = create_annotations_from_csv(ANNOTATION_CSV_DATA_NO_SEQID, index_col=False)
+        with self.assertRaises(ValueError) as cm:
+            an.lift_adat(self.adat)
+        self._assert_error_contains_message(
+            cm, "SeqId not found in either index or columns"
+        )