Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"

[project]
name = "somadata"
version = "1.2.5"
version = "1.2.6"
description = "SomaLogic Python Data Input/Output Library"
authors = [
{name = "SomaLogic", email = "support@somalogic.com"},
Expand Down
17 changes: 12 additions & 5 deletions somadata/annotations.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,20 +48,27 @@ def __setitem__(self, key, val):

def __delitem__(self, key) -> None:
super().__delitem__(key)
if LIFTING_COLUMN_REGEX.match(key):
if isinstance(key, str) and LIFTING_COLUMN_REGEX.match(key):
self._update_supported_lifting_options()

def _update_supported_lifting_options(self):
self.supported_lifting_matrices = set()
self.supported_lifting_signal_space = set()

for name in self.columns:
if LIFTING_COLUMN_REGEX.match(name):
if isinstance(name, str) and LIFTING_COLUMN_REGEX.match(name):
supported_info = name.split(' ')
self.supported_lifting_matrices.add(supported_info[0])
self.supported_lifting_signal_space.add(
(supported_info[2], supported_info[5])
)
if len(supported_info) >= 7:
# New format with matrix sizes (e.g., "11K", "7K")
self.supported_lifting_signal_space.add(
(supported_info[2], supported_info[5])
)
elif len(supported_info) >= 5:
# Old format without matrix sizes
self.supported_lifting_signal_space.add(
(supported_info[2], supported_info[4])
)

def update_adat_column_meta(self, adat: Adat) -> Adat:
"""Utility to update a provided adat's column metadata to match the annotations object.
Expand Down
17 changes: 14 additions & 3 deletions somadata/base/adat_math_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,18 @@


def calcELOD(x: pd.Series):
"""Calculated the estimated limit of detection based on x a pd.Series of SomaScan measurements of buffer."""
"""Calculate the estimated limit of detection based on x a pd.Series of SomaScan measurements of buffer.

Parameters
----------
x : pd.Series
A pandas Series of SomaScan measurements of buffer.

Returns
-------
float
The estimated limit of detection (eLOD).
"""
med = np.median(x)
absDiff = np.abs(x - med)
medDiff = np.median(absDiff)
Expand Down Expand Up @@ -93,10 +104,10 @@ def lift(self, lift_to_version: str = None):

Examples
--------
>>> # the adat stores the current assay version. This value is used by the tool to select the correct reference but you are not required to ender it.
>>> # the adat stores the current assay version. This value is used by the tool to select the correct reference but you are not required to enter it.
>>> adat.header_metadata['!AssayVersion']
'v5.0'
>>> # the adat stores the matrix. This value is used by the tool to select the correct reference but you are not required to ender it.
>>> # the adat stores the matrix. This value is used by the tool to select the correct reference but you are not required to enter it.
>>> adat.header_metadata['StudyMatrix']
'EDTA Plasma'
>>> lifted_adat = adat.lift('v4.1') # lifting to the previous assay version.
Expand Down
47 changes: 23 additions & 24 deletions somadata/base/adat_meta_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ def _filter_on_meta(
self,
axis: int,
name: str,
values: Union[List(str), Set(str), Tuple(str)],
values: Union[List[str], Set[str], Tuple[str]],
include: bool = True,
) -> Adat:
# Check to see if values is the right variable type
Expand Down Expand Up @@ -61,7 +61,7 @@ def _filter_on_meta(
return adat.copy()

def _filter_meta(
self, axis: int, names: Union[List(str), Set(str), Tuple(str)], include: bool
self, axis: int, names: Union[List[str], Set[str], Tuple[str]], include: bool
) -> Adat:
# Check to see if names is the right variable type
if not isinstance(names, (list, tuple, set)):
Expand Down Expand Up @@ -94,7 +94,7 @@ def _filter_meta(
return adat

def _insert_meta(
self, axis: int, name: str, values: Union[List(str), Tuple(str)], replace: bool
self, axis: int, name: str, values: Union[List[str], Tuple[str]], replace: bool
) -> Adat:
adat = self.copy()
if axis == 0:
Expand Down Expand Up @@ -127,7 +127,7 @@ def _insert_meta(
return adat

def exclude_on_meta(
self, axis: int, name: str, values: Union[List(str), Set(str), Tuple(str)]
self, axis: int, name: str, values: Union[List[str], Set[str], Tuple[str]]
) -> Adat:
"""Returns an adat with rfu rows or columns excluded given the multiindex name and values to exclude on.

Expand All @@ -141,7 +141,7 @@ def exclude_on_meta(
name : str
The name of the metadata/multiindex row/column to filter based on.

values : List(str) | Set(str) | Tuple(str)
values : List[str] | Set[str] | Tuple[str]
The values to filter on. Can be a tuple, list, or set.

Returns
Expand All @@ -157,7 +157,7 @@ def exclude_on_meta(
return self._filter_on_meta(axis, name, values, include=False)

def pick_on_meta(
self, axis: int, name: str, values: Union[List(str), Set(str), Tuple(str)]
self, axis: int, name: str, values: Union[List[str], Set[str], Tuple[str]]
) -> Adat:
"""Returns an adat with rfu rows or columns excluded given the multiindex name and values to keep.

Expand All @@ -171,7 +171,7 @@ def pick_on_meta(
name : str
The name of the metadata/multiindex row/column to filter based on.

values : List(str) | Set(str) | Tuple(str)
values : List[str] | Set[str] | Tuple[str]
The values to filter on. Can be a tuple, list, or set.

Returns
Expand All @@ -188,7 +188,7 @@ def pick_on_meta(
return self._filter_on_meta(axis, name, values, include=True)

def pick_meta(
self, axis: int, names: Union[List(str), Set(str), Tuple(str)]
self, axis: int, names: Union[List[str], Set[str], Tuple[str]]
) -> Adat:
"""Returns an adat with excluded metadata/multiindices given the names to keep.

Expand All @@ -199,7 +199,7 @@ def pick_meta(
0 - row metadata,
1 - column metadata

names : List(str) | Set(str) | Tuple(str)
names : List[str] | Set[str] | Tuple[str]
The names to filter on. Can be a tuple, list, or set.

Returns
Expand All @@ -214,7 +214,7 @@ def pick_meta(
return self._filter_meta(axis, names, include=True)

def exclude_meta(
self, axis: int, names: Union[List(str), Set(str), Tuple(str)]
self, axis: int, names: Union[List[str], Set[str], Tuple[str]]
) -> Adat:
"""Returns an adat with excluded metadata/multiindices given the names to exclude.

Expand All @@ -225,7 +225,7 @@ def exclude_meta(
0 - row metadata,
1 - column metadata

names : List(str) | Set(str) | Tuple(str)
names : List[str] | Set[str] | Tuple[str]
The names to filter on. Can be a tuple, list, or set.

Returns
Expand All @@ -241,7 +241,7 @@ def exclude_meta(
return self._filter_meta(axis, names, include=False)

def insert_meta(
self, axis: int, name: str, values: Union[List(str), Tuple(str)]
self, axis: int, name: str, values: Union[List[str], Tuple[str]]
) -> Adat:
"""Returns an adat with the given metadata/multiindices added.

Expand All @@ -257,7 +257,7 @@ def insert_meta(
name : str
The name of the index to be added.

values : List(str) | Tuple(str)
values : List[str] | Tuple[str]
Values to be added to the metadata/multiindex. Can be a tuple or list

Returns
Expand All @@ -272,7 +272,7 @@ def insert_meta(
return self._insert_meta(axis, name, values, replace=False)

def replace_meta(
self, axis: int, name: str, values: Union[List(str), Tuple(str)]
self, axis: int, name: str, values: Union[List[str], Tuple[str]]
) -> Adat:
"""Returns an adat with the given metadata/multiindices added.

Expand All @@ -288,7 +288,7 @@ def replace_meta(
name : str
The name of the index to be added.

values : List(str) | Tuple(str)
values : List[str] | Tuple[str]
Values to be added to the metadata/multiindex. Can be a tuple or list

Returns
Expand All @@ -307,7 +307,7 @@ def insert_keyed_meta(
axis: int,
inserted_meta_name: str,
key_meta_name: str,
values_dict: Dict(str, str),
values_dict: Dict[str, str],
) -> Adat:
"""Inserts metadata into Adat given a dictionary of values keyed to existing metadata.

Expand All @@ -327,7 +327,7 @@ def insert_keyed_meta(
key_meta_name : str
The name of the index to use as the key-map.

values_dict : Dict(str, str)
values_dict : Dict[str, str]
Values to be added to the metadata/multiindex keyed to the existing values in `key_meta_name`.

Returns
Expand All @@ -337,7 +337,7 @@ def insert_keyed_meta(
Examples
--------
>>> new_adat = adat.insert_keyed_meta(axis=0, inserted_meta_name='NewBarcode', key_meta_name='Barcode', values_dict={"J12345": "1"})
>>> new_adat = adat.insert_keyed_meta(axis=1, inserted_meta_name='NewProteinType', key_meta_name='Type', values_dict={"Protein": "Buffer")
>>> new_adat = adat.insert_keyed_meta(axis=1, inserted_meta_name='NewProteinType', key_meta_name='Type', values_dict={"Protein": "Buffer"})
"""

values = []
Expand Down Expand Up @@ -366,7 +366,7 @@ def replace_keyed_meta(
self,
axis: int,
replaced_meta_name: str,
values_dict: Dict(str, str),
values_dict: Dict[str, str],
key_meta_name: str = None,
) -> Adat:
"""Updates metadata in an Adat given a dictionary of values keyed to existing metadata.
Expand All @@ -387,7 +387,7 @@ def replace_keyed_meta(
key_meta_name : str, optional
The name of the index to use as the key-map. Will default to `replaced_meta_name` if None.

values_dict : Dict(str, str)
values_dict : Dict[str, str]
Values to be added to the metadata/multiindex keyed to the existing values in `key_meta_name`.

Returns
Expand All @@ -396,8 +396,8 @@ def replace_keyed_meta(

Examples
--------
>>> new_adat = adat.replace_keyed_meta(axis=0, inserted_meta_name='Barcode', key_meta_name='SampleType', values_dict={"J12345": "Calibrator"})
>>> new_adat = adat.replace_keyed_meta(axis=1, inserted_meta_name='Type', key_meta_name='SeqId', values_dict={"12345-6": "ProteinSet1")
>>> new_adat = adat.replace_keyed_meta(axis=0, replaced_meta_name='Barcode', key_meta_name='SampleType', values_dict={"J12345": "Calibrator"})
>>> new_adat = adat.replace_keyed_meta(axis=1, replaced_meta_name='Type', key_meta_name='SeqId', values_dict={"12345-6": "ProteinSet1"})
"""

key_meta_name = key_meta_name or replaced_meta_name
Expand Down Expand Up @@ -443,7 +443,6 @@ def update_somamer_metadata_from_adat(self, adat: Adat) -> Adat:
Examples
--------
>>> new_adat = adat.update_somamer_metadata_from_adat(other_adat)
>>> new_adat = adat.update_somamer_metadata_from_adat(other_adat)
"""

# Check to make sure seq_ids & order are identical
Expand Down Expand Up @@ -517,7 +516,7 @@ def reorder_on_metadata(self, axis: int, name: str, source_adat: Adat) -> Adat:

Examples
--------
>>> new_adat = adat.reorder_on_metadata(axis=1, name='SeqId', other_adat)
>>> new_adat = adat.reorder_on_metadata(axis=1, name='SeqId', source_adat=other_adat)
"""
reorder_index = []
adat = self.copy()
Expand Down
55 changes: 43 additions & 12 deletions somadata/data/lift.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,22 @@


def check_substrings(full, sub1, sub2):
"""Check if two substrings exist in a string and if sub1 appears before sub2.

Parameters
----------
full : str
The full string to search in.
sub1 : str
The first substring to find.
sub2 : str
The second substring to find.

Returns
-------
bool
True if both substrings are in full and sub1 appears before sub2, False otherwise.
"""
if not sub1 in full:
return False
if not sub2 in full:
Expand All @@ -16,7 +32,14 @@ def check_substrings(full, sub1, sub2):


def getSomaScanLiftCCC():
"""Return the SomaScan Lifting Lin's CCC DataFrame."""
"""Return the SomaScan Lifting Lin's CCC DataFrame.

Returns
-------
pd.DataFrame
A DataFrame containing Lin's Concordance Correlation Coefficient (CCC) values
for SomaScan lifting across different assay versions.
"""
data = []
versions = ['s11k.json', 's7k.json', 's5k.json']
with ZipFile(zip_path, 'r') as zp:
Expand All @@ -40,12 +63,16 @@ class LiftData:
def __init__(self, from_plex, to_plex, matrix):
"""Instantiate a LiftData Object.

Parameters:
from_plex: The SomaScan assay version to lift from i.e. v5.0.
to_plex: The SomaScan assay version to lift to ie i.e. v4.1
matrix: The matrix you would like a reference for. 'serum' and 'plasma' are supported.
Parameters
----------
from_plex : str
The SomaScan assay version to lift from, e.g. v5.0.
to_plex : str
The SomaScan assay version to lift to, e.g. v4.1.
matrix : str
The matrix you would like a reference for. 'serum' and 'plasma' are supported.
"""
# instantiate these variables they should not persist accross the class.
# instantiate these variables they should not persist across the class.
self._scale_factors = pd.Series(dtype='float')
self._matrix = None
self._lins_ccc = pd.Series(dtype='float')
Expand All @@ -66,11 +93,15 @@ def _read_zip(self, from_plex):
def _get_colname(self, kind='Scalar'):
"""Iterate through the column names and find the one that matches the __init__ parameters and kind.

Parameters:
kind: str. 'Scalar' or 'CCC' along with the assay versions determines the column name returned.
Parameters
----------
kind : str
Either 'Scalar' or 'CCC'; together with the assay versions, it determines the column name returned.

Returns:
col: str. A column name from the reference data.
Returns
-------
col : str
A column name from the reference data.
"""
mat = self.matrix.capitalize()
for col in self._df.columns:
Expand All @@ -90,7 +121,7 @@ def _get_colname(self, kind='Scalar'):
)

def _extract_reference(self):
"""From the reference DataFrame scalars and CCC for the target matrix and target version."""
"""Extract the reference DataFrame scalars and CCC for the target matrix and target version."""
self._scale_factors = self._df[self._get_colname(kind='Scalar')]
self._lins_ccc = self._df[self._get_colname(kind='CCC')]

Expand All @@ -112,8 +143,8 @@ def scale_factors(self, scale_factors):

@property
def lins_ccc(self):
"""Lazy load Lin's CCC."""
if self._lins_ccc.empty:
"""Lazy load Lin's CCC."""
self._extract_reference()
return self._lins_ccc

Expand Down
Loading
Loading