Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"

[project]
name = "somadata"
version = "1.2.5"
version = "1.2.6"
description = "SomaLogic Python Data Input/Output Library"
authors = [
{name = "SomaLogic", email = "support@somalogic.com"},
Expand Down
17 changes: 14 additions & 3 deletions somadata/base/adat_math_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,18 @@


def calcELOD(x: pd.Series):
"""Calculated the estimated limit of detection based on x a pd.Series of SomaScan measurements of buffer."""
"""Calculate the estimated limit of detection based on x a pd.Series of SomaScan measurements of buffer.

Parameters
----------
x : pd.Series
A pandas Series of SomaScan measurements of buffer.

Returns
-------
float
The estimated limit of detection (eLOD).
"""
med = np.median(x)
absDiff = np.abs(x - med)
medDiff = np.median(absDiff)
Expand Down Expand Up @@ -93,10 +104,10 @@ def lift(self, lift_to_version: str = None):

Examples
--------
>>> # the adat stores the current assay version. This value is used by the tool to select the correct reference but you are not required to ender it.
>>> # the adat stores the current assay version. This value is used by the tool to select the correct reference but you are not required to enter it.
>>> adat.header_metadata['!AssayVersion']
'v5.0'
>>> # the adat stores the matrix. This value is used by the tool to select the correct reference but you are not required to ender it.
>>> # the adat stores the matrix. This value is used by the tool to select the correct reference but you are not required to enter it.
>>> adat.header_metadata['StudyMatrix']
'EDTA Plasma'
>>> lifted_adat = adat.lift('v4.1') # lifting to the previous assay version.
Expand Down
47 changes: 23 additions & 24 deletions somadata/base/adat_meta_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ def _filter_on_meta(
self,
axis: int,
name: str,
values: Union[List(str), Set(str), Tuple(str)],
values: Union[List[str], Set[str], Tuple[str]],
include: bool = True,
) -> Adat:
# Check to see if values is the right variable type
Expand Down Expand Up @@ -61,7 +61,7 @@ def _filter_on_meta(
return adat.copy()

def _filter_meta(
self, axis: int, names: Union[List(str), Set(str), Tuple(str)], include: bool
self, axis: int, names: Union[List[str], Set[str], Tuple[str]], include: bool
) -> Adat:
# Check to see if names is the right variable type
if not isinstance(names, (list, tuple, set)):
Expand Down Expand Up @@ -94,7 +94,7 @@ def _filter_meta(
return adat

def _insert_meta(
self, axis: int, name: str, values: Union[List(str), Tuple(str)], replace: bool
self, axis: int, name: str, values: Union[List[str], Tuple[str]], replace: bool
) -> Adat:
adat = self.copy()
if axis == 0:
Expand Down Expand Up @@ -127,7 +127,7 @@ def _insert_meta(
return adat

def exclude_on_meta(
self, axis: int, name: str, values: Union[List(str), Set(str), Tuple(str)]
self, axis: int, name: str, values: Union[List[str], Set[str], Tuple[str]]
) -> Adat:
"""Returns an adat with rfu rows or columns excluded given the multiindex name and values to exclude on.

Expand All @@ -141,7 +141,7 @@ def exclude_on_meta(
name : str
The name of the metadata/multiindex row/column to filter based on.

values : List(str) | Set(str) | Tuple(str)
values : List[str] | Set[str] | Tuple[str]
The values to filter on. Can be a tuple, list, or set.

Returns
Expand All @@ -157,7 +157,7 @@ def exclude_on_meta(
return self._filter_on_meta(axis, name, values, include=False)

def pick_on_meta(
self, axis: int, name: str, values: Union[List(str), Set(str), Tuple(str)]
self, axis: int, name: str, values: Union[List[str], Set[str], Tuple[str]]
) -> Adat:
"""Returns an adat with rfu rows or columns excluded given the multiindex name and values to keep.

Expand All @@ -171,7 +171,7 @@ def pick_on_meta(
name : str
The name of the metadata/multiindex row/column to filter based on.

values : List(str) | Set(str) | Tuple(str)
values : List[str] | Set[str] | Tuple[str]
The values to filter on. Can be a tuple, list, or set.

Returns
Expand All @@ -188,7 +188,7 @@ def pick_on_meta(
return self._filter_on_meta(axis, name, values, include=True)

def pick_meta(
self, axis: int, names: Union[List(str), Set(str), Tuple(str)]
self, axis: int, names: Union[List[str], Set[str], Tuple[str]]
) -> Adat:
"""Returns an adat with excluded metadata/multiindices given the names to keep.

Expand All @@ -199,7 +199,7 @@ def pick_meta(
0 - row metadata,
1 - column metadata

names : List(str) | Set(str) | Tuple(str)
names : List[str] | Set[str] | Tuple[str]
The names to filter on. Can be a tuple, list, or set.

Returns
Expand All @@ -214,7 +214,7 @@ def pick_meta(
return self._filter_meta(axis, names, include=True)

def exclude_meta(
self, axis: int, names: Union[List(str), Set(str), Tuple(str)]
self, axis: int, names: Union[List[str], Set[str], Tuple[str]]
) -> Adat:
"""Returns an adat with excluded metadata/multiindices given the names to exclude.

Expand All @@ -225,7 +225,7 @@ def exclude_meta(
0 - row metadata,
1 - column metadata

names : List(str) | Set(str) | Tuple(str)
names : List[str] | Set[str] | Tuple[str]
The names to filter on. Can be a tuple, list, or set.

Returns
Expand All @@ -241,7 +241,7 @@ def exclude_meta(
return self._filter_meta(axis, names, include=False)

def insert_meta(
self, axis: int, name: str, values: Union[List(str), Tuple(str)]
self, axis: int, name: str, values: Union[List[str], Tuple[str]]
) -> Adat:
"""Returns an adat with the given metadata/multiindices added.

Expand All @@ -257,7 +257,7 @@ def insert_meta(
name : str
The name of the index to be added.

values : List(str) | Tuple(str)
values : List[str] | Tuple[str]
Values to be added to the metadata/multiindex. Can be a tuple or list

Returns
Expand All @@ -272,7 +272,7 @@ def insert_meta(
return self._insert_meta(axis, name, values, replace=False)

def replace_meta(
self, axis: int, name: str, values: Union[List(str), Tuple(str)]
self, axis: int, name: str, values: Union[List[str], Tuple[str]]
) -> Adat:
"""Returns an adat with the given metadata/multiindices added.

Expand All @@ -288,7 +288,7 @@ def replace_meta(
name : str
The name of the index to be added.

values : List(str) | Tuple(str)
values : List[str] | Tuple[str]
Values to be added to the metadata/multiindex. Can be a tuple or list

Returns
Expand All @@ -307,7 +307,7 @@ def insert_keyed_meta(
axis: int,
inserted_meta_name: str,
key_meta_name: str,
values_dict: Dict(str, str),
values_dict: Dict[str, str],
) -> Adat:
"""Inserts metadata into Adat given a dictionary of values keyed to existing metadata.

Expand All @@ -327,7 +327,7 @@ def insert_keyed_meta(
key_meta_name : str
The name of the index to use as the key-map.

values_dict : Dict(str, str)
values_dict : Dict[str, str]
Values to be added to the metadata/multiindex keyed to the existing values in `key_meta_name`.

Returns
Expand All @@ -337,7 +337,7 @@ def insert_keyed_meta(
Examples
--------
>>> new_adat = adat.insert_keyed_meta(axis=0, inserted_meta_name='NewBarcode', key_meta_name='Barcode', values_dict={"J12345": "1"})
>>> new_adat = adat.insert_keyed_meta(axis=1, inserted_meta_name='NewProteinType', key_meta_name='Type', values_dict={"Protein": "Buffer")
>>> new_adat = adat.insert_keyed_meta(axis=1, inserted_meta_name='NewProteinType', key_meta_name='Type', values_dict={"Protein": "Buffer"})
"""

values = []
Expand Down Expand Up @@ -366,7 +366,7 @@ def replace_keyed_meta(
self,
axis: int,
replaced_meta_name: str,
values_dict: Dict(str, str),
values_dict: Dict[str, str],
key_meta_name: str = None,
) -> Adat:
"""Updates metadata in an Adat given a dictionary of values keyed to existing metadata.
Expand All @@ -387,7 +387,7 @@ def replace_keyed_meta(
key_meta_name : str, optional
The name of the index to use as the key-map. Will default to `replaced_meta_name` if None.

values_dict : Dict(str, str)
values_dict : Dict[str, str]
Values to be added to the metadata/multiindex keyed to the existing values in `key_meta_name`.

Returns
Expand All @@ -396,8 +396,8 @@ def replace_keyed_meta(

Examples
--------
>>> new_adat = adat.replace_keyed_meta(axis=0, inserted_meta_name='Barcode', key_meta_name='SampleType', values_dict={"J12345": "Calibrator"})
>>> new_adat = adat.replace_keyed_meta(axis=1, inserted_meta_name='Type', key_meta_name='SeqId', values_dict={"12345-6": "ProteinSet1")
>>> new_adat = adat.replace_keyed_meta(axis=0, replaced_meta_name='Barcode', key_meta_name='SampleType', values_dict={"J12345": "Calibrator"})
>>> new_adat = adat.replace_keyed_meta(axis=1, replaced_meta_name='Type', key_meta_name='SeqId', values_dict={"12345-6": "ProteinSet1"})
"""

key_meta_name = key_meta_name or replaced_meta_name
Expand Down Expand Up @@ -443,7 +443,6 @@ def update_somamer_metadata_from_adat(self, adat: Adat) -> Adat:
Examples
--------
>>> new_adat = adat.update_somamer_metadata_from_adat(other_adat)
>>> new_adat = adat.update_somamer_metadata_from_adat(other_adat)
"""

# Check to make sure seq_ids & order are identical
Expand Down Expand Up @@ -517,7 +516,7 @@ def reorder_on_metadata(self, axis: int, name: str, source_adat: Adat) -> Adat:

Examples
--------
>>> new_adat = adat.reorder_on_metadata(axis=1, name='SeqId', other_adat)
>>> new_adat = adat.reorder_on_metadata(axis=1, name='SeqId', source_adat=other_adat)
"""
reorder_index = []
adat = self.copy()
Expand Down
55 changes: 43 additions & 12 deletions somadata/data/lift.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,22 @@


def check_substrings(full, sub1, sub2):
"""Check if two substrings exist in a string and if sub1 appears before sub2.

Parameters
----------
full : str
The full string to search in.
sub1 : str
The first substring to find.
sub2 : str
The second substring to find.

Returns
-------
bool
True if both substrings are in full and sub1 appears before sub2, False otherwise.
"""
if not sub1 in full:
return False
if not sub2 in full:
Expand All @@ -16,7 +32,14 @@ def check_substrings(full, sub1, sub2):


def getSomaScanLiftCCC():
"""Return the SomaScan Lifting Lin's CCC DataFrame."""
"""Return the SomaScan Lifting Lin's CCC DataFrame.

Returns
-------
pd.DataFrame
A DataFrame containing Lin's Concordance Correlation Coefficient (CCC) values
for SomaScan lifting across different assay versions.
"""
data = []
versions = ['s11k.json', 's7k.json', 's5k.json']
with ZipFile(zip_path, 'r') as zp:
Expand All @@ -40,12 +63,16 @@ class LiftData:
def __init__(self, from_plex, to_plex, matrix):
"""Instantiate a LiftData Object.

Parameters:
from_plex: The SomaScan assay version to lift from i.e. v5.0.
to_plex: The SomaScan assay version to lift to ie i.e. v4.1
matrix: The matrix you would like a reference for. 'serum' and 'plasma' are supported.
Parameters
----------
from_plex : str
The SomaScan assay version to lift from, e.g. v5.0.
to_plex : str
The SomaScan assay version to lift to, e.g. v4.1.
matrix : str
The matrix you would like a reference for. 'serum' and 'plasma' are supported.
"""
# instantiate these variables they should not persist accross the class.
# instantiate these variables they should not persist across the class.
self._scale_factors = pd.Series(dtype='float')
self._matrix = None
self._lins_ccc = pd.Series(dtype='float')
Expand All @@ -66,11 +93,15 @@ def _read_zip(self, from_plex):
def _get_colname(self, kind='Scalar'):
"""Iterate through the column names and find the one that matches the __init__ parameters and kind.

Parameters:
kind: str. 'Scalar' or 'CCC' along with the assay versions determines the column name returned.
Parameters
----------
kind : str
'Scalar' or 'CCC' along with the assay versions determines the column name returned.

Returns:
col: str. A column name from the reference data.
Returns
-------
col : str
A column name from the reference data.
"""
mat = self.matrix.capitalize()
for col in self._df.columns:
Expand All @@ -90,7 +121,7 @@ def _get_colname(self, kind='Scalar'):
)

def _extract_reference(self):
"""From the reference DataFrame scalars and CCC for the target matrix and target version."""
"""Extract the reference DataFrame scalars and CCC for the target matrix and target version."""
self._scale_factors = self._df[self._get_colname(kind='Scalar')]
self._lins_ccc = self._df[self._get_colname(kind='CCC')]

Expand All @@ -112,8 +143,8 @@ def scale_factors(self, scale_factors):

@property
def lins_ccc(self):
"""Lazy load Lin's CCC."""
if self._lins_ccc.empty:
"""Lazy load Lin's CCC."""
self._extract_reference()
return self._lins_ccc

Expand Down
10 changes: 4 additions & 6 deletions somadata/io/adat/file.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,7 @@

def parse_file(
f: Union[str, io.TextIOWrapper], compatibility_mode: bool = False
) -> Tuple[
List[List[float]], Dict[str, List[str]], Dict[str, List[str]], Dict[str, str]
]:
) -> Tuple[List[List[float]], Dict[str, List[str]], Dict[str, List], Dict[str, str]]:
"""Returns component pieces of an adat given an adat file object.

Parameters
Expand All @@ -37,9 +35,9 @@ def parse_file(
A dictionary of each column of the row metadata where the key-value
pairs are column-name and an array of each sample's corresponding metadata

column_metadata : Dict[str, List[str]]
column_metadata : Dict[str, List]
A dictionary of each row of the adat column metadata where the key-value pairs are
row-name and an array of each somamer's corresponding metadata.
row-name and an array of each somamer's corresponding metadata (mixed types).

header_metadata : Dict[str, str]
A dictionary in which each row of the header metadata corresponds to a key-value pair.
Expand Down Expand Up @@ -201,7 +199,7 @@ def read_adat(path_or_buf: Union[str, io.TextIOWrapper], *args, **kwargs) -> Ada

Examples
--------
>>> adat = Adat.from_file('path/to/file.adat')
>>> adat = read_adat('path/to/file.adat')

Returns
-------
Expand Down
4 changes: 2 additions & 2 deletions somadata/tools/adat_concatenation.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ def _concat_header_metadata(adats: List[Adat], merge_strategy=None):
return base_header


def _concat_column_metadata(adats: List[Adat]) -> Dict(str, List):
def _concat_column_metadata(adats: List[Adat]) -> Dict[str, List]:
# Get Col Metadata
col_metadata = {}
col_checks = []
Expand Down Expand Up @@ -163,7 +163,7 @@ def _concat_column_metadata(adats: List[Adat]) -> Dict(str, List):
return col_metadata


def _concat_row_metadata(adats: List[Adat]) -> Dict(str, List):
def _concat_row_metadata(adats: List[Adat]) -> Dict[str, List]:
# Check if Row Metadata Matches
names = set(adats[0].index.names)
symmetric_difference = set()
Expand Down