diff --git a/src/scirpy/ir_dist/__init__.py b/src/scirpy/ir_dist/__init__.py index d5e68fc37..e1680495c 100644 --- a/src/scirpy/ir_dist/__init__.py +++ b/src/scirpy/ir_dist/__init__.py @@ -4,11 +4,12 @@ from typing import Literal import numpy as np +import pandas as pd from scanpy import logging from scipy.sparse import csr_matrix from scirpy.get import airr as get_airr -from scirpy.util import DataHandler, _doc_params, _is_na, deprecated +from scirpy.util import DataHandler, _doc_params, deprecated from . import metrics @@ -237,7 +238,7 @@ def _get_unique_seqs(tmp_adata, chain_type): tmp_seqs = np.concatenate( [get_airr(tmp_adata, key, f"{chain_type}_{chain_id}").values for chain_id in ["1", "2"]] # type: ignore ) - return np.unique([x.upper() for x in tmp_seqs[~_is_na(tmp_seqs)]]) + return np.unique([x.upper() for x in tmp_seqs[~pd.isna(tmp_seqs)]]) for i, tmp_params in enumerate([params, params_ref]): if tmp_params is not None: diff --git a/src/scirpy/pl/_vdj_usage.py b/src/scirpy/pl/_vdj_usage.py index db7bf2d15..133179a7f 100644 --- a/src/scirpy/pl/_vdj_usage.py +++ b/src/scirpy/pl/_vdj_usage.py @@ -4,10 +4,11 @@ import matplotlib.pyplot as plt import numpy as np +import pandas as pd from scirpy.get import airr as get_airr from scirpy.io import AirrCell -from scirpy.util import DataHandler, _is_na, _normalize_counts +from scirpy.util import DataHandler, _normalize_counts from .styling import _init_ax @@ -163,7 +164,7 @@ def vdj_usage( # Draw gene segments for i, (segment_size, gene) in list(enumerate(zip(segment_sizes, genes, strict=False)))[:max_segments][::-1]: - if _is_na(gene): + if pd.isna(gene): gene = "none" gene_tops[col_name][gene] = bottom + segment_size if draw_bars: @@ -229,7 +230,7 @@ def vdj_usage( try: tmp_gene = ribbon[target_pair[0]] tmp_col = target_pair[0] - tmp_gene = "none" if _is_na(tmp_gene) else tmp_gene + tmp_gene = "none" if pd.isna(tmp_gene) else tmp_gene ribbon_color = gene_colors[tmp_col][tmp_gene] except KeyError: # Don't draw ribbon if the source gene is not drawn. @@ -237,7 +238,7 @@ def vdj_usage( for col_name in target_pair: gene = ribbon[col_name] - if _is_na(gene): + if pd.isna(gene): gene = "none" if gene not in tmp_gene_tops[col_name]: gene = "other" diff --git a/src/scirpy/tl/_chain_qc.py b/src/scirpy/tl/_chain_qc.py index 82938d4b2..ac0fcb5a1 100644 --- a/src/scirpy/tl/_chain_qc.py +++ b/src/scirpy/tl/_chain_qc.py @@ -7,7 +7,7 @@ from scanpy import logging from scirpy import get -from scirpy.util import DataHandler, _is_na +from scirpy.util import DataHandler @DataHandler.inject_param_docs() @@ -176,10 +176,10 @@ def _chain_pairing( logging.debug("Done initalizing") - mask_has_vj1 = ~_is_na(get.airr(params, "junction_aa", "VJ_1").values) - mask_has_vdj1 = ~_is_na(get.airr(params, "junction_aa", "VDJ_1").values) - mask_has_vj2 = ~_is_na(get.airr(params, "junction_aa", "VJ_2").values) - mask_has_vdj2 = ~_is_na(get.airr(params, "junction_aa", "VDJ_2").values) + mask_has_vj1 = ~pd.isna(get.airr(params, "junction_aa", "VJ_1").values) + mask_has_vdj1 = ~pd.isna(get.airr(params, "junction_aa", "VDJ_1").values) + mask_has_vj2 = ~pd.isna(get.airr(params, "junction_aa", "VJ_2").values) + mask_has_vdj2 = ~pd.isna(get.airr(params, "junction_aa", "VDJ_2").values) logging.debug("Done with masks") diff --git a/src/scirpy/tl/_clonal_expansion.py b/src/scirpy/tl/_clonal_expansion.py index 491c39098..d18076cef 100644 --- a/src/scirpy/tl/_clonal_expansion.py +++ b/src/scirpy/tl/_clonal_expansion.py @@ -5,7 +5,7 @@ import numpy as np import pandas as pd -from scirpy.util import DataHandler, _is_na, _normalize_counts +from scirpy.util import DataHandler, _normalize_counts def _clip_and_count( @@ -48,7 +48,7 @@ def _get_interval(value: int) -> str: .assign(tmp_count=lambda X: pd.Categorical(_get_interval(X["tmp_count"].values), categories=categories)) ) clipped_count = obs.merge(clonotype_counts, how="left", on=groupby_cols)["tmp_count"] - clipped_count[_is_na(obs[target_col])] = "nan" + clipped_count[pd.isna(obs[target_col])] = "nan" clipped_count.index = obs.index if inplace: @@ -170,7 +170,7 @@ def summarize_clonal_expansion( obs[tmp_col] = expansion # filter NA values - obs = obs.loc[~_is_na(obs[target_col]), :] + obs = obs.loc[~pd.isna(obs[target_col]), :] if summarize_by == "clone_id": obs.drop_duplicates(inplace=True) diff --git a/src/scirpy/tl/_clonotype_modularity.py b/src/scirpy/tl/_clonotype_modularity.py index 487b2a507..39f8564d2 100644 --- a/src/scirpy/tl/_clonotype_modularity.py +++ b/src/scirpy/tl/_clonotype_modularity.py @@ -2,13 +2,14 @@ from typing import Literal import numpy as np +import pandas as pd import scipy.sparse import scipy.stats from mudata import MuData from scanpy import logging from statsmodels.stats.multitest import fdrcorrection -from scirpy.util import DataHandler, _is_na, tqdm +from scirpy.util import DataHandler, tqdm from scirpy.util._negative_binomial import fit_nbinom from scirpy.util.graph import _get_igraph_from_adjacency @@ -107,7 +108,7 @@ def clonotype_modularity( n_permutations = 1000 if permutation_test == "approx" else 10000 clonotype_per_cell = params.get_obs(target_col) - cells_with_valid_clonotype = clonotype_per_cell[~_is_na(clonotype_per_cell.values)].index + cells_with_valid_clonotype = clonotype_per_cell[~pd.isna(clonotype_per_cell.values)].index data_subset = params.data[cells_with_valid_clonotype.values, :] try: connectivities = data_subset.obsp[connectivity_key] diff --git a/src/scirpy/tl/_diversity.py b/src/scirpy/tl/_diversity.py index 84a21a2e1..a10fd8323 100644 --- a/src/scirpy/tl/_diversity.py +++ b/src/scirpy/tl/_diversity.py @@ -4,7 +4,7 @@ import numpy as np import pandas as pd -from scirpy.util import DataHandler, _is_na +from scirpy.util import DataHandler def _shannon_entropy(counts: np.ndarray): @@ -105,7 +105,7 @@ def alpha_diversity( """ params = DataHandler(adata, airr_mod) ir_obs = params.get_obs([target_col, groupby]) - ir_obs = ir_obs.loc[~_is_na(ir_obs[target_col]), :] + ir_obs = ir_obs.loc[~pd.isna(ir_obs[target_col]), :] clono_counts = ir_obs.groupby([groupby, target_col], observed=True).size().reset_index(name="count") diversity = {} diff --git a/src/scirpy/tl/_group_abundance.py b/src/scirpy/tl/_group_abundance.py index b1cea2b7d..d4948d39e 100644 --- a/src/scirpy/tl/_group_abundance.py +++ b/src/scirpy/tl/_group_abundance.py @@ -5,7 +5,7 @@ import pandas as pd from scirpy.get import _has_ir -from scirpy.util import DataHandler, _is_na, _normalize_counts +from scirpy.util import DataHandler, _normalize_counts def _group_abundance( @@ -17,7 +17,7 @@ def _group_abundance( sort: Literal["count", "alphabetical"] | Sequence[str] = "count", ) -> pd.DataFrame: # remove NA rows - na_mask = _is_na(ir_obs[groupby]) | _is_na(ir_obs[target_col]) + na_mask = pd.isna(ir_obs[groupby]) | pd.isna(ir_obs[target_col]) ir_obs = ir_obs.loc[~na_mask, :] # normalize to fractions diff --git a/src/scirpy/tl/_ir_query.py b/src/scirpy/tl/_ir_query.py index 56b19cd12..b9c08ac9a 100644 --- a/src/scirpy/tl/_ir_query.py +++ b/src/scirpy/tl/_ir_query.py @@ -10,7 +10,7 @@ from scirpy.ir_dist import MetricType, _get_metric_key from scirpy.ir_dist._clonotype_neighbors import ClonotypeNeighbors -from scirpy.util import DataHandler, _is_na, read_cell_indices, tqdm +from scirpy.util import DataHandler, read_cell_indices, tqdm from ._clonotypes import _common_doc, _common_doc_parallelism, _doc_clonotype_definition, _validate_parameters @@ -407,7 +407,7 @@ def reduce_fun(x): # convert nan-equivalents to real nan values. for col in df_res: - df_res.loc[_is_na(df_res[col]), col] = None + df_res.loc[pd.isna(df_res[col]), col] = None if inplace: for col in df_res: diff --git a/src/scirpy/tl/_repertoire_overlap.py b/src/scirpy/tl/_repertoire_overlap.py index 9fe8951c2..5dd1a369e 100644 --- a/src/scirpy/tl/_repertoire_overlap.py +++ b/src/scirpy/tl/_repertoire_overlap.py @@ -5,7 +5,7 @@ from scipy.cluster import hierarchy as sc_hierarchy from scipy.spatial import distance as sc_distance -from scirpy.util import DataHandler, _is_na, _normalize_counts +from scirpy.util import DataHandler, _normalize_counts @DataHandler.inject_param_docs() @@ -65,7 +65,7 @@ def repertoire_overlap( obs[normalize] = params.get_obs(normalize) # Remove NA rows - na_mask = _is_na(obs[groupby]) | _is_na(obs[target_col]) + na_mask = pd.isna(obs[groupby]) | pd.isna(obs[target_col]) df = obs.loc[~na_mask, :].copy() # Normalize to fractions