-
Notifications
You must be signed in to change notification settings - Fork 182
Add adapt_vars_like to align .var between AnnData objects (issue #1697) #1986
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from 14 commits
06bb519
24da345
74b3ebd
aa2295f
16264bf
46b728f
c09cf63
d1910d4
0dd7878
5d6279e
a14af7f
a30c365
fa48833
11fdb50
715fd0f
63a2933
8c30646
ac4c067
3a65f62
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -62,4 +62,5 @@ def __getattr__(attr_name: str) -> Any: | |
| "settings", | ||
| "types", | ||
| "typing", | ||
| "utils", | ||
| ] | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -20,6 +20,7 @@ | |
| from collections.abc import Iterable, Mapping, Sequence | ||
| from typing import Any, Literal | ||
|
|
||
|
|
||
| logger = get_logger(__name__) | ||
|
|
||
|
|
||
|
|
@@ -450,3 +451,82 @@ def module_get_attr_redirect( | |
| return getattr(mod, new_path) | ||
| msg = f"module {full_old_module_path} has no attribute {attr_name!r}" | ||
| raise AttributeError(msg) | ||
|
|
||
|
|
||
def adapt_vars_like(
    source: AnnData,
    target: AnnData,
    fill_value: float = 0.0,
) -> AnnData:
    """
    Adapt the `.var` structure of `target` to match that of `source`.

    This function makes sure that the returned AnnData object has the same set
    of genes (`.var_names`), in the same order, as the `source` AnnData object.
    Genes that are missing from `target` are filled in with `fill_value`.

    Only `.X`, `.obs`, `.layers`, `.varm` and `.raw` of `target` are carried
    over into the result; `.var` is a copy of `source.var`.

    Parameters
    ----------
    source
        Reference AnnData object whose genes (`.var`) define the desired structure.
    target
        AnnData object to be adapted to match the source's gene structure.
    fill_value
        Value used to fill in missing genes. Defaults to 0.0.

    Returns
    -------
    AnnData
        A new AnnData object with the genes matching the source's structure and data
        from `target`, with missing values filled in with `fill_value`.

    """
    # Imported here to avoid circular import issues.
    from ._core.anndata import AnnData
    from ._core.merge import Reindexer

    # The source's var table becomes the feature index of the result.
    new_var = source.var.copy()
    # Maps positions along target's var axis onto the new var index.
    reindexer = Reindexer(target.var.index, new_var.index)

    if target.X is not None:
        new_X = reindexer(target.X, axis=1, fill_value=fill_value)
    else:
        # No expression matrix on target: build a constant matrix of the right shape.
        new_X = np.full((target.n_obs, len(new_var)), fill_value)

    # Layers share the var axis with X, so they are reindexed along columns.
    new_layers = {
        k: reindexer(v, axis=1, fill_value=fill_value) for k, v in target.layers.items()
    }
    # varm entries have genes on axis 0. Each reindexed entry is converted to a
    # plain nested list to satisfy the type checker.
    # NOTE(review): the list conversion discards the original container type of
    # each entry - confirm this is intended.
    new_varm: dict[str, Sequence[Any]] = {
        k: np.asarray(reindexer(v, axis=0, fill_value=fill_value)).tolist()
        for k, v in target.varm.items()
    }

    # obs is copied unchanged: only the feature axis is being aligned.
    new_adata = AnnData(
        X=new_X,
        obs=target.obs.copy(),
        var=new_var,
        varm=new_varm,
        layers=new_layers,
    )

    if target.raw is not None:
        # .raw carries its own variable axis, which may differ from target.var,
        # so it needs a reindexer built from the raw var names rather than the
        # one built from target.var.index.
        raw_reindexer = Reindexer(target.raw.var_names, new_var.index)
        new_raw_X = raw_reindexer(target.raw.X, axis=1, fill_value=fill_value)
        # A fresh AnnData with matching X/var/obs is assigned to .raw.
        new_adata.raw = AnnData(
            X=new_raw_X,
            var=source.var.copy(),
            obs=target.obs.copy(),
        )

    return new_adata
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -2,13 +2,14 @@ | |
|
|
||
| from itertools import repeat | ||
|
|
||
| import numpy as np | ||
| import pandas as pd | ||
| import pytest | ||
| from scipy import sparse | ||
|
|
||
| import anndata as ad | ||
| from anndata.tests.helpers import gen_typed_df | ||
| from anndata.utils import make_index_unique | ||
| from anndata.utils import adapt_vars_like, make_index_unique | ||
|
|
||
|
|
||
| def test_make_index_unique(): | ||
|
|
@@ -55,3 +56,55 @@ def test_adata_unique_indices(): | |
|
|
||
| pd.testing.assert_index_equal(v.obsm["df"].index, v.obs_names) | ||
| pd.testing.assert_index_equal(v.varm["df"].index, v.var_names) | ||
|
|
||
|
|
||
@pytest.mark.parametrize(
    ("source", "target", "expected_X"),
    [
        pytest.param(
            ad.AnnData(X=np.ones((1, 3)), var=pd.DataFrame(index=["a", "b", "c"])),
            ad.AnnData(
                X=np.array([[1, 2, 3]]), var=pd.DataFrame(index=["a", "b", "c"])
            ),
            np.array([[1, 2, 3]]),
            id="exact_match",
        ),
        pytest.param(
            ad.AnnData(X=np.ones((1, 3)), var=pd.DataFrame(index=["a", "b", "c"])),
            ad.AnnData(
                X=np.array([[3, 2, 1]]), var=pd.DataFrame(index=["c", "b", "a"])
            ),
            np.array([[1, 2, 3]]),
            id="different_order",
        ),
    ],
)
def test_adapt_vars(source, target, expected_X):
    """Check that the adapted X follows the column order of ``source.var_names``."""
    adapted = adapt_vars_like(source, target)
    np.testing.assert_array_equal(adapted.X, expected_X)
    # Compare as plain lists so only the labels and their order matter.
    assert adapted.var_names.tolist() == source.var_names.tolist()
|
|
||
|
|
||
# NOTE(review): a reviewer comment on this test (truncated in the page capture)
# suggests merging it with another test - presumably test_adapt_vars; confirm.
@pytest.mark.parametrize(
    ("source", "target", "fill_value", "expected_X"),
    [
        pytest.param(
            ad.AnnData(X=np.ones((1, 2)), var=pd.DataFrame(index=["g1", "g2"])),
            ad.AnnData(X=np.array([[7, 8]]), var=pd.DataFrame(index=["g3", "g4"])),
            0.5,
            np.array([[0.5, 0.5]]),
            id="no_shared_genes",
        ),
        pytest.param(
            ad.AnnData(X=np.ones((1, 3)), var=pd.DataFrame(index=["g1", "g2", "g3"])),
            ad.AnnData(X=np.array([[1, 3]]), var=pd.DataFrame(index=["g1", "g3"])),
            -1,
            np.array([[1, -1, 3]]),
            id="missing_genes",
        ),
    ],
)
def test_adapt_vars_with_fill_value(source, target, fill_value, expected_X):
    """Check that genes absent from ``target`` are filled with ``fill_value``."""
    adapted = adapt_vars_like(source, target, fill_value=fill_value)
    np.testing.assert_array_equal(adapted.X, expected_X)
    # Compare as plain lists so only the labels and their order matter.
    assert adapted.var_names.tolist() == source.var_names.tolist()
Uh oh!
There was an error while loading. Please reload this page.