scverse · amalia-k510 · May 14, 2025 · May 14, 2025 · May 14, 2025 · May 14, 2025
diff --git a/src/anndata/__init__.py b/src/anndata/__init__.py
@@ -20,7 +20,7 @@
     WriteWarning,
 )
 from .io import read_h5ad, read_zarr
-from .utils import module_get_attr_redirect
+from .utils import adapt_vars_like, module_get_attr_redirect
 
 # Submodules need to be imported last
 from . import abc, experimental, typing, io, types  # isort: skip
@@ -53,6 +53,7 @@ def __getattr__(attr_name: str) -> Any:
     "WriteWarning",
     "__version__",
     "abc",
+    "adapt_vars_like",
     "concat",
     "experimental",
     "io",

diff --git a/src/anndata/utils.py b/src/anndata/utils.py
@@ -450,3 +450,48 @@
         return getattr(mod, new_path)
     msg = f"module {full_old_module_path} has no attribute {attr_name!r}"
     raise AttributeError(msg)
+
+
+def adapt_vars_like(
+    source: AnnData, target: AnnData, fill_value: float = 0.0
+) -> AnnData:
+    """Reshape ``target`` so that its variables match those of ``source``.
+
+    Parameters
+    ----------
+    source
+        Object whose ``.var`` defines the variables (and their order) of the result.
+    target
+        Object whose ``.X`` and ``.obs`` are carried over; must have a data matrix.
+    fill_value
+        Value written to columns for variables of ``source`` missing from ``target``.
+
+    Returns
+    -------
+    A new object with a dense ``X``, a copy of ``target.obs`` and a copy of ``source.var``.
+
+    Raises
+    ------
+    ValueError
+        If ``target.X`` is ``None``.
+    """
+    # Imported here to avoid a circular import.
+    from ._core.anndata import AnnData
+
+    if target.X is None:
+        msg = "target.X is None; cannot adapt vars without a data matrix."
+        raise ValueError(msg)
+    new_var = source.var.copy()
+    # Dense output matrix, pre-filled so variables absent from target keep fill_value.
+    new_x = np.full((target.n_obs, new_var.shape[0]), fill_value, dtype=target.X.dtype)
+    shared_genes = source.var_names.intersection(target.var_names)
+    # Column positions of the shared variables in the output and in target.
+    source_idx = new_var.index.get_indexer(shared_genes)
+    target_idx = target.var.index.get_indexer(shared_genes)
+    shared_x = target.X[:, target_idx]
+    # A sparse selection cannot be assigned into a NumPy array; densify it first.
+    if hasattr(shared_x, "toarray"):
+        shared_x = shared_x.toarray()
+    new_x[:, source_idx] = shared_x
+    # obs is copied from target; var is the copy of source.var made above.
+    return AnnData(X=new_x, obs=target.obs.copy(), var=new_var)