diff --git a/docs/release-notes/3980.fix.md b/docs/release-notes/3980.fix.md new file mode 100644 index 0000000000..b5ec7c6fcb --- /dev/null +++ b/docs/release-notes/3980.fix.md @@ -0,0 +1 @@ +Prevent segfault when running {func}`scanpy.pp.highly_variable_genes` with `flavor='seurat_v3{,_paper}'` and some all-zero genes {smaller}`P Angerer` diff --git a/src/scanpy/_compat.py b/src/scanpy/_compat.py index c2e26412e8..11b0704328 100644 --- a/src/scanpy/_compat.py +++ b/src/scanpy/_compat.py @@ -113,6 +113,8 @@ def warn( more_file_prefixes: tuple[str, ...] = (), ) -> None: """Issue a warning, skipping frames from certain file prefixes.""" + __tracebackhide__ = True + if not skip_file_prefixes: skip_file_prefixes = (*_FILE_PREFIXES, *more_file_prefixes) elif more_file_prefixes: diff --git a/src/scanpy/preprocessing/_highly_variable_genes.py b/src/scanpy/preprocessing/_highly_variable_genes.py index f980ac2533..906f0fb16e 100644 --- a/src/scanpy/preprocessing/_highly_variable_genes.py +++ b/src/scanpy/preprocessing/_highly_variable_genes.py @@ -183,14 +183,13 @@ def _highly_variable_genes_seurat_v3( # noqa: PLR0912, PLR0915 # These get computed anyway for loess if isinstance(mean, DaskArray): mean, var = mean.compute(), var.compute() - not_const = var > 0 estimat_var = np.zeros(data.shape[1], dtype=np.float64) - - y = np.log10(var[not_const]) - x = np.log10(mean[not_const]) - model = loess(x, y, span=span, degree=2) - model.fit() - estimat_var[not_const] = model.outputs.fitted_values + if (not_const := var > 0).any(): + y = np.log10(var[not_const]) + x = np.log10(mean[not_const]) + model = loess(x, y, span=span, degree=2) + model.fit() + estimat_var[not_const] = model.outputs.fitted_values reg_std = np.sqrt(10**estimat_var) # clip large values as in Seurat diff --git a/tests/test_highly_variable_genes.py b/tests/test_highly_variable_genes.py index 847f1ae75c..670c647ab9 100644 --- a/tests/test_highly_variable_genes.py +++ b/tests/test_highly_variable_genes.py @@ -10,6 
+10,7 @@
 import numpy as np
 import pandas as pd
 import pytest
+import scipy.sparse as sps
 from anndata import AnnData
 from fast_array_utils import stats
 from pandas.testing import assert_frame_equal, assert_index_equal
@@ -522,6 +523,22 @@ def test_seurat_v3_warning():
         sc.pp.highly_variable_genes(pbmc, flavor="seurat_v3")
 
 
+@needs.skmisc
+def test_seurat_v3_degenerate() -> None:
+    """Tests that the flavor handles all-zero genes."""
+    # Seed the generator so the sparsity pattern (and thus which genes are
+    # all-zero) is identical on every run and failures are reproducible.
+    adata = AnnData(
+        sps.random(
+            10, 1000, density=0.001, format="csr", dtype="int", random_state=0
+        )
+    )
+    # abs() keeps the sampled values non-negative (presumably because the
+    # flavor expects count data; confirm against the flavor's input checks).
+    adata.X.data = np.abs(adata.X.data)
+
+    sc.pp.highly_variable_genes(adata, flavor="seurat_v3")
+
+
 def test_batches():
     adata = pbmc68k_reduced()
     adata.X[:100, :100] = np.zeros((100, 100))