-
Notifications
You must be signed in to change notification settings - Fork 98
Open
Description
Hi, as the title suggests, I have been dealing with test failures for a while whenever the environment versions change in CI. I can finally say I have pinpointed the issue. It is in this test:
squidpy/tests/graph/test_ppatterns.py
Lines 50 to 85 in 10e5e44
@pytest.mark.parametrize("mode", ["moran", "geary"]) | |
@pytest.mark.parametrize("n_jobs", [1, 2]) | |
def test_spatial_autocorr_reproducibility(dummy_adata: AnnData, n_jobs: int, mode: str): | |
"""Check spatial autocorr reproducibility results.""" | |
rng = np.random.RandomState(42) | |
spatial_autocorr(dummy_adata, mode=mode) | |
dummy_adata.var["highly_variable"] = rng.choice([True, False], size=dummy_adata.var_names.shape) | |
# seed will work only when multiprocessing/loky | |
df_1 = spatial_autocorr(dummy_adata, mode=mode, copy=True, n_jobs=n_jobs, seed=42, n_perms=50) | |
df_2 = spatial_autocorr(dummy_adata, mode=mode, copy=True, n_jobs=n_jobs, seed=42, n_perms=50) | |
idx_df = df_1.index.values | |
idx_adata = dummy_adata[:, dummy_adata.var["highly_variable"].values].var_names.values | |
if mode == "moran": | |
UNS_KEY = MORAN_K | |
elif mode == "geary": | |
UNS_KEY = GEARY_C | |
assert UNS_KEY in dummy_adata.uns.keys() | |
# assert fdr correction in adata.uns | |
assert "pval_sim_fdr_bh" in df_1 | |
assert "pval_norm_fdr_bh" in dummy_adata.uns[UNS_KEY] | |
# test pval_norm same | |
np.testing.assert_array_equal(df_1["pval_norm"].values, df_2["pval_norm"].values) | |
np.testing.assert_array_equal(df_1["var_norm"].values, df_2["var_norm"].values) | |
assert dummy_adata.uns[UNS_KEY].columns.shape == (4,) | |
assert df_2.columns.shape == (9,) | |
# test highly variable | |
assert dummy_adata.uns[UNS_KEY].shape != df_1.shape | |
# assert idx are sorted and contain same elements | |
assert not np.array_equal(idx_df, idx_adata) | |
np.testing.assert_array_equal(sorted(idx_df), sorted(idx_adata)) | |
# check parallel gives same results | |
assert_frame_equal(df_1, df_2) | |
I get the following output:
pytest tests/graph/test_ppatterns.py -k
assert "pval_norm_fdr_bh" in dummy_adata.uns[UNS_KEY]
# test pval_norm same
> np.testing.assert_array_equal(df_1["pval_norm"].values, df_2["pval_norm"].values)
E AssertionError:
E Arrays are not equal
E
E Mismatched elements: 44 / 44 (100%)
E Max absolute difference among violations: 4.16333634e-15
E Max relative difference among violations: 1.02475519e-13
E ACTUAL: array([0.037967, 0.070582, 0.071442, 0.079371, 0.118232, 0.15177 ,
E 0.18503 , 0.195551, 0.22175 , 0.224186, 0.226282, 0.233933,
E 0.236357, 0.25614 , 0.258449, 0.2623 , 0.286202, 0.289291,...
E DESIRED: array([0.037967, 0.070582, 0.071442, 0.079371, 0.118232, 0.15177 ,
E 0.18503 , 0.195551, 0.22175 , 0.224186, 0.226282, 0.233933,
E 0.236357, 0.25614 , 0.258449, 0.2623 , 0.286202, 0.289291,...
tests/graph/test_ppatterns.py:74: AssertionError
But when I add the following two lines at the top of the test, the failure goes away:
@pytest.mark.parametrize("mode", ["moran", "geary"])
@pytest.mark.parametrize("n_jobs", [1, 2])
def test_spatial_autocorr_reproducibility(dummy_adata: AnnData, n_jobs: int, mode: str):
import numba
numba.set_num_threads(1)
I have also checked whether this is related to the seeds, and it is not. The only results that differ between the two function calls are the scores computed by the numba-compiled function. Through debugging I have pinpointed that _morans_i gives different results for the same inputs when numba uses more than one thread; the differences are on the order of 1e-15, i.e. floating-point round-off.
I'd just relax such tests, but I'd like your opinion, @flying-sheep.
Metadata
Metadata
Assignees
Labels
No labels