test: broaden competing risks coverage (#67)

DiogoRibeiro7 · web-flow · commit 77b9f0606c0e · 2025-08-04T14:51:25.000+01:00
diff --git a/tests/test_cmm.py b/tests/test_cmm.py
@@ -1,18 +1,90 @@
 import os
 import sys
 
+import numpy as np
+import pandas as pd
+
 sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
-from gen_surv.cmm import gen_cmm
+from gen_surv.cmm import gen_cmm, generate_event_times
+
 
+def test_generate_event_times_reproducible():
+    np.random.seed(0)  # fix the global NumPy RNG before drawing
+    result = generate_event_times(
+        z1=1.0,
+        beta=[0.1, 0.2, 0.3],
+        rate=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0],
+    )
+    assert np.isclose(result["t12"], 0.7201370350469476)  # pinned for seed 0
+    assert np.isclose(result["t13"], 1.0282691393768246)
+    assert np.isclose(result["t23"], 0.6839405281667484)
 
-def test_gen_cmm_shape():
+
+def test_gen_cmm_uniform_reproducible():
+    np.random.seed(42)  # fix the global NumPy RNG before generating
     df = gen_cmm(
-        n=50,
+        n=5,  # small n so the whole expected frame can be spelled out
         model_cens="uniform",
         cens_par=1.0,
         beta=[0.1, 0.2, 0.3],
         covariate_range=2.0,
-        rate=[0.1, 1.0, 0.2, 1.0, 0.3, 1.0],
+        rate=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0],
+    )
+    expected = pd.DataFrame(
+        {
+            "id": [1, 2, 3, 4, 5],
+            "start": [0.0] * 5,
+            "stop": [
+                0.019298197410170713,
+                0.05808361216819946,
+                0.5550989864862181,
+                0.2117537394012932,
+                0.19451374567187332,
+            ],
+            "status": [1, 0, 1, 1, 1],  # second row is the only status-0 row
+            "X0": [
+                0.749080237694725,
+                1.9014286128198323,
+                1.4639878836228102,
+                1.1973169683940732,
+                0.31203728088487304,
+            ],
+            "transition": [1.0, float("nan"), 2.0, 1.0, 1.0],  # NaN on status-0 row
+        }
+    )
+    pd.testing.assert_frame_equal(df, expected)  # also checks dtypes and column order
+
+
+def test_gen_cmm_exponential_reproducible():
+    np.random.seed(42)  # fix the global NumPy RNG before generating
+    df = gen_cmm(
+        n=5,
+        model_cens="exponential",
+        cens_par=1.0,
+        beta=[0.1, 0.2, 0.3],
+        covariate_range=2.0,
+        rate=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0],
+    )
+    expected = pd.DataFrame(
+        {
+            "id": [1, 2, 3, 4, 5],
+            "start": [0.0] * 5,
+            "stop": [
+                0.019298197410170713,
+                0.059838768608680676,  # stop of the status-0 row
+                0.5550989864862181,
+                0.2117537394012932,
+                0.19451374567187332,
+            ],
+            "status": [1, 0, 1, 1, 1],  # second row is the only status-0 row
+            "X0": [
+                0.749080237694725,
+                1.9014286128198323,
+                1.4639878836228102,
+                1.1973169683940732,
+                0.31203728088487304,
+            ],
+            "transition": [1.0, float("nan"), 2.0, 1.0, 1.0],  # NaN on status-0 row
+        }
     )
-    assert df.shape[1] == 6
-    assert "transition" in df.columns
+    pd.testing.assert_frame_equal(df, expected)  # also checks dtypes and column order
diff --git a/tests/test_competing_risks.py b/tests/test_competing_risks.py
@@ -1,6 +1,6 @@
-"""
-Tests for Competing Risks models.
-"""
+"""Tests for Competing Risks models."""
+
+import os
 
 import numpy as np
 import pandas as pd
@@ -16,6 +16,8 @@
     gen_competing_risks_weibull,
 )
 
+os.environ.setdefault("MPLBACKEND", "Agg")  # takes effect only before matplotlib import
+
 
 def test_gen_competing_risks_basic():
     """Test that the competing risks generator runs without errors."""
@@ -83,6 +85,13 @@ def test_competing_risks_parameters():
         gen_competing_risks(n=10, n_risks=2, model_cens="invalid", seed=42)
 
 
+def test_invalid_covariate_dist():
+    with pytest.raises(ChoiceError):  # unknown covariate_dist must be rejected
+        gen_competing_risks(n=5, n_risks=2, covariate_dist="unknown", seed=1)
+    with pytest.raises(ChoiceError):  # same contract for the Weibull generator
+        gen_competing_risks_weibull(n=5, n_risks=2, covariate_dist="unknown", seed=1)
+
+
 def test_competing_risks_weibull_parameters():
     """Test parameter validation in Weibull competing risks model."""
     # Test with invalid number of shape parameters
@@ -126,6 +135,18 @@ def test_cause_specific_cumulative_incidence():
         cause_specific_cumulative_incidence(df, time_points, cause=3)
 
 
+def test_cause_specific_cumulative_incidence_bounds():
+    df = gen_competing_risks(n=30, n_risks=2, seed=5)
+    max_time = df["time"].max()
+    time_points = [-1.0, 0.0, max_time + 1]  # negative, zero, and past the last time
+    cif = cause_specific_cumulative_incidence(df, time_points, cause=1)
+    assert cif.iloc[0]["incidence"] == 0.0  # incidence at t=-1 must be zero
+    expected = cause_specific_cumulative_incidence(df, [max_time], cause=1).iloc[0][
+        "incidence"
+    ]
+    assert cif.iloc[-1]["incidence"] == expected  # flat beyond the last time
+
+
 @given(
     n=st.integers(min_value=5, max_value=50),
     n_risks=st.integers(min_value=2, max_value=4),
@@ -170,6 +191,31 @@ def test_competing_risks_weibull_properties(n, n_risks, seed):
     assert len(status_counts) >= 2
 
 
+def test_gen_competing_risks_forces_event_types():
+    df = gen_competing_risks(
+        n=2,  # two rows, two risks
+        n_risks=2,
+        baseline_hazards=[1e-9, 1e-9],  # near-zero hazards (presumably all censored)
+        model_cens="uniform",
+        cens_par=0.1,
+        seed=0,
+    )
+    assert set(df["status"]) == {1, 2}  # both causes present, no status 0
+
+
+def test_gen_competing_risks_weibull_forces_event_types():
+    df = gen_competing_risks_weibull(
+        n=2,  # two rows, two risks
+        n_risks=2,
+        shape_params=[1, 1],
+        scale_params=[1e9, 1e9],  # huge scales (presumably all censored)
+        model_cens="uniform",
+        cens_par=0.1,
+        seed=0,
+    )
+    assert set(df["status"]) == {1, 2}  # both causes present, no status 0
+
+
 def test_reproducibility():
     """Test that results are reproducible with the same seed."""
     df1 = gen_competing_risks(n=20, n_risks=2, seed=42)
@@ -202,14 +248,10 @@ def test_competing_risks_summary_with_categorical():
     assert "distribution" in summary["covariate_stats"]["group"]
 
 
-import matplotlib
-
-matplotlib.use("Agg")
-
-
 def test_plot_cause_specific_hazards_runs():
+    plt = pytest.importorskip("matplotlib.pyplot")  # skip when matplotlib is absent
     df = gen_competing_risks(n=30, n_risks=2, seed=3)
     fig, ax = cr.plot_cause_specific_hazards(df, time_points=np.linspace(0, 5, 5))
     assert hasattr(fig, "savefig")
     assert len(ax.get_lines()) >= 1
-    matplotlib.pyplot.close(fig)
+    plt.close(fig)  # release the figure once the assertions are done
diff --git a/tests/test_export.py b/tests/test_export.py
@@ -1,14 +1,80 @@
 import pandas as pd
 import pyreadr
+import pytest
 
+from gen_surv._validation import ChoiceError
 from gen_surv.export import export_dataset
 
 
-def test_export_dataset_rds(tmp_path):
+@pytest.mark.parametrize(
+    "fmt, reader",
+    [
+        ("csv", pd.read_csv),
+        ("feather", pd.read_feather),
+        ("ft", pd.read_feather),  # short suffix, read back with the same reader
+    ],
+)
+def test_export_dataset_formats(fmt, reader, tmp_path):
     df = pd.DataFrame({"time": [1.0, 2.0], "status": [1, 0]})
-    out = tmp_path / "data.rds"
+    out = tmp_path / f"data.{fmt}"  # format presumably inferred from suffix
     export_dataset(df, out)
     assert out.exists()
-    result = pyreadr.read_r(out)[None]
-    result = result.astype(df.dtypes.to_dict())
+    result = reader(out).astype(df.dtypes.to_dict())  # align dtypes before comparing
     pd.testing.assert_frame_equal(result.reset_index(drop=True), df)
+
+
+def test_export_dataset_json(monkeypatch, tmp_path):
+    df = pd.DataFrame({"time": [1.0, 2.0], "status": [1, 0]})
+    out = tmp_path / "data.json"
+
+    called = {}  # records the arguments the stub receives
+
+    def fake_to_json(self, path, orient="table"):  # stand-in for DataFrame.to_json
+        called["args"] = (path, orient)
+        with open(path, "w", encoding="utf-8") as f:
+            f.write("{}")  # minimal file so out.exists() holds
+
+    monkeypatch.setattr(pd.DataFrame, "to_json", fake_to_json)
+    export_dataset(df, out)
+    assert called["args"] == (out, "table")  # path passed through, orient "table"
+    assert out.exists()
+
+
+def test_export_dataset_rds(monkeypatch, tmp_path):
+    df = pd.DataFrame({"time": [1.0, 2.0], "status": [1, 0]})
+    out = tmp_path / "data.rds"
+
+    captured = {}  # records what the stub is called with
+
+    def fake_write_rds(path, data):  # stand-in for pyreadr.write_rds
+        captured["path"] = path
+        captured["data"] = data
+        open(path, "wb").close()  # create an empty file so out.exists() holds
+
+    monkeypatch.setattr(pyreadr, "write_rds", fake_write_rds)
+    export_dataset(df, out)
+    assert out.exists()
+    pd.testing.assert_frame_equal(captured["data"], df.reset_index(drop=True))
+
+
+def test_export_dataset_explicit_fmt(monkeypatch, tmp_path):
+    df = pd.DataFrame({"time": [1.0, 2.0], "status": [1, 0]})
+    out = tmp_path / "data.bin"  # suffix deliberately unrelated to JSON
+
+    called = {}  # records the arguments the stub receives
+
+    def fake_to_json(self, path, orient="table"):  # stand-in for DataFrame.to_json
+        called["args"] = (path, orient)
+        with open(path, "w", encoding="utf-8") as f:
+            f.write("{}")  # minimal file so out.exists() holds
+
+    monkeypatch.setattr(pd.DataFrame, "to_json", fake_to_json)
+    export_dataset(df, out, fmt="json")  # explicit fmt should win over the suffix
+    assert called["args"] == (out, "table")  # JSON writer was the one invoked
+    assert out.exists()
+
+
+def test_export_dataset_invalid_format(tmp_path):
+    df = pd.DataFrame({"time": [1.0, 2.0], "status": [1, 0]})
+    with pytest.raises(ChoiceError):  # fmt="txt" is expected to be rejected
+        export_dataset(df, tmp_path / "data.xxx", fmt="txt")