feat: add dataset export utility (#41)

DiogoRibeiro7 · web-flow · commit 5a184ca262bf · 2025-07-28T22:43:34.000+01:00
diff --git a/CITATION.cff b/CITATION.cff
@@ -10,6 +10,7 @@ preferred-citation:
   authors:
     - family-names: Ribeiro
       given-names: Diogo
+      alias: DiogoRibeiro7
       orcid: "https://orcid.org/0009-0001-2022-7072"
       affiliation: "ESMAD - Instituto Politécnico do Porto"
       email: "dfr@esmad.ipp.pt"
diff --git a/README.md b/README.md
@@ -21,6 +21,7 @@
 ```bash
 poetry install
 ```
+This package requires **Python 3.10, 3.11, or 3.12**.
 ## ✨ Features
 
 - Consistent interface across models
@@ -31,6 +32,7 @@ poetry install
 - Mixture cure and piecewise exponential models
 - Competing risks generators (constant and Weibull hazards)
 - Command-line interface powered by `Typer`
+- Export utilities for CSV, JSON, and Feather formats
 
 ## 🧪 Example
 
@@ -98,6 +100,7 @@ python -m gen_surv dataset aft_ln --n 100 > data.csv
 | `sample_bivariate_distribution()` | Sample correlated Weibull or exponential times |
 | `runifcens()` | Generate uniform censoring times |
 | `rexpocens()` | Generate exponential censoring times |
+| `export_dataset()` | Save a dataset to CSV, JSON, or Feather |
 
 
 ```text
diff --git a/gen_surv/__init__.py b/gen_surv/__init__.py
@@ -17,6 +17,7 @@
 from .competing_risks import gen_competing_risks, gen_competing_risks_weibull
 from .mixture import gen_mixture_cure, cure_fraction_estimate
 from .piecewise import gen_piecewise_exponential
+from .export import export_dataset
 
 # Helper functions
 from .bivariate import sample_bivariate_distribution
@@ -61,6 +62,7 @@
     "sample_bivariate_distribution",
     "runifcens",
     "rexpocens",
+    "export_dataset",
 ]
 
 # Add visualization tools to __all__ if available
diff --git a/gen_surv/export.py b/gen_surv/export.py
@@ -0,0 +1,44 @@
+"""Data export utilities for gen_surv.
+
+This module provides helper functions to save generated
+survival datasets in various formats.
+"""
+
+from __future__ import annotations
+
+import os
+from typing import Optional
+
+import pandas as pd
+
+
+def export_dataset(df: pd.DataFrame, path: str, fmt: Optional[str] = None) -> None:
+    """Save a DataFrame to disk as CSV, JSON or Feather.
+
+    Parameters
+    ----------
+    df : pd.DataFrame
+        DataFrame containing survival data; CSV/Feather discard its index.
+    path : str
+        File path to write to. The extension is used to infer the format
+        when ``fmt`` is ``None``.
+    fmt : {"csv", "json", "feather", "ft"}, optional
+        Format to use; case and a leading dot are ignored (``".CSV"`` works).
+
+    Raises
+    ------
+    ValueError
+        If the format is not one of the supported types.
+    """
+    # Normalise explicit and inferred formats alike, so "CSV" or ".csv" work.
+    requested = os.path.splitext(path)[1] if fmt is None else fmt
+    fmt = requested.strip().lstrip(".").lower()
+    if fmt == "csv":
+        df.to_csv(path, index=False)
+    elif fmt == "json":
+        df.to_json(path, orient="table")
+    elif fmt in {"feather", "ft"}:
+        df.reset_index(drop=True).to_feather(path)
+    else:
+        raise ValueError(f"Unsupported export format: {fmt}")
+
diff --git a/pyproject.toml b/pyproject.toml
@@ -17,14 +17,14 @@ classifiers = [
     "Topic :: Scientific/Engineering :: Medical Science Apps.",
     "Topic :: Scientific/Engineering :: Mathematics",
     "Programming Language :: Python :: 3",
-    "Programming Language :: Python :: 3.9",
     "Programming Language :: Python :: 3.10",
     "Programming Language :: Python :: 3.11",
+    "Programming Language :: Python :: 3.12",
     "License :: OSI Approved :: MIT License",
 ]
 
 [tool.poetry.dependencies]
-python = "^3.9"
+python = ">=3.10,<3.13"
 numpy = "^1.26"
 pandas = "^2.2.3"
 typer = "^0.12.3"
@@ -62,7 +62,7 @@ build_command = ""
 
 [tool.black]
 line-length = 88
-target-version = ['py39']
+target-version = ['py310']
 include = '\.pyi?$'
 
 [tool.isort]
@@ -74,7 +74,7 @@ max-line-length = 88
 extend-ignore = ["E203", "W503", "E501", "W291", "W293", "W391", "F401", "F841", "E402", "E302", "E305"]
 
 [tool.mypy]
-python_version = "3.9"
+python_version = "3.10"
 warn_return_any = true
 warn_unused_configs = true
 disallow_untyped_defs = true
diff --git a/tests/test_export.py b/tests/test_export.py
@@ -0,0 +1,22 @@
+import os
+import pandas as pd
+from gen_surv import generate, export_dataset
+
+
+def test_export_dataset_csv(tmp_path):
+    """A CSV export must read back equal to the generated frame."""
+    target = tmp_path / "data.csv"
+    frame = generate(model="cphm", n=5, model_cens="uniform", cens_par=1.0, beta=0.5, covariate_range=1.0)
+    export_dataset(frame, str(target))
+    assert target.exists()
+    pd.testing.assert_frame_equal(frame.reset_index(drop=True), pd.read_csv(target))
+
+
+def test_export_dataset_json(tmp_path):
+    """A table-oriented JSON export must read back equal to the frame."""
+    target = tmp_path / "data.json"
+    frame = generate(model="cphm", n=5, model_cens="uniform", cens_par=1.0, beta=0.5, covariate_range=1.0)
+    export_dataset(frame, str(target))
+    assert target.exists()
+    pd.testing.assert_frame_equal(frame.reset_index(drop=True), pd.read_json(target, orient="table"))
+