diff --git a/.gitignore b/.gitignore index 4518323..acca80d 100644 --- a/.gitignore +++ b/.gitignore @@ -50,3 +50,7 @@ dist/ # Temporary *.log *.tmp + + +tasks.py +TODO.md \ No newline at end of file diff --git a/TODO.md b/TODO.md index 7aed279..21ebe2e 100644 --- a/TODO.md +++ b/TODO.md @@ -6,24 +6,25 @@ This document outlines future enhancements, features, and ideas for improving th ## 📦 1. Interface and UX -- [ ] Create a `generate(..., return_type="df" | "dict")` interface +- [✅] Create a `generate(..., return_type="df" | "dict")` interface - [ ] Add `__version__` using `importlib.metadata` or `poetry-dynamic-versioning` - [ ] Build a CLI with `typer` or `click` -- [ ] Add example notebooks for each model (`notebooks/` folder) +- [✅] Add example notebooks or scripts for each model (`examples/` folder) --- ## 📚 2. Documentation -- [ ] Add a "Model Comparison Guide" section -- [ ] Add "How It Works" sections for each model -- [ ] Include usage tutorials in Jupyter format on RTD +- [✅] Add a "Model Comparison Guide" section (`index.md` + `theory.md`) +- [✅] Add "How It Works" sections for each model (`theory.md`) +- [✅] Include usage examples in index with real calls - [ ] Optional: add multilingual docs using `sphinx-intl` --- ## 🧪 3. 
Testing and Quality +- [✅] Add tests for each model (e.g., `test_tdcm.py`, `test_thmm.py`, `test_aft.py`) - [ ] Add property-based tests with `hypothesis` - [ ] Cover edge cases (e.g., invalid parameters, n=0, negative censoring) - [ ] Run tests on multiple Python versions (CI matrix) @@ -34,7 +35,8 @@ This document outlines future enhancements, features, and ideas for improving th - [ ] Add Piecewise Exponential Model support - [ ] Add competing risks / multi-event simulation -- [ ] Implement parametric AFT models (log-normal, log-logistic) +- [✅] Implement parametric AFT models (log-normal) +- [ ] Implement parametric AFT models (log-logistic, weibull) - [ ] Simulate time-varying hazards - [ ] Add informative or covariate-dependent censoring @@ -59,18 +61,32 @@ This document outlines future enhancements, features, and ideas for improving th ## 🔍 7. Other Ideas - [ ] Add performance benchmarks for each model -- [ ] Improve PyPI discoverability (add keywords) +- [✅] Improve PyPI discoverability (added tags, keywords, docs) - [ ] Create a Streamlit or Gradio live demo --- ## 🧠 8. New Survival Models to Implement -- [ ] Accelerated Failure Time (AFT) models: - - [X] Log-Normal AFT - - [ ] Log-Logistic AFT - - [ ] Weibull AFT formulation -- [ ] Piecewise Exponential Model -- [ ] Competing Risks simulation -- [ ] Recurrent Events simulation +- [✅] Log-Normal AFT +- [ ] Log-Logistic AFT +- [ ] Weibull AFT +- [ ] Piecewise Exponential +- [ ] Competing Risks +- [ ] Recurrent Events - [ ] Mixture Cure Model + +--- + +## 🧬 9. 
Advanced Data Simulation Features + +- [ ] Recurrent events (multiple events per individual) +- [ ] Frailty models (random effects) +- [ ] Time-varying hazard functions +- [ ] Multi-line start-stop formatted data +- [ ] Competing risks with cause-specific hazards +- [ ] Simulate violations of PH assumption +- [ ] Grouped / clustered data generation +- [ ] Mixed covariates: categorical, continuous, binary +- [ ] Joint models (longitudinal + survival outcome) +- [ ] Controlled scenarios for robustness tests diff --git a/pyproject.toml b/pyproject.toml index 4994656..5cad2c7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -2,7 +2,7 @@ name = "gen_surv" version = "0.7.1" description = "A Python package for simulating survival data, inspired by the R package genSurv" -authors = ["Diogo Ribeiro "] +authors = ["Diogo Ribeiro "] license = "MIT" readme = "README.md" packages = [{ include = "gen_surv" }] diff --git a/stubs/gen_surv/__init__.pyi b/stubs/gen_surv/__init__.pyi deleted file mode 100644 index 1bd66c2..0000000 --- a/stubs/gen_surv/__init__.pyi +++ /dev/null @@ -1 +0,0 @@ -from .interface import generate as generate diff --git a/stubs/gen_surv/aft.pyi b/stubs/gen_surv/aft.pyi deleted file mode 100644 index ca00051..0000000 --- a/stubs/gen_surv/aft.pyi +++ /dev/null @@ -1,3 +0,0 @@ -from _typeshed import Incomplete - -def gen_aft_log_normal(n, beta, sigma, model_cens, cens_par, seed: Incomplete | None = None): ... diff --git a/stubs/gen_surv/bivariate.pyi b/stubs/gen_surv/bivariate.pyi deleted file mode 100644 index 23596f2..0000000 --- a/stubs/gen_surv/bivariate.pyi +++ /dev/null @@ -1 +0,0 @@ -def sample_bivariate_distribution(n, dist, corr, dist_par): ... diff --git a/stubs/gen_surv/censoring.pyi b/stubs/gen_surv/censoring.pyi deleted file mode 100644 index bb9ffd0..0000000 --- a/stubs/gen_surv/censoring.pyi +++ /dev/null @@ -1,4 +0,0 @@ -import numpy as np - -def runifcens(size: int, cens_par: float) -> np.ndarray: ... 
-def rexpocens(size: int, cens_par: float) -> np.ndarray: ... diff --git a/stubs/gen_surv/cmm.pyi b/stubs/gen_surv/cmm.pyi deleted file mode 100644 index 44a810e..0000000 --- a/stubs/gen_surv/cmm.pyi +++ /dev/null @@ -1,5 +0,0 @@ -from gen_surv.censoring import rexpocens as rexpocens, runifcens as runifcens -from gen_surv.validate import validate_gen_cmm_inputs as validate_gen_cmm_inputs - -def generate_event_times(z1: float, beta: list, rate: list) -> dict: ... -def gen_cmm(n, model_cens, cens_par, beta, covar, rate): ... diff --git a/stubs/gen_surv/cphm.pyi b/stubs/gen_surv/cphm.pyi deleted file mode 100644 index cac39b3..0000000 --- a/stubs/gen_surv/cphm.pyi +++ /dev/null @@ -1,6 +0,0 @@ -import pandas as pd -from gen_surv.censoring import rexpocens as rexpocens, runifcens as runifcens -from gen_surv.validate import validate_gen_cphm_inputs as validate_gen_cphm_inputs - -def generate_cphm_data(n, rfunc, cens_par, beta, covariate_range): ... -def gen_cphm(n: int, model_cens: str, cens_par: float, beta: float, covar: float) -> pd.DataFrame: ... diff --git a/stubs/gen_surv/interface.pyi b/stubs/gen_surv/interface.pyi deleted file mode 100644 index a6a0a85..0000000 --- a/stubs/gen_surv/interface.pyi +++ /dev/null @@ -1,7 +0,0 @@ -from gen_surv.aft import gen_aft_log_normal as gen_aft_log_normal -from gen_surv.cmm import gen_cmm as gen_cmm -from gen_surv.cphm import gen_cphm as gen_cphm -from gen_surv.tdcm import gen_tdcm as gen_tdcm -from gen_surv.thmm import gen_thmm as gen_thmm - -def generate(model: str, **kwargs): ... 
diff --git a/stubs/gen_surv/tdcm.pyi b/stubs/gen_surv/tdcm.pyi deleted file mode 100644 index 15e7dd0..0000000 --- a/stubs/gen_surv/tdcm.pyi +++ /dev/null @@ -1,6 +0,0 @@ -from gen_surv.bivariate import sample_bivariate_distribution as sample_bivariate_distribution -from gen_surv.censoring import rexpocens as rexpocens, runifcens as runifcens -from gen_surv.validate import validate_gen_tdcm_inputs as validate_gen_tdcm_inputs - -def generate_censored_observations(n, dist_par, model_cens, cens_par, beta, lam, b): ... -def gen_tdcm(n, dist, corr, dist_par, model_cens, cens_par, beta, lam): ... diff --git a/stubs/gen_surv/thmm.pyi b/stubs/gen_surv/thmm.pyi deleted file mode 100644 index 915c59d..0000000 --- a/stubs/gen_surv/thmm.pyi +++ /dev/null @@ -1,5 +0,0 @@ -from gen_surv.censoring import rexpocens as rexpocens, runifcens as runifcens -from gen_surv.validate import validate_gen_thmm_inputs as validate_gen_thmm_inputs - -def calculate_transitions(z1: float, cens_par: float, beta: list, rate: list, rfunc) -> dict: ... -def gen_thmm(n, model_cens, cens_par, beta, covar, rate): ... diff --git a/stubs/gen_surv/validate.pyi b/stubs/gen_surv/validate.pyi deleted file mode 100644 index 9ba0053..0000000 --- a/stubs/gen_surv/validate.pyi +++ /dev/null @@ -1,6 +0,0 @@ -def validate_gen_cphm_inputs(n: int, model_cens: str, cens_par: float, covar: float): ... -def validate_gen_cmm_inputs(n: int, model_cens: str, cens_par: float, beta: list, covar: float, rate: list): ... -def validate_gen_tdcm_inputs(n: int, dist: str, corr: float, dist_par: list, model_cens: str, cens_par: float, beta: list, lam: float): ... -def validate_gen_thmm_inputs(n: int, model_cens: str, cens_par: float, beta: list, covar: float, rate: list): ... -def validate_dg_biv_inputs(n: int, dist: str, corr: float, dist_par: list): ... -def validate_gen_aft_log_normal_inputs(n, beta, sigma, model_cens, cens_par) -> None: ...