Skip to content

Commit cb7e148

Browse files
authored
[Feat] Add dependence structures (#76)
* Large rewrite * Add mockup test and fix typo * up tests to check on lts and 1 * Proofread docs * Move methods to own folder * prettyfy the notations * Adding two methods to see if things fails. * error one too much sqrt in the variance * export stuff * typo * add square to the variance * Add a new dataset and document datasets * up * Bump version to 0.1.2 * typo * typos * typo * up disclaimer message * Update the reference to the paper
1 parent 819742c commit cb7e148

File tree

16 files changed

+2542
-113
lines changed

16 files changed

+2542
-113
lines changed

.github/workflows/CI.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,8 +26,8 @@ jobs:
2626
fail-fast: false
2727
matrix:
2828
version:
29-
- '1.9'
3029
- '1.10'
30+
- '1'
3131
os:
3232
- ubuntu-latest
3333
arch:

Project.toml

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,17 @@
11
name = "NetSurvival"
22
uuid = "8f9d5d0e-dd2e-4568-92d4-f8c5d34f25cf"
33
authors = ["Oskar Laverny <oskar.laverny@univ-amu.fr> and contributors"]
4-
version = "0.1.1"
4+
version = "0.1.2"
55

66
[deps]
77
CSV = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b"
8+
Copulas = "ae264745-0b69-425e-9d9d-cf662c5eec93"
89
DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
910
Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f"
11+
ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210"
1012
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
1113
RateTables = "d40fb65e-c2ee-4113-9e14-cb96ca0acb32"
14+
Roots = "f2b01f46-fcfa-551c-844a-d8ac1e96c665"
1215
StatsAPI = "82ae8749-77ed-4fe6-ae5f-f523153014b0"
1316
StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91"
1417
StatsModels = "3eaba693-59b7-5ba5-a881-562e759f1c8d"

data/ccolon.csv

Lines changed: 1968 additions & 0 deletions
Large diffs are not rendered by default.

docs/src/assets/references.bib

Lines changed: 55 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
@article{PoharPerme2012,
2-
author = {Perme, Maja Pohar and Stare, Janez and Estève, Jacques},
2+
author = {{Pohar Perme}, Maja and Stare, Janez and Estève, Jacques},
33
title = "{On Estimation in Relative Survival}",
44
journal = {Biometrics},
55
volume = {68},
@@ -92,9 +92,9 @@ @book{ABGK1993
9292
doi = {10.1007/978-1-4612-4348-9},
9393
}
9494
95-
@article{PermePavlik2018,
95+
@article{Pavlik2018,
9696
title={Nonparametric relative survival analysis with the R package relsurv},
97-
author={Perme, Maja Pohar and Pavlic, Klemen},
97+
author={{Pohar Perme}, Maja and Pavlic, Klemen},
9898
journal={Journal of Statistical Software},
9999
volume={87},
100100
pages={1--27},
@@ -119,4 +119,56 @@ @article{cronin2000cumulative
119119
pages={1729--1740},
120120
year={2000},
121121
publisher={Wiley Online Library}
122+
}
123+
124+
@article{Zadnik2012,
125+
title={Cancer burden in Slovenia in comparison with the burden in other European countries},
126+
author={Zadnik, Vesna and {\v{Z}}akelj, Maja Primic and Krajc, Mateja},
127+
journal={Slovenian Medical Journal},
128+
volume={81},
129+
number={5},
130+
year={2012}
131+
}
132+
133+
@article{Zadnik2016,
134+
title={Cancer patients’ survival: standard calculation methods and some considerations regarding their interpretation},
135+
author={Zadnik, Vesna and {\v{Z}}agar, Tina and {\v{Z}}akelj, Maja Primic},
136+
journal={Slovenian Journal of Public Health},
137+
volume={55},
138+
number={2},
139+
pages={144--151},
140+
year={2016}
141+
}
142+
143+
@article{Giorgi2003,
144+
title={A relative survival regression model using B-spline functions to model non-proportional hazards},
145+
author={Giorgi, Roch and Abrahamowicz, Michal and Quantin, Catherine and Bolard, Philippe and Esteve, Jacques and Gouvernet, Joanny and Faivre, Jean},
146+
journal={Statistics in medicine},
147+
volume={22},
148+
number={17},
149+
pages={2767--2784},
150+
year={2003},
151+
publisher={Wiley Online Library}
152+
}
153+
154+
@article{Wolski2020,
155+
title = {A Permutation Test Based on the Restricted Mean Survival Time for Comparison of Net Survival Distributions in Non-Proportional Excess Hazard Settings},
156+
author = {Wolski, Anna and Graff{\'e}o, Nathalie and Giorgi, Roch and {the CENSUR working survival group}},
157+
year = {2020},
158+
month = jun,
159+
journal = {Statistical Methods in Medical Research},
160+
volume = {29},
161+
number = {6},
162+
pages = {1612--1623},
163+
doi = {10.1177/0962280219870217},
164+
}
165+
166+
@article{Laverny2025,
167+
title={Non-parametric estimation of net survival under dependence between death causes},
168+
author={Oskar Laverny and Nathalie Grafféo and Roch Giorgi},
169+
year={2025},
170+
eprint={2502.09273},
171+
archivePrefix={arXiv},
172+
primaryClass={math.ST},
173+
url={https://arxiv.org/abs/2502.09273},
122174
}

docs/src/getting_started.md

Lines changed: 35 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@ With these definitions and assumptions in mind, we will now present the four dif
5454

5555
where, in the variances, it is understood that when no more individuals are at risk $0/0$ gives $0$.
5656

57-
The Pohar Perme estimator [PoharPerme2012](@cite) is the newest addition to relative survival analysis between the four methods, particularly designed to handle situations where covariates may change over time. It is trusted from the field (see e.g. [PermePavlik2018](@cite) and [CharvatBelot2021](@cite)) that only this estimator should really be used, the other ones being included mostly for historical reasons and comparisons.
57+
The Pohar Perme estimator [PoharPerme2012](@cite) is the newest addition to relative survival analysis between the four methods, particularly designed to handle situations where covariates may change over time. It is trusted from the field (see e.g. [Pavlik2018](@cite) and [CharvatBelot2021](@cite)) that only this estimator should really be used, the other ones being included mostly for historical reasons and comparisons.
5858

5959

6060
```@docs
@@ -131,6 +131,40 @@ While the estimated lifepsan is directly taken from the `expectation` function.
131131
nessie
132132
```
133133

134+
## Relaxing the independence assumption
135+
136+
The independence assumption of the random vector $(E,P)$ can be relaxed by specifying a dependence structure for this random vector, defined by a copula from [Copulas.jl](https://github.com/lrnv/Copulas.jl). The description of the underlying method to compute the net survival, its variance and associated log-rank tests under these dependence assumptions are given in [Laverny2025](@cite).
137+
138+
The generalization of the Pohar Perme estimator with a given copula `C::Copulas.Copula` can be used as follows:
139+
140+
```julia
141+
C = FrankCopula(2,-10)
142+
fit(GenPoharPerme(C), @formula(Surv(time, status)~ x1 + x2), data, ratetable)
143+
fit(GraffeoTest(C), @formula(Surv(time, status)~ x1 + x2), data, ratetable)
144+
```
145+
146+
By default, `GraffeoTest(C::Copula)` uses `GenPoharPerme(C)` as a method to compute net survival in each category, but this modification also allows to use other methods, such as:
147+
148+
```julia
149+
fit(GraffeoTest(Ederer1()), @formula(Surv(time, status)~ x1 + x2), data, ratetable)
150+
```
151+
152+
Even if these results are not supported by any theoretical work and are probably meaningless, it is fun to to see that the code goes through, thanks to the modularity of Julia's dispatch, even on routes that were not designed for.
153+
154+
```@docs
155+
GenPoharPerme
156+
```
157+
158+
159+
## Available Datasets
160+
161+
Two classroom datasets are provided in the package:
162+
163+
```@docs
164+
colrec
165+
ccolon
166+
```
167+
134168
## References
135169

136170
```@bibliography

src/GraffeoTest.jl

Lines changed: 77 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -49,17 +49,19 @@ The produced test statistics is supposed to follow a chi squared distribution un
4949
References:
5050
* [Graffeo2016](@cite) Grafféo, Nathalie and Castell, Fabienne and Belot, Aurélien and Giorgi, Roch (2016). A Log-Rank-Type Test to Compare Net Survival Distributions.
5151
"""
52-
struct GraffeoTest
53-
∂N::Array{Float64, 3}
54-
∂V::Array{Float64, 3}
55-
∂Z::Array{Float64, 3}
56-
D::Array{Float64, 3}
57-
R::Array{Float64, 3}
58-
∂VZ::Array{Float64, 4}
52+
struct GraffeoTest{Method}
53+
method::Method
54+
t::Array{Float64,1}
55+
∂N::Array{Float64, 2}
56+
∂V::Array{Float64, 2}
57+
∂Z::Array{Float64, 2}
58+
D::Array{Float64, 2}
59+
R::Array{Float64, 2}
60+
∂VZ::Array{Float64, 3}
5961
stat::Float64
6062
df::Int64
6163
pval::Float64
62-
function GraffeoTest(T, Δ, age, year, rate_preds, strata, group, ratetable)
64+
function GraffeoTest(method::M, T, Δ, age, year, rate_preds, strata, group, ratetable) where M<:NPNSMethod
6365

6466
# This version of the test is HIGHLY INNEFICIENT.
6567
# We should avoid allocating that much memory.
@@ -70,80 +72,83 @@ struct GraffeoTest
7072
# get stratas and groups, count them.
7173
stratas = unique(strata)
7274
groups = unique(group)
73-
nstrata = length(stratas)
7475
ngroups = length(groups)
7576

7677
# Allocate:
77-
∂N = zeros(nstrata, ngroups, length(grid))
78-
∂V = zeros(nstrata, ngroups, length(grid))
79-
∂Z = zeros(nstrata, ngroups, length(grid))
80-
D = zeros(nstrata, ngroups, length(grid))
81-
R = zeros(nstrata, ngroups, length(grid))
82-
∂VZ = zeros(nstrata, ngroups, ngroups, length(grid))
83-
84-
num_excess = zero(grid)
85-
num_pop = zero(grid)
86-
num_variance = zero(grid)
87-
den_pop = zero(grid)
88-
den_excess = zero(grid)
78+
∂N = zeros(ngroups, length(grid))
79+
∂V = zeros(ngroups, length(grid))
80+
∂Z = zeros(ngroups, length(grid))
81+
D = zeros(ngroups, length(grid))
82+
R = zeros(ngroups, length(grid))
83+
∂VZ = zeros(ngroups, ngroups, length(grid))
84+
85+
∂Nₒ = zero(grid)
86+
∂Nₚ = zero(grid)
87+
∂Vₑ = zero(grid)
88+
Yₚ = zero(grid)
89+
Yₒ = zero(grid)
8990
∂t = [diff(grid)...,1.0]
9091

9192
# Compute Pohar Perme numerator and denominators on each strata&group (s,g)
9293
for s in eachindex(stratas)
9394
for g in eachindex(groups)
9495
idx = (group .== groups[g]) .&& (strata .== stratas[s])
95-
96-
num_excess .= 0
97-
num_pop .= 0
98-
num_variance .= 0
99-
den_pop .= 0
100-
den_excess .= 0
101-
Λ!(PoharPermeMethod, num_excess, den_excess, num_pop, den_pop, num_variance, T[idx], Δ[idx], age[idx], year[idx], rate_preds[idx,:], ratetable, grid, ∂t)
102-
∂N[s, g, :] .= num_excess.- num_pop
103-
∂V[s, g, :] .= num_variance
104-
D[s, g, :] .= den_excess
96+
∂Nₒ .= 0
97+
∂Nₚ .= 0
98+
∂Vₑ .= 0
99+
Yₚ .= 0
100+
Yₒ .= 0
101+
Λ!(method, ∂Nₒ, Yₒ, ∂Nₚ, Yₚ, ∂Vₑ, T[idx], Δ[idx], age[idx], year[idx], rate_preds[idx,:], ratetable, grid, ∂t)
102+
∂N[g, :] .= ∂Nₒ .- ∂Nₚ
103+
∂V[g, :] .= ∂Vₑ
104+
D[g, :] .= Yₒ
105105
end
106-
end
107-
108-
# renormalize on groups, be carefull for zeros.
109-
R .= ifelse.(sum(D,dims=2) .== 0, 0, D ./ sum(D,dims=2))
110-
∂Z .= ∂N .- R .* sum(∂N,dims=2)
106+
107+
R .= ifelse.(sum(D,dims=1) .== 0, 0, D ./ sum(D,dims=1))
108+
∂Z .+= ∂N .- R .* sum(∂N,dims=1)
111109

112-
# Compute test variance on each strata
113-
for s in eachindex(stratas)
110+
# Compute test variance
114111
forin eachindex(groups)
115112
for g in eachindex(groups)
116113
for h in eachindex(groups)
117114
for t in eachindex(grid)
118-
∂VZ[s, g, h,t] += ((g==ℓ) - R[s, g, t]) * ((h==ℓ) - R[s, h, t]) .* ∂V[s, ℓ, t]
115+
∂VZ[g, h,t] += ((g==ℓ) - R[g, t]) * ((h==ℓ) - R[h, t]) .* ∂V[ℓ, t]
119116
end
120117
end
121118
end
122119
end
123120
end
124121

125-
# Cumulate accross time and stratas
126-
Z = dropdims(sum(∂Z, dims=(1,3)), dims=(1,3))
127-
VZ = dropdims(sum(∂VZ, dims=(1,4)), dims=(1,4))
122+
# Cumulate accross time
123+
Z = dropdims(sum(∂Z, dims=2), dims=2)
124+
VZ = dropdims(sum(∂VZ, dims=3), dims=3)
128125

129126
# Finally compute the stat and p-values:
130127
stat = dot(Z[1:end-1],(VZ[1:end-1,1:end-1] \ Z[1:end-1])) # test statistic
131128
df = ngroups-1 # number of degree of freedom of the chi-square test
132129
pval = ccdf(Chisq(df), stat[1]) # Obtained p-value.
133-
return new(∂N, ∂V, ∂Z, D, R, ∂VZ, stat[1], df, pval)
130+
return new{M}(method,grid,∂N, ∂V, ∂Z, D, R, ∂VZ, stat[1], df, pval)
134131
end
135132
end
136133

137-
function Base.show(io::IO, test::GraffeoTest)
138-
println(io, "Grafféo's log-rank-type-test")
139-
df = DataFrame(test_statistic = test.stat, degrees_of_freedom = test.df, p_value = test.pval)
140-
show(io, df)
134+
struct GraffeoTestHolder{T}
135+
m::T
141136
end
142137

143-
# The fitting and formula interfaces should be here.
138+
GraffeoTest(T, Δ, age, year, rate_preds, strata, group, ratetable) = GraffeoTest(PoharPermeMethod(),T, Δ, age, year, rate_preds, strata, group, ratetable)
144139

145-
function StatsBase.fit(::Type{E}, formula::FormulaTerm, df::DataFrame, rt::RateTables.AbstractRateTable) where {E<:GraffeoTest}
140+
GraffeoTest() = GraffeoTestHolder(PoharPermeMethod())
141+
GraffeoTest(m::M) where M<:NPNSMethod = GraffeoTestHolder(m)
142+
GraffeoTest{M}() where M<:NPNSMethod = GraffeoTestHolder(M())
143+
GraffeoTest(::Type{M}) where M<:NPNSMethod = GraffeoTestHolder(M())
144+
GraffeoTest(::Type{NPNSEstimator{M}}) where M<:NPNSMethod = GraffeoTestHolder(M())
145+
GraffeoTest(C::Cop) where Cop<:Copulas.Copula = GraffeoTest(GenPoharPermeMethod(C))
146146

147+
StatsBase.fit(X::GraffeoTestHolder{M}, formula, df, rt) where M<:NPNSMethod = StatsBase.fit(GraffeoTest, X.m, formula, df, rt)
148+
StatsBase.fit(::Type{GraffeoTest}, formula, df, rt) = StatsBase.fit(GraffeoTest(PoharPermeMethod()), formula, df, rt)
149+
StatsBase.fit(::Type{GraffeoTest{M}}, formula, df, rt) where {M<:NPNSMethod} = StatsBase.fit(GraffeoTest, M(), formula, df, rt)
150+
151+
function StatsBase.fit(GTH::GraffeoTestHolder{M}, formula::FormulaTerm, df::DataFrame, rt::RateTables.AbstractRateTable) where {M<:NPNSMethod}
147152
terms = StatsModels.termvars(formula.rhs)
148153
tf = typeof(formula.rhs)
149154
types = (tf <: AbstractTerm) ? [tf] : typeof.(formula.rhs)
@@ -155,5 +160,27 @@ function StatsBase.fit(::Type{E}, formula::FormulaTerm, df::DataFrame, rt::RateT
155160
resp = modelcols(apply_schema(formula,schema(df)).lhs,df)
156161
rate_predictors = _get_rate_predictors(rt,df)
157162

158-
return GraffeoTest(resp[:,1], resp[:,2], df.age, df.year, select(df,rate_predictors), strata, group, rt)
163+
return GraffeoTest(GTH.m, resp[:,1], resp[:,2], df.age, df.year, select(df,rate_predictors), strata, group, rt)
159164
end
165+
166+
# A show function:
167+
function Base.show(io::IO, test::GraffeoTest)
168+
println(io, "Grafféo's log-rank-type-test (Method: $(test.method))")
169+
df = DataFrame(test_statistic = test.stat, degrees_of_freedom = test.df, p_value = test.pval)
170+
show(io, df)
171+
end
172+
173+
# Potential other extraction methods:
174+
function statistic(X::GraffeoTest, at_time_T)
175+
# Cumulate accross time
176+
i_T = findlast(X.t .<= at_time_T)
177+
Z = dropdims(sum(X.∂Z[:,1:i_T], dims=2), dims=2)
178+
Γ = dropdims(sum(X.∂VZ[:,:,1:i_T], dims=3), dims=3)
179+
stat = dot(Z[1:end-1],Γ[1:end-1,1:end-1] \ Z[1:end-1]) # test statistic
180+
return stat
181+
end
182+
function pvalue(X::GraffeoTest, at_time_T)
183+
stat = statistic(X, at_time_T)
184+
df = size(X.∂Z,1)-1 # number of degree of freedom of the chi-square test
185+
return ccdf(Chisq(df), stat[1]) # Obtained p-value.
186+
end

src/EdererI.jl renamed to src/NPNSEMethods/EdererI.jl

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ To call this function:
1515
"""
1616
const EdererI = NPNSEstimator{EdererIMethod}
1717

18-
function Λ!(::Type{EdererIMethod}, num_excess, den_excess, num_pop, den_pop, num_variance, T, Δ, age, year, rate_preds, ratetable, grid, ∂t)
18+
function Λ!(::EdererIMethod, ∂Nₒ, Yₒ, ∂Nₚ, Yₚ, ∂V, T, Δ, age, year, rate_preds, ratetable, grid, ∂t)
1919
Tmax= Int(maximum(T))
2020
for i in eachindex(age)
2121
Tᵢ = searchsortedlast(grid, T[i])
@@ -26,14 +26,14 @@ function Λ!(::Type{EdererIMethod}, num_excess, den_excess, num_pop, den_pop, nu
2626
∂Λₚ = λₚ * ∂t[j]
2727
Λₚ += ∂Λₚ
2828
Sₚ = exp(-Λₚ)
29-
num_pop[j] += (Sₚ * ∂Λₚ)
30-
den_pop[j] += Sₚ
29+
∂Nₚ[j] += (Sₚ * ∂Λₚ)
30+
Yₚ[j] += Sₚ
3131
end
3232
for j in 1:Tᵢ
33-
den_excess[j] += 1
33+
Yₒ[j] += 1
3434
end
35-
num_excess[Tᵢ] += Δ[i]
36-
num_variance[Tᵢ] += Δ[i]
35+
∂Nₒ[Tᵢ] += Δ[i]
36+
∂V[Tᵢ] += Δ[i]
3737
end
3838
end
3939

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -15,19 +15,19 @@ To call this function:
1515
"""
1616
const EdererII = NPNSEstimator{EdererIIMethod}
1717

18-
function Λ!(::Type{EdererIIMethod}, num_excess, den_excess, num_pop, den_pop, num_variance, T, Δ, age, year, rate_preds, ratetable, grid, ∂t)
18+
function Λ!(::EdererIIMethod, ∂Nₒ, Yₒ, ∂Nₚ, Yₚ, ∂V, T, Δ, age, year, rate_preds, ratetable, grid, ∂t)
1919
for i in eachindex(age)
2020
Tᵢ = searchsortedlast(grid, T[i])
2121
rtᵢ = ratetable[rate_preds[i,:]...]
2222
for j in 1:Tᵢ
2323
λₚ = daily_hazard(rtᵢ, age[i] + grid[j], year[i] + grid[j])
2424
∂Λₚ = λₚ * ∂t[j]
25-
den_excess[j] += 1
26-
den_pop[j] += 1
27-
num_pop[j] += ∂Λₚ
25+
Yₒ[j] += 1
26+
Yₚ[j] += 1
27+
∂Nₚ[j] += ∂Λₚ
2828
end
29-
num_excess[Tᵢ] += Δ[i]
30-
num_variance[Tᵢ] += Δ[i]
29+
∂Nₒ[Tᵢ] += Δ[i]
30+
∂V[Tᵢ] += Δ[i]
3131
end
3232
end
3333

0 commit comments

Comments
 (0)