JuliaSurv
diff --git a/‎.github/workflows/CI.yml‎
Lines changed: 1 addition & 1 deletion b/‎.github/workflows/CI.yml‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎Project.toml‎
Lines changed: 4 additions & 1 deletion b/‎Project.toml‎
Lines changed: 4 additions & 1 deletion
diff --git a/‎data/ccolon.csv‎
Lines changed: 1968 additions & 0 deletions b/‎data/ccolon.csv‎
Lines changed: 1968 additions & 0 deletions
diff --git a/‎docs/src/assets/references.bib‎
Lines changed: 55 additions & 3 deletions b/‎docs/src/assets/references.bib‎
Lines changed: 55 additions & 3 deletions
diff --git a/‎docs/src/getting_started.md‎
Lines changed: 35 additions & 1 deletion b/‎docs/src/getting_started.md‎
Lines changed: 35 additions & 1 deletion
diff --git a/‎src/GraffeoTest.jl‎
Lines changed: 77 additions & 50 deletions b/‎src/GraffeoTest.jl‎
Lines changed: 77 additions & 50 deletions
diff --git a/‎src/EdererI.jl‎ ‎src/NPNSEMethods/EdererI.jl‎src/EdererI.jl renamed to src/NPNSEMethods/EdererI.jl
Lines changed: 6 additions & 6 deletions b/‎src/EdererI.jl‎ ‎src/NPNSEMethods/EdererI.jl‎src/EdererI.jl renamed to src/NPNSEMethods/EdererI.jl
Lines changed: 6 additions & 6 deletions
diff --git a/‎src/EdererII.jl‎ ‎src/NPNSEMethods/EdererII.jl‎src/EdererII.jl renamed to src/NPNSEMethods/EdererII.jl
Lines changed: 6 additions & 6 deletions b/‎src/EdererII.jl‎ ‎src/NPNSEMethods/EdererII.jl‎src/EdererII.jl renamed to src/NPNSEMethods/EdererII.jl
Lines changed: 6 additions & 6 deletions
@@ -26,8 +26,8 @@ jobs:
       fail-fast: false
       matrix:
         version:
-          - '1.9'
           - '1.10'
+          - '1' 
         os:
           - ubuntu-latest
         arch:
 
@@ -1,14 +1,17 @@
 name = "NetSurvival"
 uuid = "8f9d5d0e-dd2e-4568-92d4-f8c5d34f25cf"
 authors = ["Oskar Laverny <oskar.laverny@univ-amu.fr> and contributors"]
-version = "0.1.1"
+version = "0.1.2"
 
 [deps]
 CSV = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b"
+Copulas = "ae264745-0b69-425e-9d9d-cf662c5eec93"
 DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
 Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f"
+ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210"
 LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
 RateTables = "d40fb65e-c2ee-4113-9e14-cb96ca0acb32"
+Roots = "f2b01f46-fcfa-551c-844a-d8ac1e96c665"
 StatsAPI = "82ae8749-77ed-4fe6-ae5f-f523153014b0"
 StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91"
 StatsModels = "3eaba693-59b7-5ba5-a881-562e759f1c8d"
 
@@ -1,5 +1,5 @@
 @article{PoharPerme2012,
-    author = {Perme, Maja Pohar and Stare, Janez and Estève, Jacques},
+    author = {{Pohar Perme}, Maja and Stare, Janez and Estève, Jacques},
     title = "{On Estimation in Relative Survival}",
     journal = {Biometrics},
     volume = {68},
@@ -92,9 +92,9 @@ @book{ABGK1993
   doi = {10.1007/978-1-4612-4348-9},
 }
 
-@article{PermePavlik2018,
+@article{Pavlik2018,
   title={Nonparametric relative survival analysis with the R package relsurv},
-  author={Perme, Maja Pohar and Pavlic, Klemen},
+  author={{Pohar Perme}, Maja  and Pavlic, Klemen},
   journal={Journal of Statistical Software},
   volume={87},
   pages={1--27},
@@ -119,4 +119,56 @@ @article{cronin2000cumulative
   pages={1729--1740},
   year={2000},
   publisher={Wiley Online Library}
+}
+
+@article{Zadnik2012,
+  title={Cancer burden in Slovenia in comparison with the burden in other European countries},
+  author={Zadnik, Vesna and {\v{Z}}akelj, Maja Primic and Krajc, Mateja},
+  journal={Slovenian Medical Journal},
+  volume={81},
+  number={5},
+  year={2012}
+}
+
+@article{Zadnik2016,
+  title={Cancer patients’ survival: standard calculation methods and some considerations regarding their interpretation},
+  author={Zadnik, Vesna and {\v{Z}}agar, Tina and {\v{Z}}akelj, Maja Primic},
+  journal={Slovenian Journal of Public Health},
+  volume={55},
+  number={2},
+  pages={144--151},
+  year={2016}
+}
+
+@article{Giorgi2003,
+  title={A relative survival regression model using B-spline functions to model non-proportional hazards},
+  author={Giorgi, Roch and Abrahamowicz, Michal and Quantin, Catherine and Bolard, Philippe and Estève, Jacques and Gouvernet, Joanny and Faivre, Jean},
+  journal={Statistics in medicine},
+  volume={22},
+  number={17},
+  pages={2767--2784},
+  year={2003},
+  publisher={Wiley Online Library}
+}
+
+@article{Wolski2020,
+  title = {A Permutation Test Based on the Restricted Mean Survival Time for Comparison of Net Survival Distributions in Non-Proportional Excess Hazard Settings},
+  author = {Wolski, Anna and Graff{\'e}o, Nathalie and Giorgi, Roch and {the CENSUR working survival group}},
+  year = {2020},
+  month = jun,
+  journal = {Statistical Methods in Medical Research},
+  volume = {29},
+  number = {6},
+  pages = {1612--1623},
+  doi = {10.1177/0962280219870217},
+}
+
+@article{Laverny2025,
+      title={Non-parametric estimation of net survival under dependence between death causes}, 
+      author={Oskar Laverny and Nathalie Grafféo and Roch Giorgi},
+      year={2025},
+      eprint={2502.09273},
+      archivePrefix={arXiv},
+      primaryClass={math.ST},
+      url={https://arxiv.org/abs/2502.09273}, 
 }
@@ -54,7 +54,7 @@ With these definitions and assumptions in mind, we will now present the four dif
 
 where, in the variances, it is understood that when no more individuals are at risk $0/0$ gives $0$. 
 
-The Pohar Perme estimator [PoharPerme2012](@cite) is the newest addition to relative survival analysis between the four methods, particularly designed to handle situations where covariates may change over time. It is trusted from the field (see e.g. [PermePavlik2018](@cite) and [CharvatBelot2021](@cite)) that only this estimator should really be used, the other ones being included mostly for historical reasons and comparisons. 
+The Pohar Perme estimator [PoharPerme2012](@cite) is the newest of the four methods in relative survival analysis, designed in particular to handle situations where covariates may change over time. It is widely accepted in the field (see e.g. [Pavlik2018](@cite) and [CharvatBelot2021](@cite)) that only this estimator should really be used, the other ones being included mostly for historical reasons and comparisons. 
 
 
 ```@docs
@@ -131,6 +131,40 @@ While the estimated lifepsan is directly taken from the `expectation` function.
 nessie
 ```
 
+## Relaxing the independence assumption
+
+The independence assumption on the random vector $(E,P)$ can be relaxed by specifying a dependence structure for this random vector, defined by a copula from [Copulas.jl](https://github.com/lrnv/Copulas.jl). The underlying method for computing the net survival, its variance, and the associated log-rank tests under these dependence assumptions is described in [Laverny2025](@cite).
+
+The generalization of the Pohar Perme estimator with a given copula `C::Copulas.Copula` can be used as follows: 
+
+```julia
+C = FrankCopula(2,-10)
+fit(GenPoharPerme(C), @formula(Surv(time, status)~ x1 + x2), data, ratetable)
+fit(GraffeoTest(C), @formula(Surv(time, status)~ x1 + x2), data, ratetable)
+```
+
+By default, `GraffeoTest(C::Copula)` uses `GenPoharPerme(C)` as the method to compute net survival in each category, but this modification also allows other methods to be used, such as: 
+
+```julia
+fit(GraffeoTest(Ederer1()), @formula(Surv(time, status)~ x1 + x2), data, ratetable)
+```
+
+Even though these results are not supported by any theoretical work and are probably meaningless, it is fun to see that the code goes through, thanks to the modularity of Julia's dispatch, even on routes it was not designed for.
+
+```@docs
+GenPoharPerme
+```
+
+
+## Available Datasets
+
+Two classroom datasets are provided in the package: 
+
+```@docs
+colrec
+ccolon
+```
+
 ## References
 
 ```@bibliography
 
@@ -49,17 +49,19 @@ The produced test statistics is supposed to follow a chi squared distribution un
 References: 
 * [Graffeo2016](@cite) Grafféo, Nathalie and Castell, Fabienne and Belot, Aurélien and Giorgi, Roch (2016). A Log-Rank-Type Test to Compare Net Survival Distributions.  
 """
-struct GraffeoTest
-    ∂N::Array{Float64, 3}
-    ∂V::Array{Float64, 3}
-    ∂Z::Array{Float64, 3}
-    D::Array{Float64, 3}
-    R::Array{Float64, 3}
-    ∂VZ::Array{Float64, 4}
+struct GraffeoTest{Method}
+    method::Method
+    t::Array{Float64,1}
+    ∂N::Array{Float64, 2}
+    ∂V::Array{Float64, 2}
+    ∂Z::Array{Float64, 2}
+    D::Array{Float64, 2}
+    R::Array{Float64, 2}
+    ∂VZ::Array{Float64, 3}
     stat::Float64
     df::Int64
     pval::Float64
-    function GraffeoTest(T, Δ, age, year, rate_preds, strata, group, ratetable)
+    function GraffeoTest(method::M, T, Δ, age, year, rate_preds, strata, group, ratetable)  where M<:NPNSMethod
 
         # This version of the test is HIGHLY INEFFICIENT. 
         # We should avoid allocating that much memory. 
@@ -70,80 +72,83 @@ struct GraffeoTest
         # get stratas and groups, count them.  
         stratas = unique(strata)
         groups  = unique(group)
-        nstrata = length(stratas)
         ngroups = length(groups)
 
         # Allocate: 
-        ∂N  = zeros(nstrata, ngroups, length(grid))
-        ∂V  = zeros(nstrata, ngroups, length(grid))
-        ∂Z  = zeros(nstrata, ngroups, length(grid))
-        D   = zeros(nstrata, ngroups, length(grid))
-        R   = zeros(nstrata, ngroups, length(grid))
-        ∂VZ = zeros(nstrata, ngroups, ngroups, length(grid))
-
-        num_excess   = zero(grid)
-        num_pop      = zero(grid)
-        num_variance = zero(grid)
-        den_pop      = zero(grid)
-        den_excess   = zero(grid)
+        ∂N  = zeros(ngroups, length(grid))
+        ∂V  = zeros(ngroups, length(grid))
+        ∂Z  = zeros(ngroups, length(grid))
+        D   = zeros(ngroups, length(grid))
+        R   = zeros(ngroups, length(grid))
+        ∂VZ = zeros(ngroups, ngroups, length(grid))
+
+        ∂Nₒ = zero(grid)
+        ∂Nₚ = zero(grid)
+        ∂Vₑ = zero(grid)
+        Yₚ  = zero(grid)
+        Yₒ = zero(grid)
         ∂t = [diff(grid)...,1.0]
 
         # Compute Pohar Perme numerator and denominators on each strata&group (s,g)
         for s in eachindex(stratas)
             for g in eachindex(groups)
                 idx = (group .== groups[g]) .&& (strata .== stratas[s])
-
-                num_excess   .= 0
-                num_pop      .= 0
-                num_variance .= 0
-                den_pop      .= 0
-                den_excess   .= 0
-                Λ!(PoharPermeMethod, num_excess, den_excess, num_pop, den_pop, num_variance, T[idx], Δ[idx], age[idx], year[idx], rate_preds[idx,:], ratetable, grid, ∂t)
-                ∂N[s, g, :] .= num_excess.- num_pop
-                ∂V[s, g, :] .= num_variance
-                D[s, g, :]  .= den_excess
+                ∂Nₒ .= 0
+                ∂Nₚ .= 0
+                ∂Vₑ  .= 0
+                Yₚ  .= 0
+                Yₒ  .= 0
+                Λ!(method, ∂Nₒ, Yₒ, ∂Nₚ, Yₚ, ∂Vₑ, T[idx], Δ[idx], age[idx], year[idx], rate_preds[idx,:], ratetable, grid, ∂t)
+                ∂N[g, :] .= ∂Nₒ .- ∂Nₚ
+                ∂V[g, :] .= ∂Vₑ
+                D[g, :]  .= Yₒ
             end
-        end
-
-        # renormalize on groups, be carefull for zeros. 
-        R .= ifelse.(sum(D,dims=2) .== 0, 0, D ./ sum(D,dims=2))
-        ∂Z .= ∂N .- R .* sum(∂N,dims=2)
+            
+            R .= ifelse.(sum(D,dims=1) .== 0, 0, D ./ sum(D,dims=1))
+            ∂Z .+= ∂N .- R .* sum(∂N,dims=1)
 
-        # Compute test variance on each strata
-        for s in eachindex(stratas)
+            # Compute test variance
             for ℓ in eachindex(groups)
                 for g in eachindex(groups)
                     for h in eachindex(groups)
                         for t in eachindex(grid)
-                            ∂VZ[s, g, h,t] += ((g==ℓ) - R[s, g, t]) * ((h==ℓ) - R[s, h, t]) .* ∂V[s, ℓ, t]
+                            ∂VZ[g, h,t] += ((g==ℓ) - R[g, t]) * ((h==ℓ) - R[h, t]) .* ∂V[ℓ, t]
                         end
                     end
                 end
             end
         end
 
-        # Cumulate accross time and stratas
-        Z =  dropdims(sum(∂Z, dims=(1,3)), dims=(1,3))
-        VZ = dropdims(sum(∂VZ, dims=(1,4)), dims=(1,4))
+        # Cumulate across time
+        Z =  dropdims(sum(∂Z, dims=2), dims=2)
+        VZ = dropdims(sum(∂VZ, dims=3), dims=3)
 
         # Finally compute the stat and p-values:
         stat = dot(Z[1:end-1],(VZ[1:end-1,1:end-1] \ Z[1:end-1])) # test statistic
         df = ngroups-1 # number of degree of freedom of the chi-square test
         pval = ccdf(Chisq(df), stat[1]) # Obtained p-value. 
-        return new(∂N, ∂V, ∂Z, D, R, ∂VZ, stat[1], df, pval)
+        return new{M}(method,grid,∂N, ∂V, ∂Z, D, R, ∂VZ, stat[1], df, pval)
     end
 end
 
-function Base.show(io::IO, test::GraffeoTest)
-    println(io, "Grafféo's log-rank-type-test")
-    df = DataFrame(test_statistic = test.stat, degrees_of_freedom = test.df, p_value = test.pval)
-    show(io, df)
+struct GraffeoTestHolder{T}
+    m::T
 end
 
-# The fitting and formula interfaces should be here. 
+GraffeoTest(T, Δ, age, year, rate_preds, strata, group, ratetable) = GraffeoTest(PoharPermeMethod(),T, Δ, age, year, rate_preds, strata, group, ratetable)
 
-function StatsBase.fit(::Type{E}, formula::FormulaTerm, df::DataFrame, rt::RateTables.AbstractRateTable) where {E<:GraffeoTest}
+GraffeoTest()                                    = GraffeoTestHolder(PoharPermeMethod())
+GraffeoTest(m::M)      where M<:NPNSMethod       = GraffeoTestHolder(m)
+GraffeoTest{M}()       where M<:NPNSMethod       = GraffeoTestHolder(M())
+GraffeoTest(::Type{M}) where M<:NPNSMethod       = GraffeoTestHolder(M())
+GraffeoTest(::Type{NPNSEstimator{M}}) where M<:NPNSMethod       = GraffeoTestHolder(M())
+GraffeoTest(C::Cop)    where Cop<:Copulas.Copula = GraffeoTest(GenPoharPermeMethod(C))
 
+StatsBase.fit(X::GraffeoTestHolder{M}, formula, df, rt) where M<:NPNSMethod   = StatsBase.fit(GraffeoTest, X.m,                formula, df, rt)
+StatsBase.fit(::Type{GraffeoTest},     formula, df, rt)                       = StatsBase.fit(GraffeoTest(PoharPermeMethod()), formula, df, rt)
+StatsBase.fit(::Type{GraffeoTest{M}},  formula, df, rt) where {M<:NPNSMethod} = StatsBase.fit(GraffeoTest, M(),                formula, df, rt)
+
+function StatsBase.fit(GTH::GraffeoTestHolder{M}, formula::FormulaTerm, df::DataFrame, rt::RateTables.AbstractRateTable) where {M<:NPNSMethod}
     terms = StatsModels.termvars(formula.rhs)
     tf = typeof(formula.rhs)
     types = (tf <: AbstractTerm) ? [tf] : typeof.(formula.rhs)
@@ -155,5 +160,27 @@ function StatsBase.fit(::Type{E}, formula::FormulaTerm, df::DataFrame, rt::RateT
     resp = modelcols(apply_schema(formula,schema(df)).lhs,df)
     rate_predictors = _get_rate_predictors(rt,df)
 
-    return GraffeoTest(resp[:,1], resp[:,2], df.age, df.year, select(df,rate_predictors), strata, group, rt)
+    return GraffeoTest(GTH.m, resp[:,1], resp[:,2], df.age, df.year, select(df,rate_predictors), strata, group, rt)
 end
+
+# A show function: 
+function Base.show(io::IO, test::GraffeoTest)
+    println(io, "Grafféo's log-rank-type-test (Method: $(test.method))")
+    df = DataFrame(test_statistic = test.stat, degrees_of_freedom = test.df, p_value = test.pval)
+    show(io, df)
+end
+
+# Potential other extraction methods: 
+function statistic(X::GraffeoTest, at_time_T)
+    # Cumulate across time
+    i_T = findlast(X.t .<= at_time_T)
+    Z =  dropdims(sum(X.∂Z[:,1:i_T], dims=2), dims=2)
+    Γ = dropdims(sum(X.∂VZ[:,:,1:i_T], dims=3), dims=3)
+    stat = dot(Z[1:end-1],Γ[1:end-1,1:end-1] \ Z[1:end-1]) # test statistic
+    return stat
+end 
+function pvalue(X::GraffeoTest, at_time_T)
+    stat = statistic(X, at_time_T)
+    df = size(X.∂Z,1)-1 # number of degree of freedom of the chi-square test
+    return ccdf(Chisq(df), stat[1]) # Obtained p-value.
+end  
@@ -15,7 +15,7 @@ To call this function:
 """
 const EdererI = NPNSEstimator{EdererIMethod}
 
-function Λ!(::Type{EdererIMethod}, num_excess, den_excess, num_pop, den_pop, num_variance, T, Δ, age, year, rate_preds, ratetable, grid, ∂t)
+function Λ!(::EdererIMethod, ∂Nₒ, Yₒ, ∂Nₚ, Yₚ, ∂V, T, Δ, age, year, rate_preds, ratetable, grid, ∂t)
     Tmax= Int(maximum(T))
     for i in eachindex(age)
         Tᵢ = searchsortedlast(grid, T[i])
@@ -26,14 +26,14 @@ function Λ!(::Type{EdererIMethod}, num_excess, den_excess, num_pop, den_pop, nu
             ∂Λₚ         = λₚ * ∂t[j]
             Λₚ         += ∂Λₚ
             Sₚ          = exp(-Λₚ)
-            num_pop[j] += (Sₚ * ∂Λₚ)
-            den_pop[j] += Sₚ
+            ∂Nₚ[j] += (Sₚ * ∂Λₚ)
+            Yₚ[j] += Sₚ
         end
         for j in 1:Tᵢ
-            den_excess[j] += 1
+            Yₒ[j] += 1
         end
-        num_excess[Tᵢ]   += Δ[i]
-        num_variance[Tᵢ]   += Δ[i]    
+        ∂Nₒ[Tᵢ]   += Δ[i]
+        ∂V[Tᵢ]   += Δ[i]    
     end
 end
 
 
@@ -15,19 +15,19 @@ To call this function:
 """
 const EdererII = NPNSEstimator{EdererIIMethod}
 
-function Λ!(::Type{EdererIIMethod}, num_excess, den_excess, num_pop, den_pop, num_variance, T, Δ, age, year, rate_preds, ratetable, grid, ∂t)
+function Λ!(::EdererIIMethod, ∂Nₒ, Yₒ, ∂Nₚ, Yₚ, ∂V, T, Δ, age, year, rate_preds, ratetable, grid, ∂t)
     for i in eachindex(age)
         Tᵢ = searchsortedlast(grid, T[i])
         rtᵢ = ratetable[rate_preds[i,:]...]
         for j in 1:Tᵢ
             λₚ          = daily_hazard(rtᵢ, age[i] + grid[j], year[i] + grid[j])
             ∂Λₚ         = λₚ * ∂t[j]
-            den_excess[j] += 1
-            den_pop[j] += 1
-            num_pop[j] += ∂Λₚ
+            Yₒ[j] += 1
+            Yₚ[j] += 1
+            ∂Nₚ[j] += ∂Λₚ
         end
-        num_excess[Tᵢ]   += Δ[i]  
-        num_variance[Tᵢ]   += Δ[i]  
+        ∂Nₒ[Tᵢ]   += Δ[i]  
+        ∂V[Tᵢ]   += Δ[i]  
     end
 end