diff --git a/R/tidy_survfit.R b/R/tidy_survfit.R
index bf269ac4..ea780173 100644
--- a/R/tidy_survfit.R
+++ b/R/tidy_survfit.R
@@ -102,7 +102,6 @@ tidy_survfit <- function(x,
# if not a survift2 object, do not attempt to extract information from survfit object
if (!inherits(survfit, c("survfit2", "tidycuminc"))) {
x$strata_label <- "strata"
- # make the stratum a factor so it will sort properly later
x$strata <- factor(x$strata, levels = unique(x$strata))
return(x)
}
@@ -160,10 +159,73 @@ tidy_survfit <- function(x,
}
}
- # make the stratum a factor so it will sort properly later
- x$strata <- factor(x$strata, levels = unique(x$strata))
+ # Preserve original factor level ordering
+ factor_reordered <- FALSE
+
+ # Determine the actual grouping variable for strata
+ # For Cox models with strata(), we need to identify which variable is the strata variable
+ grouping_var <- NULL
+ if (!is.null(strata_variables) && length(strata_variables) >= 1) {
+ # Check if this is a Cox model with strata() by looking at the formula
+ formula_text <- deparse(formula)
+ if (grepl("strata\\(", formula_text)) {
+ # Extract the strata variable from strata() function
+ strata_match <- regmatches(formula_text, regexpr("strata\\([^)]+\\)", formula_text))
+ if (length(strata_match) > 0) {
+ # Extract variable name from strata(variable_name)
+ strata_var <- gsub("strata\\(([^)]+)\\)", "\\1", strata_match)
+ strata_var <- trimws(strata_var)
+ if (strata_var %in% names(data)) {
+ grouping_var <- strata_var
+ }
+ }
+ } else {
+ # regular survival model - use the first strata variable
+ grouping_var <- strata_variables[1]
+ }
+ }
+
+ if (!is.null(grouping_var) && grouping_var %in% names(data) && is.factor(data[[grouping_var]])) {
+ orig_levels <- levels(data[[grouping_var]])
+ current_strata_values <- if (is.factor(x$strata)) levels(x$strata) else unique(x$strata)
+
+ # try exact match first, then partial match
+ preserved_levels <- character(0)
+
+ for (orig_level in orig_levels) {
+ # Try exact match first
+ exact_match <- current_strata_values[current_strata_values == orig_level]
+ if (length(exact_match) > 0) {
+ preserved_levels <- c(preserved_levels, exact_match[1])
+ } else {
+ # Try partial match
+ partial_match <- current_strata_values[grepl(orig_level, current_strata_values, fixed = TRUE)]
+ if (length(partial_match) > 0) {
+ preserved_levels <- c(preserved_levels, partial_match[1])
+ }
+ }
+ }
+
+ # Add any remaining strata that didn't match
+ remaining_strata <- setdiff(current_strata_values, preserved_levels)
+ final_levels <- c(preserved_levels, remaining_strata)
+
+ # Only use preserved order if we found matches
+ if (length(preserved_levels) > 0) {
+ x$strata <- factor(x$strata, levels = final_levels)
+ factor_reordered <- TRUE
+ } else {
+ x$strata <- factor(x$strata, levels = unique(x$strata))
+ }
+ } else {
+ x$strata <- factor(x$strata, levels = unique(x$strata))
+ }
+
+ # Only reorder data if we actually changed the factor levels
+ if (factor_reordered) {
+ x <- x %>% dplyr::arrange(.data$strata)
+ }
- # return tidy tibble
x
}
diff --git a/tests/testthat/_snaps/add_risktable/sf3-risktable-custom-stats-and-label.svg b/tests/testthat/_snaps/add_risktable/sf3-risktable-custom-stats-and-label.svg
index 6334f450..55d08b9e 100644
--- a/tests/testthat/_snaps/add_risktable/sf3-risktable-custom-stats-and-label.svg
+++ b/tests/testthat/_snaps/add_risktable/sf3-risktable-custom-stats-and-label.svg
@@ -98,10 +98,10 @@
-
-
-
-
+
+
+
+
@@ -187,28 +187,28 @@
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-Male, Asymptomatic
-Male, Symptomatic and ambulatory
-Male, In bed <50% of the day
-Male, In bed > 50% of the day
-Female, Asymptomatic
-Female, Symptomatic and ambulatory
-Female, In bed <50% of the day
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Male, Asymptomatic
+Female, Asymptomatic
+Male, Symptomatic and ambulatory
+Male, In bed <50% of the day
+Male, In bed > 50% of the day
+Female, Symptomatic and ambulatory
+Female, In bed <50% of the day
@@ -282,25 +282,25 @@
18 (13)
6 (24)
1 (28)
-71 (0)
-20 (39)
-5 (51)
-1 (54)
-29 (0)
-6 (22)
-2 (26)
-0 (28)
-0 (28)
-1 (0)
-0 (1)
-0 (1)
-0 (1)
-0 (1)
-27 (0)
-13 (4)
-2 (8)
-0 (9)
-0 (9)
+27 (0)
+13 (4)
+2 (8)
+0 (9)
+0 (9)
+71 (0)
+20 (39)
+5 (51)
+1 (54)
+29 (0)
+6 (22)
+2 (26)
+0 (28)
+0 (28)
+1 (0)
+0 (1)
+0 (1)
+0 (1)
+0 (1)
42 (0)
23 (12)
8 (22)
@@ -338,10 +338,10 @@
Female, In bed <50% of the day
Female, Symptomatic and ambulatory
-Female, Asymptomatic
-Male, In bed > 50% of the day
-Male, In bed <50% of the day
-Male, Symptomatic and ambulatory
+Male, In bed > 50% of the day
+Male, In bed <50% of the day
+Male, Symptomatic and ambulatory
+Female, Asymptomatic
Male, Asymptomatic
@@ -453,22 +453,22 @@
23% (12, 45)
6% (1, 32)
100% (100, 100)
-41% (30, 55)
-13% (6, 28)
-5% (1, 19)
+85% (73, 100)
+52% (30, 88)
+NA% (NA, NA)
+NA% (NA, NA)
100% (100, 100)
-24% (13, 46)
-8% (2, 30)
-NA% (NA, NA)
-NA% (NA, NA)
+41% (30, 55)
+13% (6, 28)
+5% (1, 19)
100% (100, 100)
-NA% (NA, NA)
-NA% (NA, NA)
+24% (13, 46)
+8% (2, 30)
NA% (NA, NA)
NA% (NA, NA)
100% (100, 100)
-85% (73, 100)
-52% (30, 88)
+NA% (NA, NA)
+NA% (NA, NA)
NA% (NA, NA)
NA% (NA, NA)
100% (100, 100)
@@ -508,10 +508,10 @@
Female, In bed <50% of the day
Female, Symptomatic and ambulatory
-Female, Asymptomatic
-Male, In bed > 50% of the day
-Male, In bed <50% of the day
-Male, Symptomatic and ambulatory
+Male, In bed > 50% of the day
+Male, In bed <50% of the day
+Male, Symptomatic and ambulatory
+Female, Asymptomatic
Male, Asymptomatic
diff --git a/tests/testthat/_snaps/add_risktable/sf3-risktable-custom-stats-and-label2.svg b/tests/testthat/_snaps/add_risktable/sf3-risktable-custom-stats-and-label2.svg
index f34b484f..1626f30c 100644
--- a/tests/testthat/_snaps/add_risktable/sf3-risktable-custom-stats-and-label2.svg
+++ b/tests/testthat/_snaps/add_risktable/sf3-risktable-custom-stats-and-label2.svg
@@ -88,10 +88,10 @@
-
-
-
-
+
+
+
+
@@ -177,28 +177,28 @@
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-Male, Asymptomatic
-Male, Symptomatic and ambulatory
-Male, In bed <50% of the day
-Male, In bed > 50% of the day
-Female, Asymptomatic
-Female, Symptomatic and ambulatory
-Female, In bed <50% of the day
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Male, Asymptomatic
+Female, Asymptomatic
+Male, Symptomatic and ambulatory
+Male, In bed <50% of the day
+Male, In bed > 50% of the day
+Female, Symptomatic and ambulatory
+Female, In bed <50% of the day
@@ -272,25 +272,25 @@
18 (13)
6 (24)
1 (28)
-71 (0)
-20 (39)
-5 (51)
-1 (54)
-29 (0)
-6 (22)
-2 (26)
-0 (28)
-0 (28)
-1 (0)
-0 (1)
-0 (1)
-0 (1)
-0 (1)
-27 (0)
-13 (4)
-2 (8)
-0 (9)
-0 (9)
+27 (0)
+13 (4)
+2 (8)
+0 (9)
+0 (9)
+71 (0)
+20 (39)
+5 (51)
+1 (54)
+29 (0)
+6 (22)
+2 (26)
+0 (28)
+0 (28)
+1 (0)
+0 (1)
+0 (1)
+0 (1)
+0 (1)
42 (0)
23 (12)
8 (22)
@@ -328,10 +328,10 @@
Female, In bed <50% of the day
Female, Symptomatic and ambulatory
-Female, Asymptomatic
-Male, In bed > 50% of the day
-Male, In bed <50% of the day
-Male, Symptomatic and ambulatory
+Male, In bed > 50% of the day
+Male, In bed <50% of the day
+Male, Symptomatic and ambulatory
+Female, Asymptomatic
Male, Asymptomatic
diff --git a/tests/testthat/_snaps/add_risktable/sf3-risktable.svg b/tests/testthat/_snaps/add_risktable/sf3-risktable.svg
index d723252c..f71bcd25 100644
--- a/tests/testthat/_snaps/add_risktable/sf3-risktable.svg
+++ b/tests/testthat/_snaps/add_risktable/sf3-risktable.svg
@@ -98,10 +98,10 @@
-
-
-
-
+
+
+
+
@@ -187,28 +187,28 @@
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-Male, Asymptomatic
-Male, Symptomatic and ambulatory
-Male, In bed <50% of the day
-Male, In bed > 50% of the day
-Female, Asymptomatic
-Female, Symptomatic and ambulatory
-Female, In bed <50% of the day
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Male, Asymptomatic
+Female, Asymptomatic
+Male, Symptomatic and ambulatory
+Male, In bed <50% of the day
+Male, In bed > 50% of the day
+Female, Symptomatic and ambulatory
+Female, In bed <50% of the day
@@ -282,23 +282,23 @@
18
6
1
-71
-20
-5
-1
-29
-6
-2
-0
-0
-1
-0
-0
+27
+13
+2
+0
+0
+71
+20
+5
+1
+29
+6
+2
0
0
-27
-13
-2
+1
+0
+0
0
0
42
@@ -338,10 +338,10 @@
Female, In bed <50% of the day
Female, Symptomatic and ambulatory
-Female, Asymptomatic
-Male, In bed > 50% of the day
-Male, In bed <50% of the day
-Male, Symptomatic and ambulatory
+Male, In bed > 50% of the day
+Male, In bed <50% of the day
+Male, Symptomatic and ambulatory
+Female, Asymptomatic
Male, Asymptomatic
@@ -453,24 +453,24 @@
24
28
0
-39
-51
-54
+4
+8
+9
+9
0
-22
-26
-28
-28
+39
+51
+54
0
-1
-1
-1
-1
+22
+26
+28
+28
0
-4
-8
-9
-9
+1
+1
+1
+1
0
12
22
@@ -508,10 +508,10 @@
Female, In bed <50% of the day
Female, Symptomatic and ambulatory
-Female, Asymptomatic
-Male, In bed > 50% of the day
-Male, In bed <50% of the day
-Male, Symptomatic and ambulatory
+Male, In bed > 50% of the day
+Male, In bed <50% of the day
+Male, Symptomatic and ambulatory
+Female, Asymptomatic
Male, Asymptomatic
diff --git a/tests/testthat/test-add_risktable.R b/tests/testthat/test-add_risktable.R
index c908574e..6199c18b 100644
--- a/tests/testthat/test-add_risktable.R
+++ b/tests/testthat/test-add_risktable.R
@@ -336,12 +336,9 @@ test_that("add_risktable() works with ggsurvfit() `start.time` and negative time
test_that("add_risktable() works with multiple survival endpoints (Issue #212)", {
-
os_data <- df_lung %>% dplyr::mutate(PARAM = "Overall Survival")
pfs_data <- df_lung %>% dplyr::mutate(time = time * 0.7, PARAM = "Progression-Free Survival")
combined_data <- dplyr::bind_rows(os_data, pfs_data)
-
-
expect_error(
p <- survfit2(Surv(time, status) ~ PARAM, data = combined_data) %>%
ggsurvfit() + add_risktable(),
diff --git a/tests/testthat/test-tidy_survfit.R b/tests/testthat/test-tidy_survfit.R
index cac90a70..d009ed8d 100644
--- a/tests/testthat/test-tidy_survfit.R
+++ b/tests/testthat/test-tidy_survfit.R
@@ -299,3 +299,141 @@ test_that("tidy_survfit() handles custom transformation functions correctly with
1 - original_survival$estimate
)
})
+
+test_that("tidy_survfit() preserves factor level ordering", {
+ # Test case 1: Non-alphabetical factor levels
+ df_test <- df_lung %>%
+ dplyr::mutate(
+ sex_ordered = factor(sex,
+ levels = c("Male", "Female"), # Male should come first
+ labels = c("Male", "Female"))
+ )
+
+ sf_ordered <- survfit2(Surv(time, status) ~ sex_ordered, data = df_test)
+ result <- tidy_survfit(sf_ordered)
+
+ # Check that Male comes before Female in the factor levels
+ expect_equal(
+ levels(result$strata),
+ c("Male", "Female")
+ )
+
+ # Test case 2: Reverse alphabetical order
+ df_test2 <- df_lung %>%
+ dplyr::mutate(
+ sex_reverse = factor(sex,
+ levels = c("Female", "Male"), # Female should come first
+ labels = c("Female", "Male"))
+ )
+
+ sf_reverse <- survfit2(Surv(time, status) ~ sex_reverse, data = df_test2)
+ result2 <- tidy_survfit(sf_reverse)
+
+ # Check that Female comes before Male in the factor levels
+ expect_equal(
+ levels(result2$strata),
+ c("Female", "Male")
+ )
+
+ # Test case 3: Custom labels with non-alphabetical order
+ df_test3 <- df_lung %>%
+ dplyr::mutate(
+ treatment = factor(ifelse(sex == "Male", "Drug B", "Drug A"),
+ levels = c("Drug B", "Drug A"), # Drug B should come first
+ labels = c("Drug B", "Drug A"))
+ )
+
+ sf_custom <- survfit2(Surv(time, status) ~ treatment, data = df_test3)
+ result3 <- tidy_survfit(sf_custom)
+
+ # Check that Drug B comes before Drug A
+ expect_equal(
+ levels(result3$strata),
+ c("Drug B", "Drug A")
+ )
+})
+
+test_that("tidy_cuminc() preserves factor level ordering", {
+ # Test the exact case from issue #213
+ trial_test <- tidycmprsk::trial %>%
+ dplyr::mutate(
+ trt_ordered = factor(trt,
+ levels = c("Drug B", "Drug A"), # Drug B should come first
+ labels = c("Drug B", "Drug A"))
+ )
+
+ cuminc_obj <- tidycmprsk::cuminc(Surv(ttdeath, death_cr) ~ trt_ordered,
+ data = trial_test)
+ result <- tidy_cuminc(cuminc_obj)
+
+ # Check that Drug B comes before Drug A in the factor levels
+ expect_equal(
+ levels(result$strata),
+ c("Drug B", "Drug A")
+ )
+
+ # Also check that unique values appear in the correct order
+ expect_equal(
+ as.character(unique(result$strata)),
+ c("Drug B", "Drug A")
+ )
+})
+
+test_that("factor ordering fix handles edge cases", {
+ # Test case 1: Single group (no strata)
+ sf_single <- survfit2(Surv(time, status) ~ 1, data = df_lung)
+ expect_error(tidy_survfit(sf_single), NA)
+
+ # Test case 2: Multiple grouping variables (should fall back to original behavior)
+ sf_multi <- survfit2(Surv(time, status) ~ sex + ph.ecog, data = df_lung)
+ result_multi <- tidy_survfit(sf_multi)
+
+ # Should not error and should have strata
+ expect_true("strata" %in% names(result_multi))
+ expect_true(is.factor(result_multi$strata))
+
+ # Test case 3: Non-factor grouping variable (should fall back)
+ df_char <- df_lung %>%
+ dplyr::mutate(sex_char = as.character(sex))
+
+ sf_char <- survfit2(Surv(time, status) ~ sex_char, data = df_char)
+ result_char <- tidy_survfit(sf_char)
+
+ expect_true("strata" %in% names(result_char))
+ expect_true(is.factor(result_char$strata))
+
+ # Test case 4: Factor with unused levels
+ df_unused <- df_lung %>%
+ dplyr::mutate(
+ sex_extra = factor(sex,
+ levels = c("Male", "Female", "Other"),
+ labels = c("Male", "Female", "Other"))
+ )
+
+ sf_unused <- survfit2(Surv(time, status) ~ sex_extra, data = df_unused)
+ result_unused <- tidy_survfit(sf_unused)
+
+ # Should only include levels that actually appear in the data
+ expect_true(all(levels(result_unused$strata) %in% c("Male", "Female")))
+ expect_false("Other" %in% levels(result_unused$strata))
+})
+
+test_that("backward compatibility maintained", {
+ # Test that regular survival objects still work the same way
+ sf_regular <- survfit(Surv(time, status) ~ sex, data = df_lung)
+
+ expect_error(tidy_survfit(sf_regular), NA)
+
+ result <- tidy_survfit(sf_regular)
+ expect_true("strata" %in% names(result))
+ expect_true(is.factor(result$strata))
+
+ # Test multi-state models still work
+ sfms <- survfit2(Surv(ttdeath, death_cr) ~ trt, data = tidycmprsk::trial)
+ expect_error(tidy_survfit(sfms), NA)
+
+ result_ms <- tidy_survfit(sfms)
+ expect_true("strata" %in% names(result_ms))
+ expect_true(is.factor(result_ms$strata))
+})
+