diff --git a/R/tidy_survfit.R b/R/tidy_survfit.R index bf269ac4..ea780173 100644 --- a/R/tidy_survfit.R +++ b/R/tidy_survfit.R @@ -102,7 +102,6 @@ tidy_survfit <- function(x, # if not a survift2 object, do not attempt to extract information from survfit object if (!inherits(survfit, c("survfit2", "tidycuminc"))) { x$strata_label <- "strata" - # make the stratum a factor so it will sort properly later x$strata <- factor(x$strata, levels = unique(x$strata)) return(x) } @@ -160,10 +159,73 @@ tidy_survfit <- function(x, } } - # make the stratum a factor so it will sort properly later - x$strata <- factor(x$strata, levels = unique(x$strata)) + # Preserve original factor level ordering + factor_reordered <- FALSE + + # Determine the actual grouping variable for strata + # For Cox models with strata(), we need to identify which variable is the strata variable + grouping_var <- NULL + if (!is.null(strata_variables) && length(strata_variables) >= 1) { + # Check if this is a Cox model with strata() by looking at the formula + formula_text <- deparse(formula) + if (grepl("strata\\(", formula_text)) { + # Extract the strata variable from strata() function + strata_match <- regmatches(formula_text, regexpr("strata\\([^)]+\\)", formula_text)) + if (length(strata_match) > 0) { + # Extract variable name from strata(variable_name) + strata_var <- gsub("strata\\(([^)]+)\\)", "\\1", strata_match) + strata_var <- trimws(strata_var) + if (strata_var %in% names(data)) { + grouping_var <- strata_var + } + } + } else { + # regular survival model - use the first strata variable + grouping_var <- strata_variables[1] + } + } + + if (!is.null(grouping_var) && grouping_var %in% names(data) && is.factor(data[[grouping_var]])) { + orig_levels <- levels(data[[grouping_var]]) + current_strata_values <- if (is.factor(x$strata)) levels(x$strata) else unique(x$strata) + + # try exact match first, then partial match + preserved_levels <- character(0) + + for (orig_level in orig_levels) { + # Try exact match first + exact_match <- current_strata_values[current_strata_values == orig_level] + if (length(exact_match) > 0) { + preserved_levels <- c(preserved_levels, exact_match[1]) + } else { + # Try partial match + partial_match <- current_strata_values[grepl(orig_level, current_strata_values, fixed = TRUE)] + if (length(partial_match) > 0) { + preserved_levels <- c(preserved_levels, partial_match[1]) + } + } + } + + # Add any remaining strata that didn't match + remaining_strata <- setdiff(current_strata_values, preserved_levels) + final_levels <- c(preserved_levels, remaining_strata) + + # Only use preserved order if we found matches + if (length(preserved_levels) > 0) { + x$strata <- factor(x$strata, levels = final_levels) + factor_reordered <- TRUE + } else { + x$strata <- factor(x$strata, levels = unique(x$strata)) + } + } else { + x$strata <- factor(x$strata, levels = unique(x$strata)) + } + + # Only reorder data if we actually changed the factor levels + if (factor_reordered) { + x <- x %>% dplyr::arrange(.data$strata) + } - # return tidy tibble x } diff --git a/tests/testthat/_snaps/add_risktable/sf3-risktable-custom-stats-and-label.svg b/tests/testthat/_snaps/add_risktable/sf3-risktable-custom-stats-and-label.svg index 6334f450..55d08b9e 100644 --- a/tests/testthat/_snaps/add_risktable/sf3-risktable-custom-stats-and-label.svg +++ b/tests/testthat/_snaps/add_risktable/sf3-risktable-custom-stats-and-label.svg @@ -98,10 +98,10 @@ - - - - + + + + @@ -187,28 +187,28 @@ - - - - - - - - - - - - - - - -Male, Asymptomatic -Male, Symptomatic and ambulatory -Male, In bed <50% of the day -Male, In bed > 50% of the day -Female, Asymptomatic -Female, Symptomatic and ambulatory -Female, In bed <50% of the day + + + + + + + + + + + + + + + +Male, Asymptomatic +Female, Asymptomatic +Male, Symptomatic and ambulatory +Male, In bed <50% of the day +Male, In bed > 50% of the day +Female, Symptomatic and ambulatory +Female, In bed <50% of the day @@ -282,25 +282,25 @@ 18 (13) 6 (24) 1 (28) -71 (0) -20 (39) -5 (51) -1 (54) -29 (0) -6 (22) -2 (26) -0 (28) -0 (28) -1 (0) -0 (1) -0 (1) -0 (1) -0 (1) -27 (0) -13 (4) -2 (8) -0 (9) -0 (9) +27 (0) +13 (4) +2 (8) +0 (9) +0 (9) +71 (0) +20 (39) +5 (51) +1 (54) +29 (0) +6 (22) +2 (26) +0 (28) +0 (28) +1 (0) +0 (1) +0 (1) +0 (1) +0 (1) 42 (0) 23 (12) 8 (22) @@ -338,10 +338,10 @@ Female, In bed <50% of the day Female, Symptomatic and ambulatory -Female, Asymptomatic -Male, In bed > 50% of the day -Male, In bed <50% of the day -Male, Symptomatic and ambulatory +Male, In bed > 50% of the day +Male, In bed <50% of the day +Male, Symptomatic and ambulatory +Female, Asymptomatic Male, Asymptomatic @@ -453,22 +453,22 @@ 23% (12, 45) 6% (1, 32) 100% (100, 100) -41% (30, 55) -13% (6, 28) -5% (1, 19) +85% (73, 100) +52% (30, 88) +NA% (NA, NA) +NA% (NA, NA) 100% (100, 100) -24% (13, 46) -8% (2, 30) -NA% (NA, NA) -NA% (NA, NA) +41% (30, 55) +13% (6, 28) +5% (1, 19) 100% (100, 100) -NA% (NA, NA) -NA% (NA, NA) +24% (13, 46) +8% (2, 30) NA% (NA, NA) NA% (NA, NA) 100% (100, 100) -85% (73, 100) -52% (30, 88) +NA% (NA, NA) +NA% (NA, NA) NA% (NA, NA) NA% (NA, NA) 100% (100, 100) @@ -508,10 +508,10 @@ Female, In bed <50% of the day Female, Symptomatic and ambulatory -Female, Asymptomatic -Male, In bed > 50% of the day -Male, In bed <50% of the day -Male, Symptomatic and ambulatory +Male, In bed > 50% of the day +Male, In bed <50% of the day +Male, Symptomatic and ambulatory +Female, Asymptomatic Male, Asymptomatic diff --git a/tests/testthat/_snaps/add_risktable/sf3-risktable-custom-stats-and-label2.svg b/tests/testthat/_snaps/add_risktable/sf3-risktable-custom-stats-and-label2.svg index f34b484f..1626f30c 100644 --- a/tests/testthat/_snaps/add_risktable/sf3-risktable-custom-stats-and-label2.svg +++ b/tests/testthat/_snaps/add_risktable/sf3-risktable-custom-stats-and-label2.svg @@ -88,10 +88,10 @@ - - - - + + + + @@ -177,28 +177,28 @@ - - - - - - - - - - - - - - - -Male, Asymptomatic -Male, Symptomatic and ambulatory -Male, In bed <50% of the day -Male, In bed > 50% of the day -Female, Asymptomatic -Female, Symptomatic and ambulatory -Female, In bed <50% of the day + + + + + + + + + + + + + + + +Male, Asymptomatic +Female, Asymptomatic +Male, Symptomatic and ambulatory +Male, In bed <50% of the day +Male, In bed > 50% of the day +Female, Symptomatic and ambulatory +Female, In bed <50% of the day @@ -272,25 +272,25 @@ 18 (13) 6 (24) 1 (28) -71 (0) -20 (39) -5 (51) -1 (54) -29 (0) -6 (22) -2 (26) -0 (28) -0 (28) -1 (0) -0 (1) -0 (1) -0 (1) -0 (1) -27 (0) -13 (4) -2 (8) -0 (9) -0 (9) +27 (0) +13 (4) +2 (8) +0 (9) +0 (9) +71 (0) +20 (39) +5 (51) +1 (54) +29 (0) +6 (22) +2 (26) +0 (28) +0 (28) +1 (0) +0 (1) +0 (1) +0 (1) +0 (1) 42 (0) 23 (12) 8 (22) @@ -328,10 +328,10 @@ Female, In bed <50% of the day Female, Symptomatic and ambulatory -Female, Asymptomatic -Male, In bed > 50% of the day -Male, In bed <50% of the day -Male, Symptomatic and ambulatory +Male, In bed > 50% of the day +Male, In bed <50% of the day +Male, Symptomatic and ambulatory +Female, Asymptomatic Male, Asymptomatic diff --git a/tests/testthat/_snaps/add_risktable/sf3-risktable.svg b/tests/testthat/_snaps/add_risktable/sf3-risktable.svg index d723252c..f71bcd25 100644 --- a/tests/testthat/_snaps/add_risktable/sf3-risktable.svg +++ b/tests/testthat/_snaps/add_risktable/sf3-risktable.svg @@ -98,10 +98,10 @@ - - - - + + + + @@ -187,28 +187,28 @@ - - - - - - - - - - - - - - - -Male, Asymptomatic -Male, Symptomatic and ambulatory -Male, In bed <50% of the day -Male, In bed > 50% of the day -Female, Asymptomatic -Female, Symptomatic and ambulatory -Female, In bed <50% of the day + + + + + + + + + + + + + + + +Male, Asymptomatic +Female, Asymptomatic +Male, Symptomatic and ambulatory +Male, In bed <50% of the day +Male, In bed > 50% of the day +Female, Symptomatic and ambulatory +Female, In bed <50% of the day @@ -282,23 +282,23 @@ 18 6 1 -71 -20 -5 -1 -29 -6 -2 -0 -0 -1 -0 -0 +27 +13 +2 +0 +0 +71 +20 +5 +1 +29 +6 +2 0 0 -27 -13 -2 +1 +0 +0 0 0 42 @@ -338,10 +338,10 @@ Female, In bed <50% of the day Female, Symptomatic and ambulatory -Female, Asymptomatic -Male, In bed > 50% of the day -Male, In bed <50% of the day -Male, Symptomatic and ambulatory +Male, In bed > 50% of the day +Male, In bed <50% of the day +Male, Symptomatic and ambulatory +Female, Asymptomatic Male, Asymptomatic @@ -453,24 +453,24 @@ 24 28 0 -39 -51 -54 +4 +8 +9 +9 0 -22 -26 -28 -28 +39 +51 +54 0 -1 -1 -1 -1 +22 +26 +28 +28 0 -4 -8 -9 -9 +1 +1 +1 +1 0 12 22 @@ -508,10 +508,10 @@ Female, In bed <50% of the day Female, Symptomatic and ambulatory -Female, Asymptomatic -Male, In bed > 50% of the day -Male, In bed <50% of the day -Male, Symptomatic and ambulatory +Male, In bed > 50% of the day +Male, In bed <50% of the day +Male, Symptomatic and ambulatory +Female, Asymptomatic Male, Asymptomatic diff --git a/tests/testthat/test-add_risktable.R b/tests/testthat/test-add_risktable.R index c908574e..6199c18b 100644 --- a/tests/testthat/test-add_risktable.R +++ b/tests/testthat/test-add_risktable.R @@ -336,12 +336,9 @@ test_that("add_risktable() works with ggsurvfit() `start.time` and negative time test_that("add_risktable() works with multiple survival endpoints (Issue #212)", { - os_data <- df_lung %>% dplyr::mutate(PARAM = "Overall Survival") pfs_data <- df_lung %>% dplyr::mutate(time = time * 0.7, PARAM = "Progression-Free Survival") combined_data <- dplyr::bind_rows(os_data, pfs_data) - - expect_error( p <- survfit2(Surv(time, status) ~ PARAM, data = combined_data) %>% ggsurvfit() + add_risktable(), diff --git a/tests/testthat/test-tidy_survfit.R b/tests/testthat/test-tidy_survfit.R index cac90a70..d009ed8d 100644 --- a/tests/testthat/test-tidy_survfit.R +++ b/tests/testthat/test-tidy_survfit.R @@ -299,3 +299,141 @@ test_that("tidy_survfit() handles custom transformation functions correctly with 1 - original_survival$estimate ) }) + +test_that("tidy_survfit() preserves factor level ordering", { + # Test case 1: Non-alphabetical factor levels + df_test <- df_lung %>% + dplyr::mutate( + sex_ordered = factor(sex, + levels = c("Male", "Female"), # Male should come first + labels = c("Male", "Female")) + ) + + sf_ordered <- survfit2(Surv(time, status) ~ sex_ordered, data = df_test) + result <- tidy_survfit(sf_ordered) + + # Check that Male comes before Female in the factor levels + expect_equal( + levels(result$strata), + c("Male", "Female") + ) + + # Test case 2: Reverse alphabetical order + df_test2 <- df_lung %>% + dplyr::mutate( + sex_reverse = factor(sex, + levels = c("Female", "Male"), # Female should come first + labels = c("Female", "Male")) + ) + + sf_reverse <- survfit2(Surv(time, status) ~ sex_reverse, data = df_test2) + result2 <- tidy_survfit(sf_reverse) + + # Check that Female comes before Male in the factor levels + expect_equal( + levels(result2$strata), + c("Female", "Male") + ) + + # Test case 3: Custom labels with non-alphabetical order + df_test3 <- df_lung %>% + dplyr::mutate( + treatment = factor(ifelse(sex == "Male", "Drug B", "Drug A"), + levels = c("Drug B", "Drug A"), # Drug B should come first + labels = c("Drug B", "Drug A")) + ) + + sf_custom <- survfit2(Surv(time, status) ~ treatment, data = df_test3) + result3 <- tidy_survfit(sf_custom) + + # Check that Drug B comes before Drug A + expect_equal( + levels(result3$strata), + c("Drug B", "Drug A") + ) +}) + +test_that("tidy_cuminc() preserves factor level ordering", { + # Test the exact case from issue #213 + trial_test <- tidycmprsk::trial %>% + dplyr::mutate( + trt_ordered = factor(trt, + levels = c("Drug B", "Drug A"), # Drug B should come first + labels = c("Drug B", "Drug A")) + ) + + cuminc_obj <- tidycmprsk::cuminc(Surv(ttdeath, death_cr) ~ trt_ordered, + data = trial_test) + result <- tidy_cuminc(cuminc_obj) + + # Check that Drug B comes before Drug A in the factor levels + expect_equal( + levels(result$strata), + c("Drug B", "Drug A") + ) + + # Also check that unique values appear in the correct order + expect_equal( + as.character(unique(result$strata)), + c("Drug B", "Drug A") + ) +}) + +test_that("factor ordering fix handles edge cases", { + # Test case 1: Single group (no strata) + sf_single <- survfit2(Surv(time, status) ~ 1, data = df_lung) + expect_error(tidy_survfit(sf_single), NA) + + # Test case 2: Multiple grouping variables (should fall back to original behavior) + sf_multi <- survfit2(Surv(time, status) ~ sex + ph.ecog, data = df_lung) + result_multi <- tidy_survfit(sf_multi) + + # Should not error and should have strata + expect_true("strata" %in% names(result_multi)) + expect_true(is.factor(result_multi$strata)) + + # Test case 3: Non-factor grouping variable (should fall back) + df_char <- df_lung %>% + dplyr::mutate(sex_char = as.character(sex)) + + sf_char <- survfit2(Surv(time, status) ~ sex_char, data = df_char) + result_char <- tidy_survfit(sf_char) + + expect_true("strata" %in% names(result_char)) + expect_true(is.factor(result_char$strata)) + + # Test case 4: Factor with unused levels + df_unused <- df_lung %>% + dplyr::mutate( + sex_extra = factor(sex, + levels = c("Male", "Female", "Other"), + labels = c("Male", "Female", "Other")) + ) + + sf_unused <- survfit2(Surv(time, status) ~ sex_extra, data = df_unused) + result_unused <- tidy_survfit(sf_unused) + + # Should only include levels that actually appear in the data + expect_true(all(levels(result_unused$strata) %in% c("Male", "Female"))) + expect_false("Other" %in% levels(result_unused$strata)) +}) + +test_that("backward compatibility maintained", { + # Test that regular survival objects still work the same way + sf_regular <- survfit(Surv(time, status) ~ sex, data = df_lung) + + expect_error(tidy_survfit(sf_regular), NA) + + result <- tidy_survfit(sf_regular) + expect_true("strata" %in% names(result)) + expect_true(is.factor(result$strata)) + + # Test multi-state models still work + sfms <- survfit2(Surv(ttdeath, death_cr) ~ trt, data = tidycmprsk::trial) + expect_error(tidy_survfit(sfms), NA) + + result_ms <- tidy_survfit(sfms) + expect_true("strata" %in% names(result_ms)) + expect_true(is.factor(result_ms$strata)) +}) +