Add three complex formatter test cases to misc.R.test

Copilot · felix-andreas · Copilot · commit 22c48f9e38fb · 2025-07-22T21:59:36.000Z
Co-authored-by: felix-andreas <24651767+felix-andreas@users.noreply.github.com>
diff --git a/crates/roughly/tests/format/misc.R.test b/crates/roughly/tests/format/misc.R.test
@@ -36,3 +36,221 @@ fred <- FALSE
 plugh <- NULL
 xyzzy <- Inf
 thud <- NaN
+
+#==== complex_data_manipulation
+#---- nested_data_pipeline
+# Complex data manipulation with nested function calls and pipeline operators
+library(dplyr)
+library(tidyr)
+
+result <- raw_data %>%
+    filter(
+        !is.na(value) &
+        year >= 2020 &
+        category %in% c("A", "B", "C")
+    ) %>%
+    group_by(
+        region,
+        category,
+        quarter = paste0("Q", ceiling(month(date) / 3))
+    ) %>%
+    summarise(
+        mean_value = mean(value, na.rm = TRUE),
+        median_value = median(value, na.rm = TRUE),
+        count = n(),
+        .groups = "drop"
+    ) %>%
+    pivot_wider(
+        names_from = quarter,
+        values_from = c(mean_value, median_value, count),
+        names_sep = "_"
+    ) %>%
+    mutate(
+        total_mean = rowMeans(
+            select(., starts_with("mean_value")),
+            na.rm = TRUE
+        ),
+        variance_ratio = case_when(
+            total_mean > 100 ~ "high",
+            total_mean > 50 ~ "medium",
+            TRUE ~ "low"
+        )
+    ) %>%
+    arrange(desc(total_mean))
+
+#==== statistical_modeling
+#---- complex_regression_analysis
+# Advanced statistical modeling with complex nested expressions
+library(lme4)
+library(broom.mixed)
+
+# Complex mixed-effects model with nested grouping
+model <- lmer(
+    response ~ 
+        poly(time, 3) * treatment +
+        I(log(baseline_measure + 1)) +
+        scale(covariate1) +
+        I(covariate2^2) +
+        interaction_term +
+        (1 + time | subject_id) +
+        (1 | site_id/clinic_id),
+    data = analysis_data,
+    weights = 1 / sqrt(variance_weights),
+    control = lmerControl(
+        optimizer = "bobyqa",
+        optCtrl = list(maxfun = 100000),
+        check.conv.singular = .makeCC(
+            action = "warning",
+            tol = 1e-4
+        )
+    )
+)
+
+# Complex post-hoc analysis with nested comparisons
+contrasts_result <- emmeans(
+    model,
+    pairwise ~ treatment | time,
+    at = list(
+        time = c(0, 30, 60, 90),
+        baseline_measure = mean(analysis_data$baseline_measure, na.rm = TRUE)
+    ),
+    adjust = "tukey"
+) %>%
+    map_dfr(
+        ~ .x %>%
+            as_tibble() %>%
+            mutate(
+                effect_size = estimate / sqrt(
+                    sum(residuals(model)^2) / df.residual(model)
+                ),
+                significant = ifelse(
+                    p.value < 0.05,
+                    ifelse(p.value < 0.01, "**", "*"),
+                    "ns"
+                )
+            ),
+        .id = "comparison_type"
+    )
+
+#==== complex_control_flow
+#---- nested_error_handling_validation
+# Complex control flow with nested conditionals and error handling
+process_data_batch <- function(data_list, config = list()) {
+    tryCatch({
+        # Validate inputs with nested conditions
+        if (is.null(data_list) || length(data_list) == 0) {
+            stop("Input data_list cannot be NULL or empty")
+        }
+        
+        for (i in seq_along(data_list)) {
+            current_data <- data_list[[i]]
+            
+            # Nested validation with complex conditions
+            if (!is.data.frame(current_data)) {
+                warning(paste("Element", i, "is not a data frame, skipping"))
+                next
+            }
+            
+            required_cols <- c("id", "timestamp", "value")
+            missing_cols <- setdiff(required_cols, names(current_data))
+            
+            if (length(missing_cols) > 0) {
+                if (config$strict_mode %||% FALSE) {
+                    stop(paste(
+                        "Missing required columns in element", i, ":",
+                        paste(missing_cols, collapse = ", ")
+                    ))
+                } else {
+                    warning(paste(
+                        "Missing columns in element", i, ", filling with defaults:",
+                        paste(missing_cols, collapse = ", ")
+                    ))
+                    
+                    # Complex default value assignment
+                    for (col in missing_cols) {
+                        current_data[[col]] <- switch(col,
+                            "id" = paste0("auto_", seq_len(nrow(current_data))),
+                            "timestamp" = rep(Sys.time(), nrow(current_data)),
+                            "value" = rep(NA_real_, nrow(current_data)),
+                            rep(NA, nrow(current_data))
+                        )
+                    }
+                }
+            }
+            
+            # Complex data transformation with nested operations
+            processed <- current_data %>%
+                filter(!is.na(value)) %>%
+                mutate(
+                    normalized_value = case_when(
+                        is.finite(value) && value >= 0 ~ 
+                            (value - min(value, na.rm = TRUE)) / 
+                            (max(value, na.rm = TRUE) - min(value, na.rm = TRUE)),
+                        is.finite(value) && value < 0 ~ 
+                            pmax(-1, pmin(0, value / abs(min(value, na.rm = TRUE)))),
+                        TRUE ~ NA_real_
+                    ),
+                    quality_flag = ifelse(
+                        abs(scale(value)[, 1]) > 3,
+                        "outlier",
+                        ifelse(
+                            is.na(value) || !is.finite(value),
+                            "invalid",
+                            "valid"
+                        )
+                    )
+                ) %>%
+                arrange(timestamp, id)
+            
+            data_list[[i]] <- processed
+        }
+        
+        # Final aggregation with complex grouping
+        final_result <- bind_rows(data_list, .id = "batch_id") %>%
+            group_by(batch_id) %>%
+            summarise(
+                total_records = n(),
+                valid_records = sum(quality_flag == "valid", na.rm = TRUE),
+                mean_value = mean(normalized_value, na.rm = TRUE),
+                outlier_rate = mean(quality_flag == "outlier", na.rm = TRUE),
+                completeness = mean(!is.na(normalized_value)),
+                .groups = "drop"
+            ) %>%
+            mutate(
+                quality_score = (
+                    0.4 * completeness +
+                    0.3 * (1 - outlier_rate) +
+                    0.3 * pmin(1, valid_records / total_records)
+                ),
+                grade = case_when(
+                    quality_score >= 0.9 ~ "A",
+                    quality_score >= 0.8 ~ "B", 
+                    quality_score >= 0.7 ~ "C",
+                    quality_score >= 0.6 ~ "D",
+                    TRUE ~ "F"
+                )
+            )
+        
+        return(final_result)
+        
+    }, error = function(e) {
+        if (config$debug_mode %||% FALSE) {
+            cat("Debug info - Error occurred at:",
+                format(Sys.time(), "%Y-%m-%d %H:%M:%S"), "\n")
+            print(str(data_list))
+        }
+        
+        stop(paste("Error in process_data_batch:", e$message))
+        
+    }, warning = function(w) {
+        if (config$verbose %||% TRUE) {
+            message("Warning in process_data_batch: ", w$message)
+        }
+        invokeRestart("muffleWarning")
+        
+    }, finally = {
+        if (config$cleanup %||% TRUE) {
+            gc()  # Force garbage collection
+        }
+    })
+}
diff --git a/crates/roughly/tests/snapshots/test_format__complex_control_flow__nested_error_handling_validation.snap b/crates/roughly/tests/snapshots/test_format__complex_control_flow__nested_error_handling_validation.snap
@@ -0,0 +1,134 @@
+---
+source: crates/roughly/tests/test_format.rs
+expression: code
+---
+# Complex control flow with nested conditionals and error handling
+process_data_batch <- function(data_list, config = list()) {
+  tryCatch(
+    {
+      # Validate inputs with nested conditions
+      if (is.null(data_list) || length(data_list) == 0) {
+        stop("Input data_list cannot be NULL or empty")
+      }
+
+      for (i in seq_along(data_list)) {
+        current_data <- data_list[[i]]
+
+        # Nested validation with complex conditions
+        if (!is.data.frame(current_data)) {
+          warning(paste("Element", i, "is not a data frame, skipping"))
+          next
+        }
+
+        required_cols <- c("id", "timestamp", "value")
+        missing_cols <- setdiff(required_cols, names(current_data))
+
+        if (length(missing_cols) > 0) {
+          if (config$strict_mode %||% FALSE) {
+            stop(paste(
+              "Missing required columns in element",
+              i,
+              ":",
+              paste(missing_cols, collapse = ", ")
+            ))
+          } else {
+            warning(paste(
+              "Missing columns in element",
+              i,
+              ", filling with defaults:",
+              paste(missing_cols, collapse = ", ")
+            ))
+
+            # Complex default value assignment
+            for (col in missing_cols) {
+              current_data[[col]] <- switch(
+                col,
+                "id" = paste0("auto_", seq_len(nrow(current_data))),
+                "timestamp" = rep(Sys.time(), nrow(current_data)),
+                "value" = rep(NA_real_, nrow(current_data)),
+                rep(NA, nrow(current_data))
+              )
+            }
+          }
+        }
+
+        # Complex data transformation with nested operations
+        processed <- current_data %>%
+          filter(!is.na(value)) %>%
+          mutate(
+            normalized_value = case_when(
+              is.finite(value) && value >= 0 ~
+                (value - min(value, na.rm = TRUE)) /
+                  (max(value, na.rm = TRUE) - min(value, na.rm = TRUE)),
+              is.finite(value) && value < 0 ~
+                pmax(-1, pmin(0, value / abs(min(value, na.rm = TRUE)))),
+              TRUE ~ NA_real_
+            ),
+            quality_flag = ifelse(
+              abs(scale(value)[, 1]) > 3,
+              "outlier",
+              ifelse(
+                is.na(value) || !is.finite(value),
+                "invalid",
+                "valid"
+              )
+            )
+          ) %>%
+          arrange(timestamp, id)
+
+        data_list[[i]] <- processed
+      }
+
+      # Final aggregation with complex grouping
+      final_result <- bind_rows(data_list, .id = "batch_id") %>%
+        group_by(batch_id) %>%
+        summarise(
+          total_records = n(),
+          valid_records = sum(quality_flag == "valid", na.rm = TRUE),
+          mean_value = mean(normalized_value, na.rm = TRUE),
+          outlier_rate = mean(quality_flag == "outlier", na.rm = TRUE),
+          completeness = mean(!is.na(normalized_value)),
+          .groups = "drop"
+        ) %>%
+        mutate(
+          quality_score = (
+            0.4 * completeness +
+              0.3 * (1 - outlier_rate) +
+              0.3 * pmin(1, valid_records / total_records)
+          ),
+          grade = case_when(
+            quality_score >= 0.9 ~ "A",
+            quality_score >= 0.8 ~ "B",
+            quality_score >= 0.7 ~ "C",
+            quality_score >= 0.6 ~ "D",
+            TRUE ~ "F"
+          )
+        )
+
+      return(final_result)
+    },
+    error = function(e) {
+      if (config$debug_mode %||% FALSE) {
+        cat(
+          "Debug info - Error occurred at:",
+          format(Sys.time(), "%Y-%m-%d %H:%M:%S"),
+          "\n"
+        )
+        print(str(data_list))
+      }
+
+      stop(paste("Error in process_data_batch:", e$message))
+    },
+    warning = function(w) {
+      if (config$verbose %||% TRUE) {
+        message("Warning in process_data_batch: ", w$message)
+      }
+      invokeRestart("muffleWarning")
+    },
+    finally = {
+      if (config$cleanup %||% TRUE) {
+        gc() # Force garbage collection
+      }
+    }
+  )
+}
diff --git a/crates/roughly/tests/snapshots/test_format__complex_data_manipulation__nested_data_pipeline.snap b/crates/roughly/tests/snapshots/test_format__complex_data_manipulation__nested_data_pipeline.snap
@@ -0,0 +1,42 @@
+---
+source: crates/roughly/tests/test_format.rs
+expression: code
+---
+# Complex data manipulation with nested function calls and pipeline operators
+library(dplyr)
+library(tidyr)
+
+result <- raw_data %>%
+  filter(
+    !is.na(value) &
+      year >= 2020 &
+      category %in% c("A", "B", "C")
+  ) %>%
+  group_by(
+    region,
+    category,
+    quarter = paste0("Q", ceiling(month(date) / 3))
+  ) %>%
+  summarise(
+    mean_value = mean(value, na.rm = TRUE),
+    median_value = median(value, na.rm = TRUE),
+    count = n(),
+    .groups = "drop"
+  ) %>%
+  pivot_wider(
+    names_from = quarter,
+    values_from = c(mean_value, median_value, count),
+    names_sep = "_"
+  ) %>%
+  mutate(
+    total_mean = rowMeans(
+      select(., starts_with("mean_value")),
+      na.rm = TRUE
+    ),
+    variance_ratio = case_when(
+      total_mean > 100 ~ "high",
+      total_mean > 50 ~ "medium",
+      TRUE ~ "low"
+    )
+  ) %>%
+  arrange(desc(total_mean))
diff --git a/crates/roughly/tests/snapshots/test_format__statistical_modeling__complex_regression_analysis.snap b/crates/roughly/tests/snapshots/test_format__statistical_modeling__complex_regression_analysis.snap