Skip to content

Commit a696a41

Browse files
committed
Fix CI rebuild validation for cross-platform reproducibility
1 parent ac8edfe commit a696a41

2 files changed

Lines changed: 152 additions & 10 deletions

File tree

.github/workflows/ci.yml

Lines changed: 14 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,11 @@ jobs:
6767
- name: Install pinned dependencies
6868
run: Rscript 000_install_dependencies.R
6969

70+
- name: Save tracked tables for comparison
71+
run: |
72+
mkdir -p "$RUNNER_TEMP/reference_tables"
73+
cp results/tables/*.csv "$RUNNER_TEMP/reference_tables"/
74+
7075
- name: Remove committed analysis outputs before rebuild
7176
run: rm -f results/figures/*.png results/tables/*.csv
7277

@@ -76,14 +81,13 @@ jobs:
7681
- name: Validate rebuilt outputs
7782
run: Rscript -e 'testthat::test_dir("tests/testthat")'
7883

79-
- name: Verify regenerated tables match tracked results
80-
run: git diff --exit-code -- results/tables
84+
- name: Compare regenerated tables to tracked results
85+
run: Rscript dev/compare_tables.R "$RUNNER_TEMP/reference_tables" results/tables
8186

82-
- name: Verify key figures match tracked results
83-
run: >
84-
git diff --exit-code --
85-
results/figures/qc_library_size.png
86-
results/figures/ma_plot.png
87-
results/figures/pca_plot.png
88-
results/figures/sensitivity_lfc_scatter.png
89-
results/figures/volcano_plot.png
87+
- name: Verify key figures were regenerated
88+
run: |
89+
test -f results/figures/qc_library_size.png
90+
test -f results/figures/ma_plot.png
91+
test -f results/figures/pca_plot.png
92+
test -f results/figures/sensitivity_lfc_scatter.png
93+
test -f results/figures/volcano_plot.png

dev/compare_tables.R

Lines changed: 138 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,138 @@
1+
#!/usr/bin/env Rscript
2+
3+
args <- commandArgs(trailingOnly = TRUE)
4+
if (length(args) != 2) {
5+
stop("Usage: Rscript dev/compare_tables.R <reference_dir> <generated_dir>")
6+
}
7+
8+
reference_dir <- args[[1]]
9+
generated_dir <- args[[2]]
10+
comparison_digits <- 5L
11+
12+
if (!dir.exists(reference_dir)) {
13+
stop("Reference directory not found: ", reference_dir)
14+
}
15+
if (!dir.exists(generated_dir)) {
16+
stop("Generated directory not found: ", generated_dir)
17+
}
18+
19+
list_csv_files <- function(path) {
20+
sort(list.files(path, pattern = "\\.csv$", full.names = FALSE))
21+
}
22+
23+
reference_files <- list_csv_files(reference_dir)
24+
generated_files <- list_csv_files(generated_dir)
25+
26+
if (!identical(reference_files, generated_files)) {
27+
missing_files <- setdiff(reference_files, generated_files)
28+
extra_files <- setdiff(generated_files, reference_files)
29+
details <- c(
30+
if (length(missing_files) > 0) paste("missing:", paste(missing_files, collapse = ", ")),
31+
if (length(extra_files) > 0) paste("extra:", paste(extra_files, collapse = ", "))
32+
)
33+
stop("CSV file set mismatch between reference and generated outputs. ", paste(details, collapse = "; "))
34+
}
35+
36+
first_mismatch <- function(reference_values, generated_values) {
37+
same <- (reference_values == generated_values) | (is.na(reference_values) & is.na(generated_values))
38+
same[is.na(same)] <- FALSE
39+
mismatch <- which(!same)
40+
if (length(mismatch) == 0) {
41+
return(NA_integer_)
42+
}
43+
mismatch[[1]]
44+
}
45+
46+
format_value <- function(value) {
47+
if (is.na(value)) {
48+
return("NA")
49+
}
50+
as.character(value)
51+
}
52+
53+
compare_numeric_column <- function(reference_col, generated_col, file_label, col_name) {
54+
reference_cmp <- signif(reference_col, digits = comparison_digits)
55+
generated_cmp <- signif(generated_col, digits = comparison_digits)
56+
mismatch <- first_mismatch(reference_cmp, generated_cmp)
57+
if (!is.na(mismatch)) {
58+
stop(
59+
sprintf(
60+
"%s: numeric mismatch in column '%s' at row %d (reference=%s, generated=%s; compared at %d significant digits)",
61+
file_label,
62+
col_name,
63+
mismatch,
64+
format_value(reference_col[[mismatch]]),
65+
format_value(generated_col[[mismatch]]),
66+
comparison_digits
67+
)
68+
)
69+
}
70+
}
71+
72+
compare_text_column <- function(reference_col, generated_col, file_label, col_name) {
73+
reference_chr <- ifelse(is.na(reference_col), NA_character_, as.character(reference_col))
74+
generated_chr <- ifelse(is.na(generated_col), NA_character_, as.character(generated_col))
75+
mismatch <- first_mismatch(reference_chr, generated_chr)
76+
if (!is.na(mismatch)) {
77+
stop(
78+
sprintf(
79+
"%s: value mismatch in column '%s' at row %d (reference=%s, generated=%s)",
80+
file_label,
81+
col_name,
82+
mismatch,
83+
format_value(reference_chr[[mismatch]]),
84+
format_value(generated_chr[[mismatch]])
85+
)
86+
)
87+
}
88+
}
89+
90+
for (filename in reference_files) {
91+
file_label <- paste0("results/tables/", filename)
92+
reference_df <- read.csv(
93+
file.path(reference_dir, filename),
94+
stringsAsFactors = FALSE,
95+
check.names = FALSE
96+
)
97+
generated_df <- read.csv(
98+
file.path(generated_dir, filename),
99+
stringsAsFactors = FALSE,
100+
check.names = FALSE
101+
)
102+
103+
if (!identical(names(reference_df), names(generated_df))) {
104+
stop(file_label, ": column mismatch between reference and generated outputs")
105+
}
106+
if (nrow(reference_df) != nrow(generated_df) || ncol(reference_df) != ncol(generated_df)) {
107+
stop(
108+
file_label,
109+
": dimension mismatch (reference=",
110+
nrow(reference_df),
111+
"x",
112+
ncol(reference_df),
113+
", generated=",
114+
nrow(generated_df),
115+
"x",
116+
ncol(generated_df),
117+
")"
118+
)
119+
}
120+
121+
for (col_name in names(reference_df)) {
122+
reference_col <- reference_df[[col_name]]
123+
generated_col <- generated_df[[col_name]]
124+
if (is.numeric(reference_col) && is.numeric(generated_col)) {
125+
compare_numeric_column(reference_col, generated_col, file_label, col_name)
126+
} else {
127+
compare_text_column(reference_col, generated_col, file_label, col_name)
128+
}
129+
}
130+
}
131+
132+
message(
133+
"Tracked tables match regenerated outputs across ",
134+
length(reference_files),
135+
" files at ",
136+
comparison_digits,
137+
" significant digits."
138+
)

0 commit comments

Comments
 (0)