Skip to content

Commit c932c45

Browse files
committed
Add output manifest for analysis artefacts
1 parent e57cbc6 commit c932c45

6 files changed

Lines changed: 101 additions & 12 deletions

File tree

README.md

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ Reproducible bulk RNA-seq differential expression pipeline using DESeq2: QC, shr
1717
- **Extended: Sex-stratified interaction analysis** — Condition-by-sex interaction model (`~ condition * gender`) to identify genes with sex-differential transcriptional responses, complementing the original study's sex-adjusted analysis with a formal interaction test
1818
- Extracts full GEO covariates (viral load Ct, age, sex, sequencing batch) for covariate-aware analyses
1919
- Raw and shrunken DE outputs, analysis summary metrics, and git/session provenance are generated automatically
20+
- `results/tables/output_manifest.csv` records file sizes and MD5 checksums for committed figures and tables
2021

2122
## Workflow
2223

@@ -42,7 +43,10 @@ GSE152075 (n=484, GEO)
4243
└──→ 11 Sex interaction ── ~ condition * gender → 12 sex-biased genes (9 male, 3 female)
4344
4445
45-
07 Provenance ──── Git commit, session info, config, package versions → REPRODUCIBILITY.md
46+
12 Manifest ────── File-size and checksum manifest for committed figures/tables
47+
48+
49+
07 Provenance ──── Git commit, session info, config, package versions → results/session_info.txt
4650
```
4751

4852
## Methods Overview
@@ -251,6 +255,7 @@ bulk-rnaseq-differential-expression/
251255
│ ├── 09_sensitivity_analysis.R
252256
│ ├── 10_viral_load_stratification.R # Extended: high vs low viral load DE
253257
│ ├── 11_sex_stratified_analysis.R # Extended: condition x gender interaction
258+
│ ├── 12_output_manifest.R # Checksums for committed figures/tables
254259
│ └── config.R # Shared analysis thresholds and helpers
255260
├── data/
256261
│ └── [RDS files]
@@ -277,6 +282,7 @@ source("scripts/08_pathway_diagram.R")
277282
source("scripts/09_sensitivity_analysis.R")
278283
source("scripts/10_viral_load_stratification.R")
279284
source("scripts/11_sex_stratified_analysis.R")
285+
source("scripts/12_output_manifest.R")
280286
```
281287

282288
## Methods

REPRODUCIBILITY.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ This repository is set up so a reviewer can reproduce the analysis with a small
77
- **Version pinning**: `renv.lock` pins CRAN + Bioconductor package versions.
88
- **Pre-computed outputs**: key figures and tables are committed under `results/` for convenience and quick verification.
99
- **Analysis summary**: `results/tables/analysis_summary.csv` captures the main counts used in the narrative.
10+
- **Output manifest**: `results/tables/output_manifest.csv` records file sizes and MD5 checksums for committed figures and tables.
1011
- **Pinned pathway snapshot**: `data/reference/kegg_hsa_pathway_*.tsv` freezes the KEGG human pathway universe used by enrichment.
1112

1213
## From a clean checkout (recommended)
@@ -47,6 +48,7 @@ After a successful run, you should see (among others):
4748
- `results/tables/deseq2_results_shrunken.csv`
4849
- `results/tables/full_cohort_deseq2_results.csv`
4950
- `results/tables/analysis_summary.csv`
51+
- `results/tables/output_manifest.csv`
5052
- `results/figures/volcano_plot.png`
5153
- `results/figures/sensitivity_lfc_scatter.png`
5254
- `results/figures/pca_plot.png`

results/tables/output_manifest.csv

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
"path","bytes","md5"
2+
"results/figures/dispersion_plot.png",505694,"24ad93bc7ee8d56bf4ced6b98d4e1a57"
3+
"results/figures/go_dotplot.png",500804,"67fff951fbd53390334375ee84df802f"
4+
"results/figures/kegg_dotplot.png",443704,"fa05cd5ad9f3d1046aeebd83c6f73326"
5+
"results/figures/ma_plot.png",678515,"7110b781f954801398920463b3974f78"
6+
"results/figures/pathway_diagram.png",342418,"9e6345e0a436b411d72ebda295d7fd76"
7+
"results/figures/pca_plot.png",257242,"bb9aeb68d36a090f31825c308827910b"
8+
"results/figures/pca_scree.png",172959,"580e94c17877221ce49e121964ae7513"
9+
"results/figures/pca_sex_stratified.png",205728,"3c10f6413a02c9e684551fdbfa241d12"
10+
"results/figures/qc_library_size.png",302762,"e63c4f1a6a77c9f9e15a752737edf155"
11+
"results/figures/sample_distances.png",342140,"5658db50d4aa5bb498d5032af8258d13"
12+
"results/figures/sensitivity_lfc_scatter.png",654130,"cc8ab93c82653b6ffb68abca6f2b5f83"
13+
"results/figures/sex_interaction_volcano.png",619575,"ccf483f2c66562f1a5b1acc3df76f527"
14+
"results/figures/top50_heatmap.png",475753,"a6c0563044c6a7a261b1eb6893729883"
15+
"results/figures/viral_load_ct_distribution.png",134209,"2767a9d8a588e087e05c0710bf691836"
16+
"results/figures/viral_load_isg_correlation.png",524869,"ccaa04a2f4c716c6196587c3b81402a1"
17+
"results/figures/viral_load_volcano.png",543042,"18c4dc222a8fe5c518c58d755c3050ef"
18+
"results/figures/volcano_plot.png",579141,"b306cae707412da5cbeeee5285593256"
19+
"results/tables/analysis_summary.csv",451,"c36ab48fef1b91b3ffc2660501bed10a"
20+
"results/tables/deseq2_results_shrunken.csv",1634699,"f7997b9e23b8d8f333a62dcd40aa81d4"
21+
"results/tables/deseq2_results.csv",1629915,"55b5443669b9273ea803bba9c95d61b1"
22+
"results/tables/full_cohort_deseq2_results.csv",2762171,"86587ccc81941a606ee0e908eb2d2501"
23+
"results/tables/go_biological_process.csv",142506,"ec7ea045881bb700a6376aa84998033f"
24+
"results/tables/kegg_pathways.csv",9894,"65c571c2c112082722296692f9a5afcb"
25+
"results/tables/sex_analysis_summary.csv",167,"d1201d896d5e2d0780114ccfd20d1c98"
26+
"results/tables/sex_interaction_de_results.csv",2085625,"30c1138fa3ae35887084b605c2582cb4"
27+
"results/tables/top_genes.csv",670,"b5c3dfd5f7dcf078ac512af37fa36b68"
28+
"results/tables/viral_load_de_results.csv",2276277,"46d6160b57565f2a7244d0228517b17a"
29+
"results/tables/viral_load_summary.csv",162,"5affeb45ca4a2421202f05b31d2e21f9"

run_all.R

Lines changed: 12 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -27,17 +27,18 @@ run_script <- function(script_path, step) {
2727
})
2828
}
2929

30-
run_script("scripts/00_get_data.R", " 1/11")
31-
run_script("scripts/01_qc.R", " 2/11")
32-
run_script("scripts/02_pca.R", " 3/11")
33-
run_script("scripts/03_deseq2.R", " 4/11")
34-
run_script("scripts/04_visualisation_volcano.R", " 5/11")
35-
run_script("scripts/05_model_diagnostics.R", " 6/11")
36-
run_script("scripts/06_enrichment.R", " 7/11")
37-
run_script("scripts/09_sensitivity_analysis.R", " 8/11")
38-
run_script("scripts/08_pathway_diagram.R", " 9/11")
39-
run_script("scripts/10_viral_load_stratification.R", "10/11")
40-
run_script("scripts/11_sex_stratified_analysis.R", "11/11")
30+
run_script("scripts/00_get_data.R", " 1/12")
31+
run_script("scripts/01_qc.R", " 2/12")
32+
run_script("scripts/02_pca.R", " 3/12")
33+
run_script("scripts/03_deseq2.R", " 4/12")
34+
run_script("scripts/04_visualisation_volcano.R", " 5/12")
35+
run_script("scripts/05_model_diagnostics.R", " 6/12")
36+
run_script("scripts/06_enrichment.R", " 7/12")
37+
run_script("scripts/09_sensitivity_analysis.R", " 8/12")
38+
run_script("scripts/08_pathway_diagram.R", " 9/12")
39+
run_script("scripts/10_viral_load_stratification.R", "10/12")
40+
run_script("scripts/11_sex_stratified_analysis.R", "11/12")
41+
run_script("scripts/12_output_manifest.R", "12/12")
4142

4243
run_script("scripts/07_reproducibility.R", "Session")
4344

scripts/12_output_manifest.R

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
#!/usr/bin/env Rscript
# Write a checksum manifest for committed result artefacts.
#
# Scans results/figures (*.png, *.pdf) and results/tables (*.csv), then writes
# results/tables/output_manifest.csv with one row per artefact:
#   path  - file path with forward slashes
#   bytes - file size in bytes
#   md5   - MD5 checksum of the file contents
# Paths are relative to the current working directory.

dir.create("results/tables", recursive = TRUE, showWarnings = FALSE)

manifest_file <- "results/tables/output_manifest.csv"

figure_files <- list.files(
  "results/figures",
  pattern = "\\.(png|pdf)$",
  full.names = TRUE
)
table_files <- list.files(
  "results/tables",
  pattern = "\\.csv$",
  full.names = TRUE
)
# The manifest cannot record its own checksum, so leave it out.
table_files <- table_files[basename(table_files) != basename(manifest_file)]

# Radix sorting orders character vectors in the C locale, so the row order
# does not depend on the collation rules of the machine running the script.
artefacts <- sort(c(figure_files, table_files), method = "radix")
if (length(artefacts) == 0) {
  stop("No result artefacts found under results/figures or results/tables")
}

sizes <- as.numeric(file.info(artefacts)$size)
checksums <- unname(tools::md5sum(artefacts))

# Both calls yield NA for files that cannot be read; refuse to write a
# manifest with missing entries instead of recording NA silently.
unreadable <- is.na(sizes) | is.na(checksums)
if (any(unreadable)) {
  stop(
    "Could not read result artefacts: ",
    paste(artefacts[unreadable], collapse = ", ")
  )
}

manifest <- data.frame(
  # Normalise Windows path separators so the manifest is portable.
  path = gsub("\\", "/", artefacts, fixed = TRUE),
  bytes = sizes,
  md5 = checksums,
  stringsAsFactors = FALSE
)

# Re-order on the normalised paths, again locale-independently.
manifest <- manifest[order(manifest$path, method = "radix"), ]
write.csv(manifest, manifest_file, row.names = FALSE)
message("Wrote output manifest for ", nrow(manifest), " artefacts")

tests/testthat/test-smoke.R

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,24 @@ test_that("key derived tables exist", {
9696
expect_gt(summary_df$shrunken_lfc_spearman[[1]], 0.8)
9797
})
9898

99+
# Checks that the committed output manifest exists, is well-formed, and agrees
# with the artefacts on disk in both size and MD5 checksum.
# NOTE(review): `root` is not defined in this block; it is presumably the
# repository root set up earlier in this test file - confirm.
test_that("output manifest tracks committed result artefacts", {
  manifest_path <- file.path(root, "results/tables/output_manifest.csv")
  expect_true(file.exists(manifest_path))

  manifest <- read.csv(manifest_path, stringsAsFactors = FALSE)
  expect_true(all(c("path", "bytes", "md5") %in% names(manifest)))
  expect_gt(nrow(manifest), 20)

  artefact_paths <- file.path(root, manifest$path)
  expect_true(all(file.exists(artefact_paths)))
  expect_true(all(manifest$bytes > 0))
  expect_true(all(grepl("^[0-9a-f]{32}$", manifest$md5)))

  actual_sizes <- file.info(artefact_paths)$size
  expect_equal(as.numeric(manifest$bytes), as.numeric(actual_sizes))

  # A well-formed checksum can still be stale; compare against the files.
  actual_md5 <- unname(tools::md5sum(artefact_paths))
  expect_equal(manifest$md5, actual_md5)

  expect_true("results/tables/analysis_summary.csv" %in% manifest$path)
  expect_true("results/figures/volcano_plot.png" %in% manifest$path)
  expect_true("results/figures/sensitivity_lfc_scatter.png" %in% manifest$path)
})
116+
99117
test_that("KEGG enrichment uses pinned human pathway references", {
100118
links_path <- file.path(root, "data/reference/kegg_hsa_pathway_links.tsv")
101119
names_path <- file.path(root, "data/reference/kegg_hsa_pathway_names.tsv")

0 commit comments

Comments
 (0)