Skip to content

Commit 3c6eecb

Browse files
committed
fix evaluation path mismatches and prune unreachable artefact checks
1 parent 1e5f02b commit 3c6eecb

2 files changed

Lines changed: 19 additions & 57 deletions

File tree

Makefile

Lines changed: 13 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -36,24 +36,32 @@ bootstrap_delta:
3636
$(PYTHON) -m src.evaluation.bootstrap_eval \
3737
--model-a artifacts/models/multimodal_pneumonia_densenet121_triage_u_ignore_temporal_stronger_lr_v3/test_predictions.csv \
3838
--model-b artifacts/models/image_pneumonia_finetune_densenet121_u_ignore_temporal_stronger_lr_v3/test_predictions.csv \
39-
--output-json artifacts/evaluation/bootstrap_multimodal_vs_image.json \
39+
--output-json artifacts/evaluation/bootstrap_multimodal_vs_image_stronger_lr_v3.json \
4040
--n-bootstrap 2000 --seed $(SEED)
4141

4242
calibration:
4343
$(PYTHON) -m src.evaluation.calibration_analysis \
44-
--output-dir artifacts/evaluation/calibration_final \
44+
--output-dir artifacts/evaluation/calibration_stronger_lr_v3 \
4545
--n-bins 10 --bootstrap --n-bootstrap 2000 \
4646
--model "Image" artifacts/models/image_pneumonia_finetune_densenet121_u_ignore_temporal_stronger_lr_v3/test_predictions.csv \
4747
--model "Multimodal" artifacts/models/multimodal_pneumonia_densenet121_triage_u_ignore_temporal_stronger_lr_v3/test_predictions.csv
4848

49+
dca:
50+
$(PYTHON) -m src.evaluation.decision_curve_analysis \
51+
--output-dir artifacts/evaluation/dca \
52+
--model "Image" artifacts/models/image_pneumonia_finetune_densenet121_u_ignore_temporal_stronger_lr_v3/test_predictions.csv \
53+
--model "Multimodal" artifacts/models/multimodal_pneumonia_densenet121_triage_u_ignore_temporal_stronger_lr_v3/test_predictions.csv
54+
4955
feature_ablation:
5056
$(PYTHON) scripts/collect_feature_ablation_results.py
5157

52-
evaluate: bootstrap_delta calibration feature_ablation
58+
evaluate: bootstrap_delta calibration dca feature_ablation
5359

5460
# ─── SHAP ────────────────────────────────────────────────────────────────────
5561
shap:
56-
$(PYTHON) scripts/generate_shap_clinical.py
62+
$(PYTHON) scripts/generate_shap_clinical.py \
63+
--model-dir artifacts/models/clinical_xgb_u_ignore_temporal_strong_v2 \
64+
--feature-groups all
5765

5866
# ─── Publication report ───────────────────────────────────────────────────────
5967
report:
@@ -68,4 +76,4 @@ all: pretrain finetune_image finetune_multimodal train_clinical evaluate shap re
6876

6977
.PHONY: preprocess preprocess_labs pretrain finetune_image finetune_multimodal \
7078
train_clinical_lr train_clinical_xgb train_clinical bootstrap_delta \
71-
calibration feature_ablation evaluate shap report all test
79+
calibration dca feature_ablation evaluate shap report all test

scripts/regenerate_all_results.py

Lines changed: 6 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,12 @@
2525
# Source of truth — all numerical results
2626
("artifacts/evaluation/final_publication_report.json", MIN_JSON_BYTES, "json"),
2727

28+
# Bootstrap comparison
29+
(
30+
"artifacts/evaluation/bootstrap_multimodal_vs_image_stronger_lr_v3.json",
31+
MIN_JSON_BYTES, "json"
32+
),
33+
2834
# Feature ablation
2935
("artifacts/evaluation/feature_ablation_results.csv", MIN_CSV_BYTES, "csv"),
3036

@@ -46,58 +52,6 @@
4652

4753
# DCA
4854
("artifacts/evaluation/dca/decision_curve_standardized.png", MIN_PNG_BYTES, "png"),
49-
50-
# Generated thesis figures (from scripts/generate_thesis_figures.py)
51-
(
52-
"thesis_new_docs/figures/generated_results/fig_a1_pr_curves.png",
53-
MIN_PNG_BYTES, "png"
54-
),
55-
(
56-
"thesis_new_docs/figures/generated_results/fig_a3_ablation_bars.png",
57-
MIN_PNG_BYTES, "png"
58-
),
59-
(
60-
"thesis_new_docs/figures/generated_results/fig_a4_label_sensitivity.png",
61-
MIN_PNG_BYTES, "png"
62-
),
63-
(
64-
"thesis_new_docs/figures/generated_results/fig_a5_training_curves.png",
65-
MIN_PNG_BYTES, "png"
66-
),
67-
(
68-
"thesis_new_docs/figures/generated_results/fig_a6_missing_heatmap.png",
69-
MIN_PNG_BYTES, "png"
70-
),
71-
(
72-
"thesis_new_docs/figures/generated_results/fig_a7_vital_distributions.png",
73-
MIN_PNG_BYTES, "png"
74-
),
75-
(
76-
"thesis_new_docs/figures/generated_results/fig_a8_label_distribution.png",
77-
MIN_PNG_BYTES, "png"
78-
),
79-
80-
# Original result figures
81-
(
82-
"thesis_new_docs/figures/original_results/"
83-
"roc_curve_all_models.png",
84-
MIN_PNG_BYTES, "png"
85-
),
86-
(
87-
"thesis_new_docs/figures/original_results/"
88-
"calibration_stronger_lr_v3_reliability_diagram_all_models.png",
89-
MIN_PNG_BYTES, "png"
90-
),
91-
(
92-
"thesis_new_docs/figures/original_results/"
93-
"shap_summary_beeswarm.png",
94-
MIN_PNG_BYTES, "png"
95-
),
96-
(
97-
"thesis_new_docs/figures/original_results/"
98-
"decision_curve_standardized.png",
99-
MIN_PNG_BYTES, "png"
100-
),
10155
]
10256

10357

0 commit comments

Comments
 (0)