File tree Expand file tree Collapse file tree
Expand file tree Collapse file tree Original file line number Diff line number Diff line change @@ -71,7 +71,8 @@ $PYTHON -m src.data.build_triage_model_table
7171
7272echo " [9/19] build_pneumonia_labels_from_chexpert"
7373$PYTHON -m src.data.build_pneumonia_labels_from_chexpert \
74- --metadata-root " $CXR_ROOT "
74+ --metadata-root " $CXR_ROOT " \
75+ --allow-fallback-study-merge
7576
7677echo " [10/19] build_pneumonia_training_table (u_ignore)"
7778$PYTHON -m src.data.build_pneumonia_training_table \
@@ -120,7 +121,8 @@ $PYTHON -m src.data.build_image_pretraining_split
120121
121122echo " [18/19] build_image_multilabel_pretrain_table"
122123$PYTHON -m src.data.build_image_multilabel_pretrain_table \
123- --metadata-root " $CXR_ROOT "
124+ --metadata-root " $CXR_ROOT " \
125+ --allow-fallback-study-merge
124126
125127echo " [19/19] build_nonED_image_eval_table"
126128$PYTHON -m src.data.build_nonED_image_eval_table \
Original file line number Diff line number Diff line change @@ -28,8 +28,11 @@ def main() -> None:
2828
2929 df = pd .read_parquet (args .input_manifest ).copy ()
3030
31- # Keep only rows with valid paths
32- if "exists" in df .columns :
31+ # Keep only rows with valid paths.
32+ # exists=True → verified present, keep.
33+ # exists=False → verified missing, drop.
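34+ # exists=NA → not checked; assumed present only when the whole column is NA (otherwise NA rows fail the == True filter below and are dropped).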
35+ if "exists" in df .columns and not df ["exists" ].isna ().all ():
3336 df = df [df ["exists" ] == True ].copy ()
3437
3538 # Keep only frontal rows
You can’t perform that action at this time.
0 commit comments