Skip to content

Commit 713ff66

Browse files
committed
Skip gracefully and continue when CSV files with no matching database table are in the folder
1 parent 63dc2bb commit 713ff66

File tree

2 files changed

+34
-13
lines changed

2 files changed

+34
-13
lines changed

mimic-iv-note/buildmimic/duckdb/import_duckdb.sh

Lines changed: 19 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -67,22 +67,17 @@ elif [ -s "$OUTFILE" ]; then
6767
esac
6868
fi
6969

70+
# trim trailing slash from MIMIC_DIR, if present
71+
MIMIC_DIR=${MIMIC_DIR%/}
72+
7073
# we will copy the postgresql create.sql file, and apply regex
7174
# to fix the following issues:
7275
# 1. Remove optional precision value from TIMESTAMP(NN) -> TIMESTAMP
7376
# duckdb does not support this.
7477
export REGEX_TIMESTAMP='s/TIMESTAMP\([0-9]+\)/TIMESTAMP/g'
75-
# 2. Remove NOT NULL constraint from mimiciv_hosp.microbiologyevents.spec_type_desc
76-
# as there is one (!) zero-length string which is treated as a NULL by the import.
77-
export REGEX_SPEC_TYPE='s/spec_type_desc(.+)NOT NULL/spec_type_desc\1/g'
78-
# 3. Remove NOT NULL constraint from mimiciv_hosp.prescriptions.drug
79-
# as there are zero-length strings which are treated as NULLs by the import.
80-
export REGEX_DRUG='s/drug +(VARCHAR.+)NOT NULL/drug \1/g'
8178

8279
# use sed + above regex to create tables within db
8380
sed -r -e "${REGEX_TIMESTAMP}" ../postgres/create.sql | \
84-
sed -r -e "${REGEX_SPEC_TYPE}" | \
85-
sed -r -e "${REGEX_DRUG}" | \
8681
duckdb "$OUTFILE"
8782

8883
# goal: get path from find, e.g., ./1.0/icu/d_items
@@ -110,9 +105,22 @@ find "$MIMIC_DIR" -type f -name '*.csv???' | sort | while IFS= read -r FILE; do
110105
(note) ;; # OK
111106
(*) continue;
112107
esac
113-
echo "Loading $FILE .. \c"
114-
try duckdb "$OUTFILE" <<-EOSQL
115-
COPY $TABLE_NAME FROM '$FILE' (HEADER);
108+
echo "Loading $FILE .."
109+
OUTPUT=$(duckdb "$OUTFILE" 2>&1 <<-EOSQL
110+
COPY $TABLE_NAME FROM '$FILE' (HEADER, DELIM ',', QUOTE '"', ESCAPE '"');
116111
EOSQL
112+
)
113+
# If the table is missing in the DB, we report the file as skipped and continue with the next file.
114+
# Otherwise, the script reports the captured duckdb error output and exits.
115+
STATUS=$?
116+
if [ $STATUS -ne 0 ]; then
117+
echo "$OUTPUT" | grep -qiE 'table .* does not exist' && {
118+
echo "skipped (table $TABLE_NAME not found)";
119+
continue;
120+
}
121+
yell "Failed loading $FILE into $TABLE_NAME"
122+
yell "$OUTPUT"
123+
die "Exiting due to load error."
124+
fi
117125
echo "done!"
118126
done && echo "Successfully finished loading data into $OUTFILE."

mimic-iv/buildmimic/duckdb/import_duckdb.sh

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -111,8 +111,21 @@ find "$MIMIC_DIR" -type f -name '*.csv???' | sort | while IFS= read -r FILE; do
111111
(*) continue;
112112
esac
113113
echo "Loading $FILE .. \c"
114-
try duckdb "$OUTFILE" <<-EOSQL
115-
COPY $TABLE_NAME FROM '$FILE' (HEADER);
114+
OUTPUT=$(duckdb "$OUTFILE" 2>&1 <<-EOSQL
115+
COPY $TABLE_NAME FROM '$FILE' (HEADER, DELIM ',', QUOTE '"', ESCAPE '"');
116116
EOSQL
117+
)
118+
# If the table is missing in the DB, we report the file as skipped and continue with the next file.
119+
# Otherwise, the script reports the captured duckdb error output and exits.
120+
STATUS=$?
121+
if [ $STATUS -ne 0 ]; then
122+
echo "$OUTPUT" | grep -qiE 'table .* does not exist' && {
123+
echo "skipped (table $TABLE_NAME not found)";
124+
continue;
125+
}
126+
yell "Failed loading $FILE into $TABLE_NAME"
127+
yell "$OUTPUT"
128+
die "Exiting due to load error."
129+
fi
117130
echo "done!"
118131
done && echo "Successfully finished loading data into $OUTFILE."

0 commit comments

Comments
 (0)