Skip to content

Commit 527d6a7

Browse files
committed
specify import options and remove unnecessary regex
1 parent d04bcbb commit 527d6a7

File tree

1 file changed

+3
-11
lines changed

1 file changed

+3
-11
lines changed

mimic-iv-ed/buildmimic/duckdb/import_duckdb.sh

Lines changed: 3 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -72,17 +72,9 @@ fi
72 72
# 1. Remove optional precision value from TIMESTAMP(NN) -> TIMESTAMP
73 73
# duckdb does not support this.
74 74
export REGEX_TIMESTAMP='s/TIMESTAMP\([0-9]+\)/TIMESTAMP/g'
75-
# 2. Remove NOT NULL constraint from mimiciv_hosp.microbiologyevents.spec_type_desc
76-
# as there is one (!) zero-length string which is treated as a NULL by the import.
77-
export REGEX_SPEC_TYPE='s/spec_type_desc(.+)NOT NULL/spec_type_desc\1/g'
78-
# 3. Remove NOT NULL constraint from mimiciv_hosp.prescriptions.drug
79-
# as there are zero-length strings which are treated as NULLs by the import.
80-
export REGEX_DRUG='s/drug +(VARCHAR.+)NOT NULL/drug \1/g'
81 75

82 76
# use sed + above regex to create tables within db
83-
sed -r -e "${REGEX_TIMESTAMP}" ../postgres/create.sql | \
84-
sed -r -e "${REGEX_SPEC_TYPE}" | \
85-
sed -r -e "${REGEX_DRUG}" | \
77+
sed -r -e "${REGEX_TIMESTAMP}" ../postgres/create.sql |
86 78
duckdb "$OUTFILE"
87 79

88 80
# goal: get path from find, e.g., ./1.0/icu/d_items
@@ -110,9 +102,9 @@ find "$MIMIC_DIR" -type f -name '*.csv???' | sort | while IFS= read -r FILE; do
110 102
(ed) ;; # OK
111 103
(*) continue;
112 104
esac
113-
echo "Loading $FILE .. \c"
105+
echo "Loading $FILE .. "
114 106
try duckdb "$OUTFILE" <<-EOSQL
115-
COPY $TABLE_NAME FROM '$FILE' (HEADER);
107+
COPY $TABLE_NAME FROM '$FILE' (HEADER, DELIM ',', QUOTE '"', ESCAPE '"');
116 108
EOSQL
117 109
echo "done!"
118 110
done && echo "Successfully finished loading data into $OUTFILE."

0 commit comments

Comments
 (0)