|
72 | 72 | # 1. Strip the optional precision from timestamp types: TIMESTAMP(NN) -> TIMESTAMP. |
73 | 73 | # DuckDB does not support the precision argument. |
74 | 74 | export REGEX_TIMESTAMP='s/TIMESTAMP\([0-9]+\)/TIMESTAMP/g' |
75 | | -# 2. Remove NOT NULL constraint from mimiciv_hosp.microbiologyevents.spec_type_desc |
76 | | -# as there is one (!) zero-length string which is treated as a NULL by the import. |
77 | | -export REGEX_SPEC_TYPE='s/spec_type_desc(.+)NOT NULL/spec_type_desc\1/g' |
78 | | -# 3. Remove NOT NULL constraint from mimiciv_hosp.prescriptions.drug |
79 | | -# as there are zero-length strings which are treated as NULLs by the import. |
80 | | -export REGEX_DRUG='s/drug +(VARCHAR.+)NOT NULL/drug \1/g' |
81 | 75 |
|
82 | 76 | # use sed + above regex to create tables within db |
83 | | -sed -r -e "${REGEX_TIMESTAMP}" ../postgres/create.sql | \ |
84 | | - sed -r -e "${REGEX_SPEC_TYPE}" | \ |
85 | | - sed -r -e "${REGEX_DRUG}" | \ |
| 77 | +sed -r -e "${REGEX_TIMESTAMP}" ../postgres/create.sql | |
86 | 78 | duckdb "$OUTFILE" |
87 | 79 |
|
88 | 80 | # goal: get path from find, e.g., ./1.0/icu/d_items |
@@ -110,9 +102,9 @@ find "$MIMIC_DIR" -type f -name '*.csv???' | sort | while IFS= read -r FILE; do |
110 | 102 | (ed) ;; # OK |
111 | 103 | (*) continue; |
112 | 104 | esac |
113 | | - echo "Loading $FILE .. \c" |
| 105 | + echo "Loading $FILE .. " |
114 | 106 | try duckdb "$OUTFILE" <<-EOSQL |
115 | | - COPY $TABLE_NAME FROM '$FILE' (HEADER); |
| 107 | + COPY $TABLE_NAME FROM '$FILE' (HEADER, DELIM ',', QUOTE '"', ESCAPE '"'); |
116 | 108 | EOSQL |
117 | 109 | echo "done!" |
118 | 110 | done && echo "Successfully finished loading data into $OUTFILE." |
0 commit comments