Skip to content

Commit 05f1498

Browse files
authored
Merge pull request #1209 from MIT-LCP/bg_minor_fix
Fixes to bg, postgres vent queries, and tests
2 parents 5a21e13 + f6f4ebe commit 05f1498

File tree

11 files changed

+114
-68
lines changed

11 files changed

+114
-68
lines changed

mimic-iv/concepts/firstday/first_day_bg_art.sql

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ select
2626
FROM `physionet-data.mimic_icu.icustays` ie
2727
LEFT JOIN `physionet-data.mimic_derived.bg` bg
2828
ON ie.subject_id = bg.subject_id
29-
AND bg.specimen_pred = 'ART.'
29+
AND bg.specimen = 'ART.'
3030
AND bg.charttime >= DATETIME_SUB(ie.intime, INTERVAL '6' HOUR)
3131
AND bg.charttime <= DATETIME_ADD(ie.intime, INTERVAL '1' DAY)
3232
GROUP BY ie.subject_id, ie.stay_id

mimic-iv/concepts/postgres/firstday/first_day_bg_art.sql

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ select
2828
FROM mimic_icu.icustays ie
2929
LEFT JOIN mimic_derived.bg bg
3030
ON ie.subject_id = bg.subject_id
31-
AND bg.specimen_pred = 'ART.'
31+
AND bg.specimen = 'ART.'
3232
AND bg.charttime >= DATETIME_SUB(ie.intime, INTERVAL '6' HOUR)
3333
AND bg.charttime <= DATETIME_ADD(ie.intime, INTERVAL '1' DAY)
3434
GROUP BY ie.subject_id, ie.stay_id

mimic-iv/concepts/postgres/measurement/bg.sql

Lines changed: 3 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ select
1313
-- specimen_id *may* have different storetimes, so this is taking the latest
1414
, MAX(storetime) AS storetime
1515
, le.specimen_id
16-
, MAX(CASE WHEN itemid = 52028 THEN value ELSE NULL END) AS specimen
16+
, MAX(CASE WHEN itemid = 52033 THEN value ELSE NULL END) AS specimen
1717
, MAX(CASE WHEN itemid = 50801 THEN valuenum ELSE NULL END) AS aado2
1818
, MAX(CASE WHEN itemid = 50802 THEN valuenum ELSE NULL END) AS baseexcess
1919
, MAX(CASE WHEN itemid = 50803 THEN valuenum ELSE NULL END) AS bicarbonate
@@ -50,7 +50,7 @@ FROM mimic_hosp.labevents le
5050
where le.ITEMID in
5151
-- blood gases
5252
(
53-
52028 -- specimen
53+
52033 -- specimen
5454
, 50801 -- aado2
5555
, 50802 -- base excess
5656
, 50803 -- bicarb
@@ -129,22 +129,6 @@ where bg.po2 is not null
129129
select bg.*
130130
, ROW_NUMBER() OVER (partition by bg.subject_id, bg.charttime order by s2.charttime DESC) as lastRowFiO2
131131
, s2.fio2_chartevents
132-
-- create our specimen prediction
133-
, 1/(1+exp(-(-0.02544
134-
+ 0.04598 * po2
135-
+ coalesce(-0.15356 * spo2 , -0.15356 * 97.49420 + 0.13429)
136-
+ coalesce( 0.00621 * fio2_chartevents , 0.00621 * 51.49550 + -0.24958)
137-
+ coalesce( 0.10559 * hemoglobin , 0.10559 * 10.32307 + 0.05954)
138-
+ coalesce( 0.13251 * so2 , 0.13251 * 93.66539 + -0.23172)
139-
+ coalesce(-0.01511 * pco2 , -0.01511 * 42.08866 + -0.01630)
140-
+ coalesce( 0.01480 * fio2 , 0.01480 * 63.97836 + -0.31142)
141-
+ coalesce(-0.00200 * aado2 , -0.00200 * 442.21186 + -0.01328)
142-
+ coalesce(-0.03220 * bicarbonate , -0.03220 * 22.96894 + -0.06535)
143-
+ coalesce( 0.05384 * totalco2 , 0.05384 * 24.72632 + -0.01405)
144-
+ coalesce( 0.08202 * lactate , 0.08202 * 3.06436 + 0.06038)
145-
+ coalesce( 0.10956 * ph , 0.10956 * 7.36233 + -0.00617)
146-
+ coalesce( 0.00848 * o2flow , 0.00848 * 7.59362 + -0.35803)
147-
))) as specimen_prob
148132
from stg2 bg
149133
left join stg_fio2 s2
150134
-- same patient
@@ -158,14 +142,8 @@ select
158142
stg3.subject_id
159143
, stg3.hadm_id
160144
, stg3.charttime
161-
-- raw data indicating sample type
145+
-- drop down text indicating the specimen type
162146
, specimen
163-
-- prediction of specimen for obs missing the actual specimen
164-
, case
165-
when specimen is not null then specimen
166-
when specimen_prob > 0.75 then 'ART.'
167-
else null end as specimen_pred
168-
, specimen_prob
169147

170148
-- oxygen related parameters
171149
, so2

mimic-iv/concepts/postgres/measurement/oxygen_delivery.sql

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ with ce_stg1 as
88
, ce.charttime
99
, CASE
1010
-- merge o2 flows into a single row
11-
WHEN itemid IN (223834, 227582, 224691) THEN 223834
11+
WHEN itemid IN (223834, 227582) THEN 223834
1212
ELSE itemid END AS itemid
1313
, value
1414
, valuenum
@@ -20,9 +20,10 @@ with ce_stg1 as
2020
(
2121
223834 -- o2 flow
2222
, 227582 -- bipap o2 flow
23-
, 224691 -- Flow Rate (L)
2423
-- additional o2 flow is its own column
2524
, 227287 -- additional o2 flow
25+
-- below flow rate is *not* o2 flow, and not included
26+
-- , 224691 -- Flow Rate (L)
2627
)
2728
)
2829
, ce_stg2 AS

mimic-iv/concepts/postgres/measurement/ventilator_setting.sql

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@ with ce as
5050
, 223849 -- vent mode
5151
, 229314 -- vent mode (Hamilton)
5252
, 223848 -- vent type
53+
, 224691 -- Flow Rate (L)
5354
)
5455
)
5556
SELECT
@@ -66,6 +67,7 @@ SELECT
6667
, MAX(CASE WHEN itemid = 224696 THEN valuenum ELSE NULL END) AS plateau_pressure
6768
, MAX(CASE WHEN itemid in (220339, 224700) THEN valuenum ELSE NULL END) AS peep
6869
, MAX(CASE WHEN itemid = 223835 THEN valuenum ELSE NULL END) AS fio2
70+
, MAX(CASE WHEN itemid = 224691 THEN valuenum ELSE NULL END) AS flow_rate
6971
, MAX(CASE WHEN itemid = 223849 THEN value ELSE NULL END) AS ventilator_mode
7072
, MAX(CASE WHEN itemid = 229314 THEN value ELSE NULL END) AS ventilator_mode_hamilton
7173
, MAX(CASE WHEN itemid = 223848 THEN value ELSE NULL END) AS ventilator_type

mimic-iv/concepts/postgres/postgres-make-concepts.sql

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -56,8 +56,8 @@
5656
\i treatment/ventilation.sql
5757

5858
-- firstday
59-
\i firstday/first_day_bg_art.sql
6059
\i firstday/first_day_bg.sql
60+
\i firstday/first_day_bg_art.sql
6161
\i firstday/first_day_gcs.sql
6262
\i firstday/first_day_height.sql
6363
\i firstday/first_day_lab.sql

mimic-iv/concepts/postgres/treatment/ventilation.sql

Lines changed: 61 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,26 @@
11
-- THIS SCRIPT IS AUTOMATICALLY GENERATED. DO NOT EDIT IT DIRECTLY.
22
DROP TABLE IF EXISTS ventilation; CREATE TABLE ventilation AS
3-
-- Calculate duration of mechanical ventilation.
3+
-- Classify oxygen devices and ventilator modes into six clinical categories.
4+
5+
-- Categories include..
6+
-- Invasive oxygen delivery types:
7+
-- Tracheostomy (with or without positive pressure ventilation)
8+
-- InvasiveVent (positive pressure ventilation via endotracheal tube, could be oro/nasotracheal or tracheostomy)
9+
-- Non invasive oxygen delivery types (divided similar to doi:10.1001/jama.2020.9524):
10+
-- NonInvasiveVent (non-invasive positive pressure ventilation)
11+
-- HFNC (high flow nasal oxygen / cannula)
12+
-- SupplementalOxygen (all other non-rebreather, facemask, face tent, nasal prongs...)
13+
-- No oxygen device:
14+
-- None
15+
16+
-- When conflicting settings occur (rare), the priority is:
17+
-- trach > mech vent > NIV > high flow > o2
18+
419
-- Some useful cases for debugging:
520
-- stay_id = 30019660 has a tracheostomy placed in the ICU
621
-- stay_id = 30000117 has explicit documentation of extubation
7-
-- classify vent settings into modes
22+
23+
-- first we collect all times which have relevant documentation
824
WITH tm AS
925
(
1026
SELECT stay_id, charttime
@@ -25,11 +41,13 @@ WITH tm AS
2541
-- tracheostomy
2642
WHEN o2_delivery_device_1 IN
2743
(
28-
'Tracheostomy tube'
29-
-- 'Trach mask ' -- 16435 observations
44+
'Tracheostomy tube',
45+
'Trach mask ' -- 16435 observations
46+
-- 'T-piece', -- 1135 observations (T-piece could be either InvasiveVent or Tracheostomy)
47+
3048
)
31-
THEN 'Trach'
32-
-- mechanical ventilation
49+
THEN 'Tracheostomy'
50+
-- mechanical / invasive ventilation
3351
WHEN o2_delivery_device_1 IN
3452
(
3553
'Endotracheal tube'
@@ -100,30 +118,33 @@ WITH tm AS
100118
'NIV-ST'
101119
)
102120
THEN 'NonInvasiveVent'
103-
-- high flow
121+
-- high flow nasal cannula
104122
when o2_delivery_device_1 IN
105123
(
106-
'High flow neb', -- 10785 observations
107124
'High flow nasal cannula' -- 925 observations
108125
)
109-
THEN 'HighFlow'
110-
-- normal oxygen delivery
126+
THEN 'HFNC'
127+
-- non rebreather
111128
WHEN o2_delivery_device_1 in
112-
(
113-
'Nasal cannula', -- 153714 observations
129+
(
130+
'Non-rebreather', -- 5182 observations
114131
'Face tent', -- 24601 observations
115132
'Aerosol-cool', -- 24560 observations
116-
'Non-rebreather', -- 5182 observations
117133
'Venti mask ', -- 1947 observations
118134
'Medium conc mask ', -- 1888 observations
119-
'T-piece', -- 1135 observations
120135
'Ultrasonic neb', -- 9 observations
121136
'Vapomist', -- 3 observations
122-
'Oxymizer' -- 1301 observations
137+
'Oxymizer', -- 1301 observations
138+
'High flow neb', -- 10785 observations
139+
'Nasal cannula'
123140
)
124-
THEN 'Oxygen'
125-
-- Not categorized:
126-
-- 'Other', 'None'
141+
THEN 'SupplementalOxygen'
142+
WHEN o2_delivery_device_1 in
143+
(
144+
'None'
145+
)
146+
THEN 'None'
147+
-- not categorized: other
127148
ELSE NULL END AS ventilation_status
128149
FROM tm
129150
LEFT JOIN mimic_derived.ventilator_setting vs
@@ -138,8 +159,8 @@ WITH tm AS
138159
SELECT
139160
stay_id, charttime
140161
-- source data columns, here for debug
141-
, o2_delivery_device_1
142-
, vent_mode
162+
-- , o2_delivery_device_1
163+
-- , vent_mode
143164
-- carry over the previous charttime which had the same state
144165
, LAG(charttime, 1) OVER (PARTITION BY stay_id, ventilation_status ORDER BY charttime) AS charttime_lag
145166
-- bring back the next charttime, regardless of the state
@@ -155,34 +176,42 @@ WITH tm AS
155176
(
156177
SELECT
157178
stay_id
158-
-- source data columns, here for debug
159-
, o2_delivery_device_1
160-
, vent_mode
161-
, charttime_lag
162179
, charttime
180+
, charttime_lag
163181
, charttime_lead
164182
, ventilation_status
165183

184+
-- source data columns, here for debug
185+
-- , o2_delivery_device_1
186+
-- , vent_mode
187+
166188
-- calculate the time since the last event
167189
, DATETIME_DIFF(charttime,charttime_lag,'MINUTE')/60 as ventduration
168190

169191
-- now we determine if the current ventilation status is "new", or continuing the previous
170192
, CASE
193+
-- if lag is null, this is the first event for the patient
194+
WHEN ventilation_status_lag IS NULL THEN 1
171195
-- a 14 hour gap always initiates a new event
172196
WHEN DATETIME_DIFF(charttime,charttime_lag,'HOUR') >= 14 THEN 1
173-
WHEN ventilation_status_lag IS NULL THEN 1
174197
-- not a new event if identical to the last row
175198
WHEN ventilation_status_lag != ventilation_status THEN 1
176199
ELSE 0
177-
END AS new_status
200+
END AS new_ventilation_event
178201
FROM vd0
179202
)
180203
, vd2 as
181204
(
182-
SELECT vd1.*
205+
SELECT vd1.stay_id, vd1.charttime
206+
, ventduration, new_ventilation_event
183207
-- create a cumulative sum of the instances of new ventilation
184-
-- this results in a monotonic integer assigned to each instance of ventilation
185-
, SUM(new_status) OVER (PARTITION BY stay_id ORDER BY charttime) AS vent_num
208+
-- this results in a monotonically increasing integer assigned
209+
-- to each instance of ventilation
210+
, SUM(new_ventilation_event) OVER
211+
(
212+
PARTITION BY stay_id
213+
ORDER BY charttime
214+
) AS vent_seq
186215
FROM vd1
187216
)
188217
-- create the durations for each ventilation instance
@@ -200,9 +229,10 @@ SELECT stay_id
200229
END
201230
) AS endtime
202231
-- all rows with the same vent_num will have the same ventilation_status
203-
-- for efficiency, we use an aggregate here, but we could equally well group by this column
232+
-- for efficiency, we use an aggregate here,
233+
-- but we could equally well group by this column
204234
, MAX(ventilation_status) AS ventilation_status
205235
FROM vd2
206-
GROUP BY stay_id, vent_num
236+
GROUP BY stay_id, vent_seq
207237
HAVING min(charttime) != max(charttime)
208238
;

mimic-iv/tests/README.md

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -53,9 +53,7 @@ With the account configured, we can create a key for the service, and download t
5353
**This is the only copy of the private key.** Don't lose it :)
5454

5555
```sh
56-
gcloud iam service-accounts keys create `key.json` --iam-account [email protected]
56+
gcloud iam service-accounts keys create "key.json" --iam-account [email protected]
5757
```
5858

5959
The tests will use the `key.json` file in the `tests/` folder by default, so you should now be able to run the tests!
60-
61-
(NOTE: this service account will not have access to MIMIC-IV itself. This is a TODO!).

mimic-iv/tests/conftest.py

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import os
22
from collections import namedtuple
3+
from pathlib import Path
34

45
import pytest
56
from google.cloud import bigquery
@@ -33,3 +34,25 @@ def project_id():
3334
Return the name of the BigQuery project used.
3435
"""
3536
return 'physionet-data'
37+
38+
@pytest.fixture(scope="session")
39+
def concept_folders():
40+
"""
41+
Returns the folders containing concepts.
42+
"""
43+
return ['comorbidity', 'demographics', 'measurement', 'medication', 'organfailure', 'treatment', 'score', 'sepsis', 'firstday']
44+
45+
@pytest.fixture(scope="session")
46+
def concepts(concept_folders):
47+
"""
48+
Returns all concepts which should be generated.
49+
"""
50+
concepts = {}
51+
current_dir = Path(__file__).parent
52+
concept_dir = current_dir / '../concepts'
53+
for folder in concept_folders:
54+
files = os.listdir(concept_dir / folder)
55+
# add list of the concepts in this folder to the dict
56+
concepts[folder] = [f[:-4] for f in files if f.endswith('.sql')]
57+
58+
return concepts

mimic-iv/tests/test_all_tables.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
import pandas as pd
2+
from pandas.io import gbq
3+
4+
def test_tables_have_data(dataset, project_id, concepts):
5+
"""Verifies each table has data."""
6+
7+
for folder, concept_list in concepts.items():
8+
for concept_name in concept_list:
9+
query = f"""
10+
SELECT *
11+
FROM {dataset}.{concept_name}
12+
LIMIT 5
13+
"""
14+
df = gbq.read_gbq(query, project_id=project_id, dialect="standard")
15+
assert df.shape[0] > 0, f'did not find table for {folder}.{concept_name}'

0 commit comments

Comments
 (0)