Skip to content

Commit 79eb377

Browse files
authored
Merge pull request #1447 from MIT-LCP/gcs_fix
Fix bug in calculation of first day GCS
2 parents 8047c9d + e39b411 commit 79eb377

File tree

2 files changed

+54
-4
lines changed

2 files changed

+54
-4
lines changed

mimic-iv/concepts/firstday/first_day_gcs.sql

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -13,15 +13,25 @@
1313
WITH gcs_final AS
1414
(
1515
SELECT
16-
gcs.*
16+
ie.subject_id, ie.stay_id
17+
, g.gcs
18+
, g.gcs_motor
19+
, g.gcs_verbal
20+
, g.gcs_eyes
21+
, g.gcs_unable
1722
-- This sorts the data by GCS
1823
-- gcs_seq = 1 is the lowest total GCS value
1924
, ROW_NUMBER () OVER
2025
(
21-
PARTITION BY gcs.stay_id
22-
ORDER BY gcs.GCS
26+
PARTITION BY g.stay_id
27+
ORDER BY g.GCS
2328
) as gcs_seq
24-
FROM `physionet-data.mimiciv_derived.gcs` gcs
29+
FROM `physionet-data.mimiciv_icu.icustays` ie
30+
-- Only get data from 6 hours before intime up to 24 hours after intime
31+
LEFT JOIN `physionet-data.mimiciv_derived.gcs` g
32+
ON ie.stay_id = g.stay_id
33+
AND g.charttime >= DATETIME_SUB(ie.intime, INTERVAL '6' HOUR)
34+
AND g.charttime <= DATETIME_ADD(ie.intime, INTERVAL '1' DAY)
2535
)
2636
SELECT
2737
ie.subject_id

mimic-iv/tests/test_first_day.py

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
import pandas as pd
2+
from pandas.io import gbq
3+
4+
def test_gcs_first_day_calculated_correctly(dataset, project_id):
    """Verify that first-day GCS values are calculated correctly.

    Two checks are run against ``{dataset}.first_day_gcs``:

    1. Coverage: more than 98% of rows have a non-null ``gcs``.
    2. Spot check: a small set of stays carries the expected component
       values, where an expected value of ``None`` means the column must
       be NULL for that stay.

    Args:
        dataset: Dataset name interpolated directly into the SQL text.
        project_id: Project identifier forwarded to ``gbq.read_gbq``.
    """
    # almost every individual should have a GCS first day
    query = f"""
    SELECT COUNT(*) AS n, COUNT(g.gcs) AS n_gcs
    FROM {dataset}.first_day_gcs g
    """
    df = gbq.read_gbq(query, project_id=project_id, dialect="standard")
    n, n_gcs = df.iloc[0, 0], df.iloc[0, 1]
    # Guard the division below: an empty table would otherwise surface as a
    # division error / NaN fraction with a misleading failure message.
    assert n > 0, 'first_day_gcs contains no rows'
    frac = float(n_gcs) / n * 100.0
    # The message is a plain string (never %-formatted), so a single % is
    # what prints correctly.
    assert frac > 98, 'less than 98% of stays have a first day GCS'

    # verify a subset of values
    known_values = {
        37535507: {'gcs': 13, 'gcs_motor': 4, 'gcs_verbal': None, 'gcs_eyes': None},
        38852627: {'gcs': None, 'gcs_motor': None, 'gcs_verbal': None, 'gcs_eyes': None},
        32435143: {'gcs': 8, 'gcs_motor': 5, 'gcs_verbal': 1, 'gcs_eyes': 2},
    }
    stay_id_list = ','.join(str(stay_id) for stay_id in known_values)
    query = f"""
    SELECT g.stay_id
    , g.gcs
    , g.gcs_motor
    , g.gcs_verbal
    , g.gcs_eyes
    , g.gcs_unable
    FROM {dataset}.first_day_gcs g
    WHERE g.stay_id IN
    (
        {stay_id_list}
    )
    """
    df = gbq.read_gbq(query, project_id=project_id, dialect="standard")
    df = df.sort_values(['stay_id']).set_index('stay_id')

    # Iterating only over returned rows would silently skip any stay the
    # query did not return, so require every known stay to be present.
    missing = set(known_values) - set(df.index)
    assert not missing, f'first_day_gcs has no row for stay_id(s) {sorted(missing)}'

    for stay_id, row in df.iterrows():
        for col, expected_val in known_values[stay_id].items():
            actual = row[col]
            message = f'first_day_gcs {col} value incorrect for stay_id={stay_id}'
            if expected_val is None:
                # SQL NULLs arrive as None/NaN/pd.NA depending on dtype;
                # none of these reliably compare equal to None with ==.
                assert pd.isna(actual), message
            else:
                # Check for missing first: truth-testing pd.NA == x raises.
                assert not pd.isna(actual) and actual == expected_val, message

0 commit comments

Comments
 (0)