-
Notifications
You must be signed in to change notification settings - Fork 4
Expand file tree
/
Copy pathexport_groupers.py
More file actions
129 lines (115 loc) · 4.87 KB
/
Copy pathexport_groupers.py
File metadata and controls
129 lines (115 loc) · 4.87 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
import csv
import json
from datetime import datetime
from pathlib import Path
# Directory containing this script; the generated CSV is written here.
EXPORTS_DIR = Path(__file__).parent
# Parent directory of EXPORTS_DIR.
SCRIPTS_DIR = EXPORTS_DIR.parent
# Directory whose *.json files are scanned for ValueSets by main().
TES_DATA_DIR = SCRIPTS_DIR.joinpath("data", "tes")
# Output CSV path; the name carries the local date (YYYY-MM-DD) at import time.
OUTFILE = EXPORTS_DIR / f"tes-export-groupers-{datetime.today():%Y-%m-%d}.csv"
def parse_snomed_from_url(url: str) -> str | None:
"""
Extract the RSG SNOMED CT code from its canonical url.
"""
if "rs-grouper-" in url:
return url.split("rs-grouper-")[-1]
return None
def main():
    """
    Run the script to export a CSV file showing the relationship between CGs <-> RSGs.

    Steps:
      1. Load every entry under the "valuesets" key of each JSON file in
         TES_DATA_DIR (manifest.json is skipped), keyed by (url, version).
      2. For each ValueSet whose compose.include entries carry "valueSet"
         references, resolve every "url|version" reference against the loaded
         ValueSets and keep those whose url contains an "rs-grouper-" code.
      3. Write one CSV row per kept parent/child relationship to OUTFILE.

    Unreadable JSON files, malformed or unresolved child references, and CSV
    write failures are printed to stdout rather than raised.
    """
    print("🌱 Starting grouper CSV export...")

    # load all ValueSets, keyed by (url, version)
    all_valuesets = {}
    json_files = [f for f in TES_DATA_DIR.glob("*.json") if f.name != "manifest.json"]
    print(f"🔎 Found {len(json_files)} JSON file(s) in {TES_DATA_DIR}")

    for file_path in json_files:
        print(f"📖 Reading {file_path.name}...")
        try:
            # Decode explicitly as UTF-8 instead of relying on the platform's
            # locale encoding, matching the encoding used for the CSV output.
            with open(file_path, encoding="utf-8") as f:
                doc = json.load(f)
            for vs in doc.get("valuesets", []):
                all_valuesets[(vs.get("url"), vs.get("version"))] = vs
        except Exception as e:
            # best effort: a single bad file must not abort the whole export
            print(f"⚠️ Failed to read {file_path.name}: {e}")

    rows = []
    parent_count = 0

    # iterate parent ValueSets that reference child ValueSets
    for parent in all_valuesets.values():
        includes = parent.get("compose", {}).get("include", [])
        if not any("valueSet" in inc for inc in includes):
            continue

        parent_count += 1
        condition_grouper_name = parent.get("name") or parent.get("title")
        condition_grouper_canonical_url = parent.get("url")
        condition_grouper_version = parent.get("version")

        for include in includes:
            for child_ref in include.get("valueSet", []):
                # child references have the form "<canonical url>|<version>"
                (
                    reporting_spec_grouper_canonical_url,
                    separator,
                    reporting_spec_grouper_version,
                ) = child_ref.partition("|")
                if not separator:
                    print(f"⚠️ Skipping malformed child reference: {child_ref}")
                    continue

                child_vs = all_valuesets.get(
                    (
                        reporting_spec_grouper_canonical_url,
                        reporting_spec_grouper_version,
                    )
                )
                if not child_vs:
                    print(
                        f"⚠️ Could not find child ValueSet: {reporting_spec_grouper_canonical_url}|{reporting_spec_grouper_version}"
                    )
                    continue

                reporting_spec_grouper_snomed = parse_snomed_from_url(
                    child_vs.get("url", "")
                )
                if not reporting_spec_grouper_snomed:
                    # Skip non-reporting-specification groupers (like additional context groupers)
                    continue

                rows.append(
                    {
                        "condition_grouper_name": condition_grouper_name,
                        "condition_grouper_canonical_url": condition_grouper_canonical_url,
                        "condition_grouper_version": condition_grouper_version,
                        "reporting_spec_grouper_snomed": reporting_spec_grouper_snomed,
                        "reporting_spec_grouper_name": child_vs.get("title"),
                        "reporting_spec_grouper_canonical_url": reporting_spec_grouper_canonical_url,
                        "reporting_spec_grouper_version": reporting_spec_grouper_version,
                    }
                )

    # every appended row is exactly one parent-child relationship
    relation_count = len(rows)
    print(
        f"🧩 Processed {parent_count} parent groupers with {relation_count} parent-child relationships."
    )

    # write to csv
    fieldnames = [
        "condition_grouper_name",
        "condition_grouper_canonical_url",
        "condition_grouper_version",
        "reporting_spec_grouper_snomed",
        "reporting_spec_grouper_name",
        "reporting_spec_grouper_canonical_url",
        "reporting_spec_grouper_version",
    ]
    OUTFILE.parent.mkdir(parents=True, exist_ok=True)
    try:
        # newline="" lets the csv module control line endings itself
        with open(OUTFILE, "w", newline="", encoding="utf-8") as csvfile:
            writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
            writer.writeheader()
            writer.writerows(rows)
        print(f"✅ Wrote {len(rows)} rows to {OUTFILE}")
        print("🎉 Grouper CSV export complete!")
    except Exception as e:
        print(f"❌ Error writing CSV: {e}")
# Run the export only when this file is executed directly, not when it is imported.
if __name__ == "__main__":
    main()