|
| 1 | +#!/usr/bin/env python |
| 2 | + |
| 3 | +"""This script provides the ability to pull down various medical terminology |
| 4 | +valuesets required for processing eICR data, specifically adding codes to |
| 5 | +different data elements within an eICR message, such as Lab Order Name or |
| 6 | +Lab Result Interpretation. |
| 7 | +
|
| 8 | +Current Available Valuesets: |
| 9 | + - Lab Names (Ordering & Resulting) - LOINC |
| 10 | + - Lab Orders - LOINC |
| 11 | + - Lab Observations - LOINC |
| 12 | + - Lab Result Value - SNOMED |
| 13 | + - Lab Result Interpretation - HL7 Observation Interpretations |
| 14 | +
|
| 15 | +Requirements: |
| 16 | + - SNOMED - requires a UMLS API key stored in an environment variable: |
| 17 | + - UMLS_API_KEY |
| 18 | + - LOINC - requires a LOINC username and password stored in environment variables: |
| 19 | + - LOINC_USERNAME |
| 20 | + - LOINC_PWD |
| 21 | +""" |
| 22 | + |
| 23 | +import argparse |
| 24 | +import csv |
| 25 | +import os |
| 26 | +import sys |
| 27 | + |
| 28 | +import requests |
| 29 | + |
# Terminology service endpoints.
# The LOINC suffixes are query strings appended directly to LOINC_BASE_URL.
LOINC_BASE_URL = "https://loinc.regenstrief.org/searchapi/loincs?"
LOINC_LAB_ORDER_SUFFIX = "query=orderobs:Order+OR+orderobs:Both&rows=500"
LOINC_LAB_RESULT_SUFFIX = "query=orderobs:Observation+OR+orderobs:Both&rows=500"
LOINC_LAB_NAMES_SUFFIX = "query=orderobs:Order+OR+orderobs:Both+OR+orderobs:Observation"
HL7_LAB_INTERP_URL = (
    "https://www.fhir.org/guides/stats2/"
    "valueset-us.nlm.vsac-2.16.840.1.113883.1.11.78.json"
)
UMLS_SNOMED_LAB_VALUES_URL = (
    "https://uts-ws.nlm.nih.gov/rest/content/current/"
    "source/SNOMEDCT_US/260245000/descendants"
)

# Credentials are read from the environment; each is None when its variable is unset.
LOINC_USERNAME = os.getenv("LOINC_USERNAME")
LOINC_PWD = os.getenv("LOINC_PWD")
UMLS_API_KEY = os.getenv("UMLS_API_KEY")

# Directory (relative to the working directory) that the CSV files are written to.
CSV_DIRECTORY = "tmp/"
| 49 | + |
| 50 | + |
def get_umls_snomed_lab_values():
    """Download the SNOMED lab result value codes from UMLS and save them as CSV.

    Requests ``UMLS_SNOMED_LAB_VALUES_URL`` page by page (500 entries per
    page), keeps every entry that has both a ``ui`` (code) and a ``name``
    (text), and hands the collected rows to ``save_valueset_csv_file`` under
    the name ``snomed_lab_value.csv``.

    Raises:
        KeyError: if the ``UMLS_API_KEY`` environment variable is not set.
    """
    if UMLS_API_KEY is None:
        raise KeyError("UMLS_API_KEY Environment Variable must be set to a proper UMLS API Key!")

    snomed_filename = "snomed_lab_value.csv"
    page_size = 500
    page_num = 1
    snomed_rows = []

    while True:
        # NOTE: the UMLS responses are a bit slow
        # you can use the print statement below to get a
        # better idea of the progress if needed.
        # print(f"Processing SNOMED page {page_num}")
        params = {"apiKey": UMLS_API_KEY, "pageNumber": page_num, "pageSize": page_size}
        umls_response = requests.get(UMLS_SNOMED_LAB_VALUES_URL, params=params)

        if umls_response.status_code != 200:
            # Paging ends on the first non-200 response, as before.
            # NOTE(review): 404 is assumed to be the normal "past the last
            # page" signal - confirm against the UMLS API. Any other status
            # is reported because the saved file may then be incomplete.
            if umls_response.status_code != 404:
                print(
                    f"ERROR Retrieving SNOMED Lab Value CODES (page {page_num}): "
                    f"{umls_response.status_code}: {umls_response.text}",
                    file=sys.stderr,
                )
            break

        # A missing or empty "result" means there is nothing more to read;
        # iterating over None would raise a TypeError.
        umls_results = umls_response.json().get("result") or []
        if not umls_results:
            break

        for result in umls_results:
            snomed_code = result.get("ui")
            snomed_text = result.get("name")
            if snomed_code and snomed_text:
                snomed_rows.append({"code": snomed_code, "text": snomed_text})

        page_num += 1

    print(f"{len(snomed_rows)} Codes Extracted")
    save_valueset_csv_file(snomed_filename, snomed_rows)
| 83 | + |
| 84 | + |
def get_hl7_lab_interp():
    """Download the HL7 Observation Interpretation codes and save them as CSV.

    Fetches the value set JSON at ``HL7_LAB_INTERP_URL``, takes the concepts
    of its first ``compose.include`` entry, renames each concept's
    ``display`` key to ``text`` and passes the rows to
    ``save_valueset_csv_file`` under the name ``hl7_lab_interp.csv``.

    Exits the process with status 1 when the download fails or the value set
    contains no concepts.
    """
    hl7_filename = "hl7_lab_interp.csv"
    hl7_response = requests.get(HL7_LAB_INTERP_URL)

    if hl7_response.status_code != 200:
        print(
            f"ERROR Retrieving HL7 LAB Interpretation CODES: {hl7_response.status_code}: {hl7_response.text}",
            file=sys.stderr,
        )
        sys.exit(1)

    # Parse the body once and tolerate missing sections instead of failing
    # with an AttributeError/IndexError on a chained lookup.
    valueset = hl7_response.json()
    includes = (valueset.get("compose") or {}).get("include") or [{}]
    hl7_codes = includes[0].get("concept")

    if not hl7_codes:
        # Without concepts there is nothing to write; fail the same way as a
        # failed download rather than passing None on to the CSV writer.
        print(
            "ERROR Retrieving HL7 LAB Interpretation CODES: no concepts found in value set",
            file=sys.stderr,
        )
        sys.exit(1)

    record_count = (valueset.get("expansion") or {}).get("total")
    print(f"HL7 Lab Interpretation Record Count: {record_count}")

    # replace 'display' key with 'text'
    for hl7_row in hl7_codes:
        if "display" in hl7_row:
            hl7_row["text"] = hl7_row.pop("display")

    save_valueset_csv_file(hl7_filename, hl7_codes)
| 109 | + |
| 110 | + |
def get_loinc_lab_names():
    """Query LOINC with the lab-names search and save the rows as CSV.

    The rows returned by ``process_loinc_valueset`` are written to
    ``loinc_lab_names.csv`` through ``save_valueset_csv_file``.
    """
    names_url = LOINC_BASE_URL + LOINC_LAB_NAMES_SUFFIX
    name_rows = process_loinc_valueset(names_url, "Lab Names")
    save_valueset_csv_file("loinc_lab_names.csv", name_rows)
| 118 | + |
| 119 | + |
def get_loinc_lab_orders():
    """Query LOINC with the lab-orders search and save the rows as CSV.

    The rows returned by ``process_loinc_valueset`` are written to
    ``loinc_lab_orders.csv`` through ``save_valueset_csv_file``.
    """
    orders_url = LOINC_BASE_URL + LOINC_LAB_ORDER_SUFFIX
    order_rows = process_loinc_valueset(orders_url, "Lab Orders")
    save_valueset_csv_file("loinc_lab_orders.csv", order_rows)
| 127 | + |
| 128 | + |
def get_loinc_lab_results():
    """Query LOINC with the lab-results search and save the rows as CSV.

    The rows returned by ``process_loinc_valueset`` are written to
    ``loinc_lab_result.csv`` through ``save_valueset_csv_file``.
    """
    results_url = LOINC_BASE_URL + LOINC_LAB_RESULT_SUFFIX
    result_rows = process_loinc_valueset(results_url, "Lab Results")
    save_valueset_csv_file("loinc_lab_result.csv", result_rows)
| 136 | + |
| 137 | + |
def process_loinc_valueset(api_url, loinc_valueset_type):
    """Page through a LOINC search and collect its code/text rows.

    Starting at ``api_url``, each page's ``Results`` are converted with
    ``process_loinc_results`` and the ``ResponseSummary`` ``Next`` URL is
    followed until a page reports no ``Next`` URL or no returned rows. Every
    fetched page is processed, including the last one.

    Args:
        api_url: Full LOINC search URL of the first page.
        loinc_valueset_type: Label used in progress and error messages.

    Returns:
        The list of ``{"code", "text"}`` rows, or ``None`` when any request
        returns a non-200 status.

    Raises:
        KeyError: if ``LOINC_USERNAME`` or ``LOINC_PWD`` is not set.
    """
    if LOINC_USERNAME is None or LOINC_PWD is None:
        raise KeyError(
            "LOINC_USERNAME and LOINC_PWD environment variables are required to pull from LOINC!"
        )

    loinc_rows = []
    next_url_call = api_url
    is_first_page = True

    while next_url_call:
        loinc_response = requests.get(next_url_call, auth=(LOINC_USERNAME, LOINC_PWD))
        if loinc_response.status_code != 200:
            print(
                f"ERROR Retrieving LOINC {loinc_valueset_type} CODES: {loinc_response.status_code}: {loinc_response.text}",
                file=sys.stderr,
            )
            # A partial value set is not returned, so callers never mistake
            # it for a complete download.
            return None

        loinc_codes = loinc_response.json()
        summary = loinc_codes.get("ResponseSummary") or {}

        if is_first_page:
            record_count = summary.get("RecordsFound")
            print(f"{loinc_valueset_type} Record Count: {record_count}")
            is_first_page = False

        if not summary.get("RowsReturned"):
            break

        # Process the page before deciding whether to continue, so the final
        # page (the one without a "Next" URL) is not dropped.
        loinc_rows = process_loinc_results(loinc_codes.get("Results") or [], loinc_rows)
        next_url_call = summary.get("Next")

    return loinc_rows
| 174 | + |
| 175 | + |
def process_loinc_results(loinc_results, loinc_order_rows) -> list:
    """Append the code/text rows of one page of LOINC results.

    Args:
        loinc_results: The ``Results`` entries of one LOINC response page.
        loinc_order_rows: List of rows collected so far; extended in place.

    Returns:
        ``loinc_order_rows`` with the rows of every result appended. When
        ``loinc_results`` is empty or ``None`` a notice is printed and the
        list is returned unchanged.
    """
    # Truthiness also covers None, which len() would reject with a TypeError.
    if not loinc_results:
        print("NO RESULTS TO PROCESS!")
        return loinc_order_rows

    for loinc_result in loinc_results:
        loinc_order_rows = get_all_loinc_terms_per_code(loinc_result, loinc_order_rows)

    return loinc_order_rows
| 185 | + |
| 186 | + |
def get_all_loinc_terms_per_code(loinc_result: dict, loinc_order_rows) -> list:
    """Append one row per available name of a single LOINC result.

    A ``{"code", "text"}`` row is added for the result's ``SHORTNAME`` and
    for its ``LONG_COMMON_NAME``, in that order. Like the SNOMED extraction,
    a row is only emitted when both the code (``LOINC_NUM``) and the text are
    present and non-empty.

    Args:
        loinc_result: One entry of a LOINC response's ``Results`` list.
        loinc_order_rows: List of rows collected so far; extended in place.

    Returns:
        ``loinc_order_rows`` (the same list object).
    """
    result_code = loinc_result.get("LOINC_NUM")
    if not result_code:
        # A name without a code cannot be used to code a data element.
        return loinc_order_rows

    for name_field in ("SHORTNAME", "LONG_COMMON_NAME"):
        name_text = loinc_result.get(name_field)
        if name_text:
            loinc_order_rows.append({"code": result_code, "text": name_text})

    # NOTE: There are other fields that have additional descriptions that we can pull
    # from as well. ie. TermDescriptions [], FormalName, and DisplayName.
    # Will leave these out for now.

    return loinc_order_rows
| 201 | + |
| 202 | + |
def save_valueset_csv_file(filename: str, contents: list):
    """Write value set rows to a CSV file inside ``CSV_DIRECTORY``.

    Args:
        filename: Name of the CSV file to create; must not be blank.
        contents: List of row dictionaries. The CSV header is the union of
            the rows' keys in first-seen order; keys a row lacks are written
            as empty cells.

    Returns:
        None. Problems are reported on stdout instead of being raised.
    """
    if not filename.strip():
        print("No filename supplied. Failed to save CSV file!")
        return

    # Covers both None and an empty list. The previous "is None and len(...)"
    # test raised TypeError for None and let [] through to contents[0].
    if not contents:
        print("Empty file contents! Failed to save CSV!")
        return

    try:
        # Create the output directory on demand so a fresh checkout works.
        os.makedirs(CSV_DIRECTORY, exist_ok=True)
        full_file_path = os.path.join(CSV_DIRECTORY, filename)
        # dict.fromkeys keeps first-seen order while dropping duplicates, so
        # rows with additional keys no longer abort the write.
        csv_headers = list(dict.fromkeys(key for row in contents for key in row))

        with open(full_file_path, "w", newline="", encoding="utf-8") as csvfile:
            writer = csv.DictWriter(csvfile, csv_headers)
            writer.writeheader()
            writer.writerows(contents)
        print(f"CSV File successfully saved as {full_file_path}")

    except ValueError as e:
        print(f"Error parsing Dict Contents: {e}")
    except Exception as e:
        # Deliberately best-effort: one failed file must not stop the
        # remaining downloads.
        print(f"An error occured: {e}")
| 226 | + |
| 227 | + |
def main(
    all_vs: bool,
    lab_orders: bool,
    lab_obs: bool,
    lab_values: bool,
    lab_interp: bool,
    lab_names: bool,
):
    """Run the download for every requested value set.

    Each flag enables one download; ``all_vs`` enables all of them. The
    downloads run in a fixed order: LOINC lab orders, LOINC lab results,
    SNOMED lab values, HL7 lab interpretations, LOINC lab names.
    """
    if all_vs:
        # "all" simply switches every individual selection on.
        lab_orders = lab_obs = lab_values = lab_interp = lab_names = True

    if lab_orders:
        get_loinc_lab_orders()
    if lab_obs:
        get_loinc_lab_results()
    if lab_values:
        get_umls_snomed_lab_values()
    if lab_interp:
        get_hl7_lab_interp()
    if lab_names:
        get_loinc_lab_names()
| 246 | + |
| 247 | + |
if __name__ == "__main__":
    # Command-line switches in the order they appear in --help.
    cli_flags = (
        ("--lab_names", "For ALL Loinc Lab Names both Ordering & Resulting"),
        ("--lab_orders", "For Loinc Lab Orders"),
        ("--lab_obs", "For Loinc Lab Observations"),
        ("--lab_values", "For Snomed Lab Result Values"),
        ("--lab_interp", "For HL7 Lab Interpretations"),
        ("--all", "If present, pulls all value sets"),
    )

    parser = argparse.ArgumentParser(
        description="A script to pull down various Medical Terminology Value Set Codes and Texts, specify which sets."
    )
    for flag, help_text in cli_flags:
        parser.add_argument(flag, action="store_true", help=help_text)

    args = parser.parse_args()
    main(
        all_vs=args.all,
        lab_orders=args.lab_orders,
        lab_obs=args.lab_obs,
        lab_values=args.lab_values,
        lab_interp=args.lab_interp,
        lab_names=args.lab_names,
    )
0 commit comments