|
| 1 | +import csv |
1 | 2 | import random |
2 | 3 | import re |
3 | 4 | import typing |
|
9 | 10 | from utils import path as path |
10 | 11 |
|
11 | 12 | enhancements = path.load_loinc_enhancements() |
12 | | - |
13 | 13 | LOINC_ENHANCEMENTS = normalize.merge_enhancements(enhancements) |
14 | 14 |
|
15 | 15 |
|
@@ -450,3 +450,61 @@ def generate_augmented_examples( |
450 | 450 | augmented_examples.append(ex_code) |
451 | 451 |
|
452 | 452 | return augmented_examples |
| 453 | + |
| 454 | + |
def build_augmented_loinc_files(
    input_path: str,
    config: schemas.LoincFileGenerationConfig,
    num_lcn: int = 5,
    num_sn: int = 5,
    num_dn: int = 5,
    output_path_base: str = "./data/training_files/augmented_loinc",
) -> None:
    """
    Generates augmented LOINC data files for the short names, long common
    names, and display names found in a pipe-delimited LOINC name file.

    For every well-formed input row, one line is appended to each of
    ``{output_path_base}_short_name.csv``,
    ``{output_path_base}_long_common_name.csv`` and
    ``{output_path_base}_display_name.csv``. Each output line is written with
    ``csv.writer`` using ":" as the delimiter and holds the LOINC code, the
    original name, and the augmented examples joined with "|".

    :param input_path: The path to the base LOINC name file. Rows are split
      on "|"; rows with fewer than six fields are skipped. Fields 0-3 are
      read as code, short name, long common name and display name; field 5
      is read as a ";"-separated list of related names.
    :param config: Configuration indexed by "short_name",
      "long_common_name" and "display_name"; each entry is passed through to
      ``generate_augmented_examples``.
    :param num_lcn: The number of augmented long common names to generate.
    :param num_sn: The number of augmented short common names to generate.
    :param num_dn: The number of augmented display names to generate.
    :param output_path_base: The base path (without suffix) for the output
      files. Files are opened in append mode, so existing content is kept.
    :return: None
    """
    # Imported locally so this function does not depend on a module-level
    # import being present.
    from contextlib import ExitStack

    num_map = {"short_name": num_sn, "long_common_name": num_lcn, "display_name": num_dn}

    # The input is opened first so that a missing input file raises before
    # any output file is created.
    with open(input_path, encoding="utf-8") as source, ExitStack() as stack:
        # Open each output file once instead of re-opening it for every row.
        # Note: these files should be opened using a CSV reader.
        writers = {
            key: csv.writer(
                stack.enter_context(
                    open(f"{output_path_base}_{key}.csv", "a", encoding="utf-8", newline="")
                ),
                delimiter=":",  # use ":" instead of default ","
            )
            for key in num_map
        }

        # Read in data/loinc_lab_names_XXXX.csv, streaming line by line.
        # NOTE(review): no header row is skipped here; if the input file has
        # a header with six or more columns it is augmented like any other
        # row -- confirm against the file format.
        for row in source:
            # Drop the line terminator so it cannot leak into the last field
            # (previously a six-column row left "\n" in the related names).
            r = row.rstrip("\n").split("|")
            # skip any malformed rows
            if len(r) < 6:
                continue

            loinc_code, short_name, long_name, display_name = r[0], r[1], r[2], r[3]
            related_names = r[5].split(";") if r[5] else []

            # Insertion order fixes the order of generate_augmented_examples
            # calls per row: short name, long common name, display name.
            values = {
                "short_name": short_name,
                "long_common_name": long_name,
                "display_name": display_name,
            }

            for key, base_value in values.items():
                augmented_examples = generate_augmented_examples(
                    base_value, related_names, num_map[key], config[key]
                )
                writers[key].writerow([loinc_code, base_value, "|".join(augmented_examples)])
0 commit comments