|
17 | 17 | if not args.output_path: |
18 | 18 | args.output_path = args.combined_table.replace(".tsv", "_with_phenotype_summary.tsv") |
19 | 19 |
|
20 | | -prompt_prefix1 = """You are a clinical geneticist. You have assembled known gene-disease associations from authoritative sources that include OMIM, GenCC, ClinGen, ClinVar, and PanelApp. Now, you need to condense the phenotypes described in these different sources into a single concise comma-separate list that covers the primary features or symptoms of the disease, as well as the main organ systems that are affected. For example, when the source phenotypes are: |
| 20 | +prompt_prefix1 = """You are a clinical geneticist. You have assembled known gene-disease associations from authoritative sources that include OMIM, GenCC, ClinGen, ClinVar, PanelApp, Orphanet, and dbNSFP. Now, you need to condense the phenotypes described in these different sources into a single concise comma-separate list that covers the primary features or symptoms of the disease, as well as the main organ systems that are affected. For example, when the source phenotypes are: |
21 | 21 |
|
22 | 22 | OMIM: 'Congenital disorder of glycosylation, type Ie', CLINGEN: 'congenital disorder of glycosylation type 1E', PANEL APP UK: 'Congenital disorder of glycosylation, type Ie, OMIM:608799, GDP-Man:Dol-P mannosyltransferase deficiency (Disorders of m |
23 | 23 | ultiple glycosylation and other glycosylation pathways); Congenital disorder of glycosylation, type Ie, OMIM:608799; Congenital disorder of glycosylation, type Ie, OMIM:608799; Congenital disorder of glycosylation, type Ie, OMIM:608799; Congenital |
|
34 | 34 | """ |
35 | 35 |
|
36 | 36 | prompt_prefix2 = """ |
37 | | -You are a clinical geneticist. You have assembled known gene-disease associations from authoritative sources that include OMIM, GenCC, ClinGen, ClinVar, and PanelApp. Now, you need to select a single |
| 37 | +You are a clinical geneticist. You have assembled known gene-disease associations from authoritative sources that include OMIM, GenCC, ClinGen, ClinVar, PanelApp, Orphanet, and dbNSFP. Now, you need to select a single |
38 | 38 | disease category that is the best match for the provided phenotypes. The possible disease categories are: |
39 | 39 |
|
40 | 40 | 'BIOCHEMICAL/METABOLIC', |
@@ -85,8 +85,10 @@ def summarize_phenotypes(row, prompt_prefix=prompt_prefix1, blank_if_no_phenotyp |
85 | 85 | ("PANEL_APP_AU", "PANEL_APP_AU_phenotypes"), |
86 | 86 | ("CLINVAR", "CLINVAR_phenotypes"), |
87 | 87 | ("FRIDMAN", "FRIDMAN_phenotype_category"), |
| 88 | + ("ORPHANET", "DBNSFP_orphanet_disorder"), |
| 89 | + ("DBNSFP_DISEASE", "DBNSFP_disease_description"), |
88 | 90 | ]: |
89 | | - if not pd.isna(row[phenotype_column]): |
| 91 | + if phenotype_column in row and not pd.isna(row[phenotype_column]): |
90 | 92 | phenotypes.append(f"{label}: {row[phenotype_column]}") |
91 | 93 |
|
92 | 94 | if not phenotypes: |
|
0 commit comments