Skip to content

Commit 4bc7dec

Browse files
committed
handle empty responses when prepping analysis summary
1 parent 9ea52db commit 4bc7dec

File tree

1 file changed

+106
-44
lines changed

1 file changed

+106
-44
lines changed

pori_python/ipr/summary.py

Lines changed: 106 additions & 44 deletions
Original file line number | Diff line number | Diff line change
@@ -12,7 +12,14 @@
1212
from pori_python.graphkb.vocab import get_term_tree
1313
from pori_python.ipr.inputs import create_graphkb_sv_notation
1414
from pori_python.ipr.connection import IprConnection
15-
from pori_python.types import Hashabledict, IprVariant, KbMatch, Ontology, Record, Statement
15+
from pori_python.types import (
16+
Hashabledict,
17+
IprVariant,
18+
KbMatch,
19+
Ontology,
20+
Record,
21+
Statement,
22+
)
1623

1724
from .util import (
1825
convert_to_rid_set,
@@ -79,7 +86,15 @@ def merge_diseases(
7986
[d["@class"] == "Disease" for d in diseases]
8087
):
8188
words = sorted(
82-
list(set([get_displayname(s) for s in diseases if s["@rid"] in disease_matches]))
89+
list(
90+
set(
91+
[
92+
get_displayname(s)
93+
for s in diseases
94+
if s["@rid"] in disease_matches
95+
]
96+
)
97+
)
8398
)
8499
words.append(OTHER_DISEASES)
85100
return natural_join(words)
@@ -117,12 +132,18 @@ def substitute_sentence_template(
117132
# remove subject from the conditions replacements
118133
subjects_ids = convert_to_rid_set(subjects)
119134
disease_conditions = [
120-
cast(Ontology, d) for d in disease_conditions if d["@rid"] not in subjects_ids
135+
cast(Ontology, d)
136+
for d in disease_conditions
137+
if d["@rid"] not in subjects_ids
121138
]
122139
variant_conditions = [
123-
cast(Ontology, d) for d in variant_conditions if d["@rid"] not in subjects_ids
140+
cast(Ontology, d)
141+
for d in variant_conditions
142+
if d["@rid"] not in subjects_ids
143+
]
144+
other_conditions = [
145+
d for d in other_conditions if d["@rid"] not in subjects_ids
124146
]
125-
other_conditions = [d for d in other_conditions if d["@rid"] not in subjects_ids]
126147

127148
result = result.replace(r"{subject}", merge_diseases(subjects, disease_matches))
128149

@@ -134,7 +155,9 @@ def substitute_sentence_template(
134155
other_conditions.extend(disease_conditions)
135156

136157
if r"{conditions:variant}" in template:
137-
result = result.replace(r"{conditions:variant}", natural_join_records(variant_conditions))
158+
result = result.replace(
159+
r"{conditions:variant}", natural_join_records(variant_conditions)
160+
)
138161
else:
139162
other_conditions.extend(variant_conditions)
140163

@@ -165,7 +188,9 @@ def aggregate_statements(
165188
def generate_key(statement: Statement) -> Tuple:
166189
result = [
167190
cond.get("displayName", cond["@rid"])
168-
for cond in filter_by_record_class(statement["conditions"], "Disease", exclude=True)
191+
for cond in filter_by_record_class(
192+
statement["conditions"], "Disease", exclude=True
193+
)
169194
if cond["@rid"] != statement["subject"]["@rid"]
170195
]
171196
if statement.get("subject", {}).get("@class", "Disease") != "Disease":
@@ -225,7 +250,9 @@ def display_variant(variant: IprVariant) -> str:
225250
# Use chosen legacy 'proteinChange' or an hgvs description of lowest detail.
226251
hgvs = variant.get(
227252
"proteinChange",
228-
variant.get("hgvsProtein", variant.get("hgvsCds", variant.get("hgvsGenomic", ""))),
253+
variant.get(
254+
"hgvsProtein", variant.get("hgvsCds", variant.get("hgvsGenomic", ""))
255+
),
229256
)
230257

231258
if gene and hgvs:
@@ -237,14 +264,16 @@ def display_variant(variant: IprVariant) -> str:
237264

238265

239266
def display_variants(gene_name: str, variants: List[IprVariant]) -> str:
240-
result = sorted(list({v for v in [display_variant(e) for e in variants] if gene_name in v}))
267+
result = sorted(
268+
list({v for v in [display_variant(e) for e in variants] if gene_name in v})
269+
)
241270
variants_text = natural_join(result)
242271
if len(result) > 1:
272+
return f"Multiple variants of the gene {gene_name} were observed in this case: {variants_text}"
273+
elif result:
243274
return (
244-
f"Multiple variants of the gene {gene_name} were observed in this case: {variants_text}"
275+
f"{variants_text[0].upper()}{variants_text[1:]} was observed in this case."
245276
)
246-
elif result:
247-
return f"{variants_text[0].upper()}{variants_text[1:]} was observed in this case."
248277
return ""
249278

250279

@@ -265,7 +294,9 @@ def create_section_html(
265294
for statement_id, sentence in sentences_by_statement_id.items():
266295
relevance = statements[statement_id]["relevance"]["@rid"]
267296
category = categorize_relevance(
268-
graphkb_conn, relevance, RELEVANCE_BASE_TERMS + [("resistance", ["no sensitivity"])]
297+
graphkb_conn,
298+
relevance,
299+
RELEVANCE_BASE_TERMS + [("resistance", ["no sensitivity"])],
269300
)
270301
sentence_categories[sentence] = category
271302

@@ -275,7 +306,12 @@ def create_section_html(
275306
"target": "Feature",
276307
"filters": {
277308
"AND": [
278-
{"source": {"target": "Source", "filters": {"name": "entrez gene"}}},
309+
{
310+
"source": {
311+
"target": "Source",
312+
"filters": {"name": "entrez gene"},
313+
}
314+
},
279315
{"name": gene_name},
280316
{"biotype": "gene"},
281317
]
@@ -308,11 +344,22 @@ def create_section_html(
308344
for section in [
309345
{s for (s, v) in sentence_categories.items() if v == "diagnostic"},
310346
{s for (s, v) in sentence_categories.items() if v == "biological"},
311-
{s for (s, v) in sentence_categories.items() if v in ["therapeutic", "prognostic"]},
312347
{
313348
s
314349
for (s, v) in sentence_categories.items()
315-
if v not in ["diagnostic", "biological", "therapeutic", "prognostic", "resistance"]
350+
if v in ["therapeutic", "prognostic"]
351+
},
352+
{
353+
s
354+
for (s, v) in sentence_categories.items()
355+
if v
356+
not in [
357+
"diagnostic",
358+
"biological",
359+
"therapeutic",
360+
"prognostic",
361+
"resistance",
362+
]
316363
},
317364
{s for (s, v) in sentence_categories.items() if v == "resistance"},
318365
]:
@@ -344,10 +391,10 @@ def section_statements_by_genes(
344391

345392

346393
def prep_single_ipr_variant_comment(item):
347-
cancer_type = ','.join(item['cancerType'])
394+
cancer_type = ",".join(item["cancerType"])
348395
if not cancer_type:
349-
cancer_type = 'no specific cancer types'
350-
cancer_type = f' ({cancer_type})'
396+
cancer_type = "no specific cancer types"
397+
cancer_type = f" ({cancer_type})"
351398
section = [f"<h2>{item['variantName']}{cancer_type}</h2>"]
352399
section.append(f"<p>{item['text']}</p>")
353400
return section
@@ -364,31 +411,42 @@ def ipr_analyst_comments(
364411
"<h3>The comments below were automatically drawn from curated text stored in IPR for variant matches in this report, and have not been manually reviewed</h3>"
365412
]
366413

367-
items = []
368-
369-
templates = ipr_conn.get(f'templates?name={report_type}')
370-
# if this is genomic expect two results - one 'pharmacogenomic'
371-
template_ident = [item for item in templates if item['name'] == report_type][0]['ident']
372-
373-
projects = ipr_conn.get(f'project')
374-
project_ident = [item for item in projects if item['name'] == project_name][0]['ident']
375-
376-
match_set = list(set([item['kbVariant'] for item in matches]))
414+
template_ident = None
415+
templates = ipr_conn.get(f"templates?name={report_type}")
416+
# if this is genomic expect two results - one 'pharmacogenomic', which must be removed
417+
if templates:
418+
template_idents = [item for item in templates if item["name"] == report_type]
419+
if template_idents:
420+
template_ident = template_idents[0]["ident"]
421+
422+
project_ident = None
423+
projects = ipr_conn.get(f"project")
424+
if projects:
425+
project_idents = [item for item in projects if item["name"] == project_name]
426+
if project_idents:
427+
project_ident = project_idents[0]["ident"]
428+
429+
match_set = list(set([item["kbVariant"] for item in matches]))
430+
431+
if project_ident and template_ident:
432+
for variant in match_set:
433+
itemlist = ipr_conn.get(
434+
"variant-text",
435+
data={
436+
"variantName": variant,
437+
"template": template_ident,
438+
"project": project_ident,
439+
},
440+
)
441+
if itemlist:
442+
for item in itemlist:
443+
# include matching cancer type OR no cancer type specified
444+
if not item["cancerType"] or disease_name in item["cancerType"]:
445+
section = prep_single_ipr_variant_comment(item)
446+
output.extend(section)
377447

378-
for variant in match_set:
379-
itemlist = ipr_conn.get(
380-
'variant-text',
381-
data={'variantName': variant, 'template': template_ident, 'project': project_ident},
382-
)
383-
if itemlist:
384-
for item in itemlist:
385-
if not item['cancerType'] or disease_name in item['cancerType']:
386-
section = prep_single_ipr_variant_comment(item)
387-
output.extend(section)
388-
else:
389-
ipr_conn.get('variant-text')
390448
if not output:
391-
output = ['No comments found in IPR for variants in this report']
449+
output = ["No comments found in IPR for variants in this report"]
392450
return "\n".join(output)
393451

394452

@@ -432,10 +490,14 @@ def auto_analyst_comments(
432490
# aggregate similar sentences
433491
sentences = {}
434492
for template, group in templates.items():
435-
sentences.update(aggregate_statements(graphkb_conn, template, group, disease_matches))
493+
sentences.update(
494+
aggregate_statements(graphkb_conn, template, group, disease_matches)
495+
)
436496

437497
# section statements by genes
438-
statements_by_genes = section_statements_by_genes(graphkb_conn, list(statements.values()))
498+
statements_by_genes = section_statements_by_genes(
499+
graphkb_conn, list(statements.values())
500+
)
439501

440502
output: List[str] = [
441503
"<h3>The comments below were automatically generated from matches to GraphKB and have not been manually reviewed</h3>"

0 commit comments

Comments
 (0)