Skip to content

Commit 29eeb44

Browse files
committed
add tests and comments
1 parent 308c453 commit 29eeb44

File tree

3 files changed

+307
-46
lines changed

3 files changed

+307
-46
lines changed

pori_python/ipr/main.py

Lines changed: 18 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@
3737
germline_kb_matches,
3838
select_expression_plots,
3939
)
40-
from .summary import auto_analyst_comments, ipr_analyst_comments
40+
from .summary import auto_analyst_comments, get_ipr_analyst_comments
4141
from .therapeutic_options import create_therapeutic_options
4242
from .util import LOG_LEVELS, logger, trim_empty_values
4343

@@ -247,6 +247,10 @@ def ipr_report(
247247
custom_kb_match_filter=None,
248248
async_upload: bool = False,
249249
mins_to_wait: int = 5,
250+
include_ipr_variant_text: bool = True,
251+
include_nonspecific_disease: bool = False,
252+
include_nonspecific_project: bool = False,
253+
include_nonspecific_template: bool = False,
250254
) -> Dict:
251255
"""Run the matching and create the report JSON for upload to IPR.
252256
@@ -270,6 +274,10 @@ def ipr_report(
270274
custom_kb_match_filter: function(List[kbMatch]) -> List[kbMatch]
271275
async_upload: use report_async endpoint to upload reports
272276
mins_to_wait: if using report_async, number of minutes to wait for success before exception raised
277+
include_ipr_variant_text: if True, include output from the ipr variant-texts endpoint in analysis comments
278+
include_nonspecific_disease: if include_ipr_variant_text is True, if no disease match is found use disease-nonspecific variant comment
279+
include_nonspecific_project: if include_ipr_variant_text is True, if no project match is found use project-nonspecific variant comment
280+
include_nonspecific_template: if include_ipr_variant_text is True, if no template match is found use template-nonspecific variant comment
273281
274282
Returns:
275283
ipr_conn.upload_report return dictionary
@@ -448,21 +456,26 @@ def ipr_report(
448456

449457
logger.info("generating analyst comments")
450458

459+
comments_list = []
451460
if generate_comments:
452461
graphkb_comments = auto_analyst_comments(
453462
graphkb_conn, gkb_matches, disease_name=kb_disease_match, variants=all_variants
454463
)
464+
comments_list.append(graphkb_comments)
455465

456-
ipr_comments = ipr_analyst_comments(
466+
if include_ipr_variant_text:
467+
ipr_comments = get_ipr_analyst_comments(
457468
ipr_conn,
458469
gkb_matches,
459470
disease_name=kb_disease_match,
460471
project_name=content['project'],
461472
report_type=content['template'],
473+
include_nonspecific_disease=include_nonspecific_disease,
474+
include_nonspecific_project=include_nonspecific_project,
475+
include_nonspecific_template=include_nonspecific_template,
462476
)
463-
comments = {"comments": "\n".join([ipr_comments, graphkb_comments])}
464-
else:
465-
comments = {"comments": ""}
477+
comments_list.append(ipr_comments)
478+
comments = "\n".join(comments_list)
466479

467480
# thread safe deep-copy the original content
468481
output = json.loads(json.dumps(content))

pori_python/ipr/summary.py

Lines changed: 91 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -364,63 +364,113 @@ def section_statements_by_genes(
364364
return genes
365365

366366

def prep_single_ipr_variant_comment(variant_text: dict) -> list:
    """Format one IPR variant-text record as HTML lines for the analyst comments.

    Params:
        variant_text: mapping with ``variantName`` and ``text`` entries, plus an
            optional ``cancerType`` sequence of cancer type names

    Returns:
        section: two-item list of html-formatted strings - an ``<h2>`` heading
            holding the variant name and its comma-joined cancer types, followed
            by a ``<p>`` holding the text
    """
    # a missing, null or empty cancerType is reported as not disease-specific
    # instead of raising on the join
    cancer_types = ",".join(variant_text.get("cancerType") or [])
    if not cancer_types:
        cancer_types = "no specific cancer types"
    return [
        f"<h2>{variant_text['variantName']} ({cancer_types})</h2>",
        f"<p>{variant_text['text']}</p>",
    ]

376384

def get_ipr_analyst_comments(
    ipr_conn: IprConnection,
    matches: Sequence[KbMatch] | Sequence[Hashabledict],
    disease_name: str,
    project_name: str,
    report_type: str,
    include_nonspecific_disease: bool = False,
    include_nonspecific_project: bool = False,
    include_nonspecific_template: bool = False,
) -> str:
    """Build analyst comments from the curated variant texts stored in IPR.

    Each distinct ``kbVariant`` in ``matches`` is looked up by name on the
    ``variant-text`` endpoint. The returned texts are then narrowed in three
    successive steps: project, template, cancer type. At every step the texts
    that name the requested value are kept; if there are none and the related
    ``include_nonspecific_*`` flag is True, the texts that leave that field
    unset are kept instead; otherwise nothing is kept for that variant.

    Params:
        ipr_conn: connection to IPR, used for its ``get`` method
        matches: list of kbmatches which will be included in the report
        disease_name: str, eg 'colorectal cancer'
        project_name: str, eg TEST or pog
        report_type: str, eg genomic or rapid
        include_nonspecific_disease: bool - true if variant texts that don't
            explicitly name a cancer type should be used as a fallback
        include_nonspecific_project: bool - true if variant texts that don't
            explicitly name a project should be used as a fallback
        include_nonspecific_template: bool - true if variant texts that don't
            explicitly name a template should be used as a fallback

    Returns:
        html-formatted string: a header followed by one section per selected
        variant text, or a plain "no comments found" message when nothing
        was selected
    """
    output_header = "<h3>The comments below were automatically drawn from curated text stored in IPR for variant matches in this report, and have not been manually reviewed</h3>"
    no_comments_found_output = "No comments found in IPR for variants in this report"

    def names_project(item):
        return _linked_name_is(item, "project", project_name)

    def names_template(item):
        return _linked_name_is(item, "template", report_type)

    def names_disease(item):
        return disease_name in (item.get("cancerType") or [])

    # dict.fromkeys de-duplicates while keeping first-seen order, so the section
    # order is the same on every run (iterating a set gives no such guarantee)
    variant_names = list(dict.fromkeys(match["kbVariant"] for match in matches))

    sections = []
    for variant_name in variant_names:
        variant_texts = ipr_conn.get("variant-text", data={"variantName": variant_name})  # type: ignore
        if not variant_texts:
            continue

        variant_texts = _prefer_specific(
            variant_texts,
            names_project,
            lambda item: not item.get("project"),
            include_nonspecific_project,
        )
        variant_texts = _prefer_specific(
            variant_texts,
            names_template,
            lambda item: not item.get("template"),
            include_nonspecific_template,
        )
        variant_texts = _prefer_specific(
            variant_texts,
            names_disease,
            lambda item: not item.get("cancerType"),
            include_nonspecific_disease,
        )

        for item in variant_texts:
            sections.extend(prep_single_ipr_variant_comment(item))

    if not sections:
        return no_comments_found_output
    return "\n".join([output_header, *sections])


def _linked_name_is(item, key, expected_name) -> bool:
    """True when item[key] is a non-empty record whose 'name' equals expected_name."""
    linked = item.get(key)
    # a missing or null link can never be a specific match
    return bool(linked) and linked.get("name") == expected_name


def _prefer_specific(items, is_specific, is_unspecified, include_nonspecific) -> list:
    """Keep the specific matches; else optionally fall back to items with the field unset."""
    specific = [item for item in items if is_specific(item)]
    if specific:
        return specific
    if include_nonspecific:
        return [item for item in items if is_unspecified(item)]
    return []
425475

426476

0 commit comments

Comments
 (0)