Skip to content

Commit e31c1bb

Browse files
committed
small edits
1 parent a1435bf commit e31c1bb

File tree

1 file changed

+27
-7
lines changed

1 file changed

+27
-7
lines changed

gnomad/assessment/validity_checks.py

Lines changed: 27 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -459,36 +459,56 @@ def _filter_agg_order(
459459
"""
460460
t = t.rows() if isinstance(t, hl.MatrixTable) else t
461461
# NOTE: make_filters_expr_dict returns a dict with percentages of variants filtered
462-
t.group_by(**group_exprs).aggregate(
463-
**make_filters_expr_dict(t, extra_filter_checks, variant_filter_field)
464-
).order_by(hl.desc("n")).show(n_rows, n_cols)
462+
log_stream = io.StringIO()
463+
with redirect_stdout(log_stream):
464+
t.group_by(**group_exprs).aggregate(
465+
**make_filters_expr_dict(t, extra_filter_checks, variant_filter_field)
466+
).order_by(hl.desc("n")).show(n_rows, n_cols)
467+
table_output = log_stream.getvalue().strip()
468+
return table_output
465469

466470
logger.info(
467471
"Checking distributions of filtered variants amongst variant filters..."
468472
)
469-
_filter_agg_order(t, {"is_filtered": t.is_filtered}, n_rows, n_cols)
473+
summary_table = _filter_agg_order(t, {"is_filtered": t.is_filtered}, n_rows, n_cols)
474+
logger.info(
475+
"Distributions of filtered variants amongst variant filters: %s",
476+
f"\n{summary_table}",
477+
)
470478

471479
add_agg_expr = {}
472480
if "allele_type" in t.info:
473481
logger.info("Checking distributions of variant type amongst variant filters...")
474482
add_agg_expr["allele_type"] = t.info.allele_type
475-
_filter_agg_order(t, add_agg_expr, n_rows, n_cols)
483+
summary_table = _filter_agg_order(t, add_agg_expr, n_rows, n_cols)
484+
logger.info(
485+
"Distributions of variant type amongst variant filters: %s",
486+
f"\n{summary_table}",
487+
)
476488

477489
if "in_problematic_region" in t.row:
478490
logger.info(
479491
"Checking distributions of variant type and region type amongst variant"
480492
" filters..."
481493
)
482494
add_agg_expr["in_problematic_region"] = t.in_problematic_region
483-
_filter_agg_order(t, add_agg_expr, n_rows, n_cols)
495+
summary_table = _filter_agg_order(t, add_agg_expr, n_rows, n_cols)
496+
logger.info(
497+
"Distributions of variant type and region amongst variant filters: %s",
498+
f"\n{summary_table}",
499+
)
484500

485501
if "n_alt_alleles" in t.info:
486502
logger.info(
487503
"Checking distributions of variant type, region type, and number of alt alleles"
488504
" amongst variant filters..."
489505
)
490506
add_agg_expr["n_alt_alleles"] = t.info.n_alt_alleles
491-
_filter_agg_order(t, add_agg_expr, n_rows, n_cols)
507+
summary_table = _filter_agg_order(t, add_agg_expr, n_rows, n_cols)
508+
logger.info(
509+
"Distributions of variant type, region type, and number of alt alleles variant filters: %s",
510+
f"\n{summary_table}",
511+
)
492512

493513

494514
def compare_subset_freqs(

0 commit comments

Comments
 (0)