Skip to content

Commit d772e21

Browse files
committed
refactor: extract burden-summary renderer from burden_test (PR-5)
burden_test (~780-line CLI orchestrator) inlines its result presentation. Extract the cohesive ASCII summary / top-significant-table renderer into a pure _render_burden_summary(result_df, alpha, phenotype_type) helper — ~100 lines move out of the command body into a named, testable function. Behavior-preserving: output verified byte-identical to the prior inline logic for binary, continuous, and no-significant-results cases. Scope: the other two renderers the plan mentioned (the dry-run plan and the stratified-loop) are left in place — they depend on ~20 command params, so extracting them would be param-soup with little clarity gain; noted as a possible follow-up. flake8 clean; import smoke OK.
1 parent 45b3c3d commit d772e21

1 file changed

Lines changed: 107 additions & 99 deletions

File tree

hvantk/tools/enrichex/burden_cli.py

Lines changed: 107 additions & 99 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,112 @@ def register_burden_commands(group):
2727
group.add_command(burden_pipeline_cmd)
2828

2929

30+
def _render_burden_summary(result_df, alpha, phenotype_type: str) -> None:
    """Print the single-class burden analysis summary and top-significant table.

    Pure presentation extracted from ``burden_test``: counts significant gene
    sets and renders an ASCII table of the first five significant rows. Rows
    are taken in the order they appear in ``result_df``; no sorting is done
    here.

    Args:
        result_df: Results table. Its ``significant`` column is summed for the
            count and used as a row mask. ``gene_set_name``, ``beta``,
            ``odds_ratio`` / ``standard_error``, ``p_value`` and
            ``p_adjusted`` are displayed when present; a column that is
            absent is rendered as ``N/A`` instead of raising ``KeyError``.
        alpha: Significance threshold; only echoed in the summary line.
        phenotype_type: ``"binary"`` selects the odds-ratio layout; any other
            value selects the standard-error layout.

    Returns:
        None. All output goes through ``click.echo``.
    """
    rule = "=" * 60
    divider = "-" * 60

    n_significant = result_df["significant"].sum()
    click.echo("\n" + rule)
    click.echo("BURDEN ANALYSIS SUMMARY")
    click.echo(rule)
    click.echo(f"Total gene sets tested: {len(result_df)}")
    click.echo(f"Significant gene sets (p_adj < {alpha}): {n_significant}")

    if n_significant > 0:
        # Only the third column depends on the phenotype type.
        if phenotype_type == "binary":
            effect = ("odds_ratio", "OR", 8, ".2f")
        else:
            effect = ("standard_error", "SE", 8, ".4f")

        # (column, header label, cell width, format spec). A spec of None
        # marks the text column, which is truncated to its width rather than
        # number-formatted. Widths plus single-space separators total 60.
        layout = (
            ("gene_set_name", "Gene Set", 20, None),
            ("beta", "Beta", 8, ".4f"),
            effect,
            ("p_value", "P-value", 10, ".2e"),
            ("p_adjusted", "P-adj", 10, ".2e"),
        )

        click.echo("\nTop significant gene sets:")
        click.echo(divider)
        click.echo(" ".join(label.ljust(width) for _, label, width, _ in layout))
        click.echo(divider)

        top = result_df[result_df["significant"]].head(5)
        for _index, row in top.iterrows():
            cells = []
            for column, _label, width, spec in layout:
                if column not in top.columns:
                    # Uniform fallback for every column, not just OR/SE.
                    text = "N/A"
                elif spec is None:
                    text = str(row[column])[:width]
                else:
                    text = format(row[column], spec)
                cells.append(text.ljust(width))
            click.echo(" ".join(cells))

    # Closing rule is emitted whether or not anything was significant.
    click.echo(rule + "\n")
134+
135+
30136
@click.command(name="burden")
31137
@click.option(
32138
"-m",
@@ -621,105 +727,7 @@ def burden_test(
621727
click.echo(f"\n Results written to: {results_path}")
622728

623729
# Summary statistics
624-
n_significant = result_df["significant"].sum()
625-
click.echo("\n" + "=" * 60)
626-
click.echo("BURDEN ANALYSIS SUMMARY")
627-
click.echo("=" * 60)
628-
click.echo(f"Total gene sets tested: {len(result_df)}")
629-
click.echo(f"Significant gene sets (p_adj < {alpha}): {n_significant}")
630-
631-
if n_significant > 0:
632-
click.echo(f"\nTop significant gene sets:")
633-
click.echo("-" * 60)
634-
635-
# Select columns based on phenotype type
636-
if phenotype_type == "binary":
637-
display_cols = [
638-
"gene_set_name",
639-
"beta",
640-
"odds_ratio",
641-
"p_value",
642-
"p_adjusted",
643-
]
644-
else:
645-
display_cols = [
646-
"gene_set_name",
647-
"beta",
648-
"standard_error",
649-
"p_value",
650-
"p_adjusted",
651-
]
652-
653-
# Filter to available columns
654-
available_cols = [c for c in display_cols if c in result_df.columns]
655-
top = result_df[result_df["significant"]].head(5)[available_cols]
656-
657-
# Format for display
658-
top_display = top.copy()
659-
if "beta" in top_display.columns:
660-
top_display["beta"] = top_display["beta"].apply(lambda x: f"{x:.4f}")
661-
if "odds_ratio" in top_display.columns:
662-
top_display["odds_ratio"] = top_display["odds_ratio"].apply(
663-
lambda x: f"{x:.2f}"
664-
)
665-
if "standard_error" in top_display.columns:
666-
top_display["standard_error"] = top_display["standard_error"].apply(
667-
lambda x: f"{x:.4f}"
668-
)
669-
top_display["p_value"] = top_display["p_value"].apply(lambda x: f"{x:.2e}")
670-
top_display["p_adjusted"] = top_display["p_adjusted"].apply(
671-
lambda x: f"{x:.2e}"
672-
)
673-
674-
# Print table
675-
if phenotype_type == "binary":
676-
header = " ".join(
677-
[
678-
"Gene Set".ljust(20),
679-
"Beta".ljust(8),
680-
"OR".ljust(8),
681-
"P-value".ljust(10),
682-
"P-adj".ljust(10),
683-
]
684-
)
685-
else:
686-
header = " ".join(
687-
[
688-
"Gene Set".ljust(20),
689-
"Beta".ljust(8),
690-
"SE".ljust(8),
691-
"P-value".ljust(10),
692-
"P-adj".ljust(10),
693-
]
694-
)
695-
696-
click.echo(header)
697-
click.echo("-" * 60)
698-
699-
for _, row in top_display.iterrows():
700-
if phenotype_type == "binary":
701-
line = " ".join(
702-
[
703-
str(row["gene_set_name"])[:20].ljust(20),
704-
str(row["beta"]).ljust(8),
705-
str(row.get("odds_ratio", "N/A")).ljust(8),
706-
str(row["p_value"]).ljust(10),
707-
str(row["p_adjusted"]).ljust(10),
708-
]
709-
)
710-
else:
711-
line = " ".join(
712-
[
713-
str(row["gene_set_name"])[:20].ljust(20),
714-
str(row["beta"]).ljust(8),
715-
str(row.get("standard_error", "N/A")).ljust(8),
716-
str(row["p_value"]).ljust(10),
717-
str(row["p_adjusted"]).ljust(10),
718-
]
719-
)
720-
click.echo(line)
721-
722-
click.echo("=" * 60 + "\n")
730+
_render_burden_summary(result_df, alpha, phenotype_type)
723731

724732
# Run permutation test if requested
725733
if permutation:

0 commit comments

Comments
 (0)