Skip to content

Commit c24838d

Browse files
committed
merge main
2 parents e31c1bb + df63d57 commit c24838d

File tree

2 files changed

+288
-5
lines changed

2 files changed

+288
-5
lines changed
Lines changed: 277 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,277 @@
1+
"""Script to parse logs from validity check output."""
2+
3+
import re
4+
5+
import hail as hl
6+
7+
8+
def parse_log_file(log_file):
    """Parse a log file and categorize messages for formatting, extracting function names and sources.

    A line of the form ``LEVEL (module.function lineno): message`` (LEVEL being
    INFO, WARNING or ERROR) starts a new record. Subsequent lines are attached to
    that record as an ASCII table once a line containing ``+----`` or ``| locus``
    is seen. Non-matching lines that appear before a table has started are ignored.

    :param log_file: Path to the log file containing python logging output (such as INFO and WARNING statements).
    :return: List of tuples containing logger information describing the validity check, status category, source, message, and associated table if relevant.
    """
    parsed_logs = []
    log_pattern = re.compile(
        r"^(INFO|WARNING|ERROR) \(([^)]+)\.(\w+) (\d+)\): (.*)"
    )  # Extract log level, module, function, line number, and message.
    # Matches an ASCII table border that ended up on the message line itself.
    table_border_pattern = re.compile(r"\+\-.*\+$")

    # Maps the logging function name to the validity check shown in the report;
    # unmapped function names are reported as-is.
    function_mapping = {
        "validate_config": "general info",
        "validate_ht_fields": "general info",
        "main": "general info",
        "validate_federated_data": "general info",
        "summarize_variants": "variant summary",
        "sum_group_callstats": "group summations",
        "check_missingness": "missingness",
        "compute_missingness": "missingness",
        "make_group_sum_expr_dict": "group summations",
        "check_sex_chr_metrics": "XY check",
        "check_raw_and_adj_callstats": "raw/adj check",
    }

    def categorize(log_level, message):
        """Return the status category ("pass", "fail", "warn" or "info") for a log line."""
        if log_level == "WARNING":
            return "warn"
        if log_level == "ERROR":
            return "fail"
        # INFO lines are classified by their wording; "passed" takes precedence.
        message_lower = message.lower()
        if "passed" in message_lower:
            return "pass"
        # "fail" also covers "failed", "failure", etc.
        if "fail" in message_lower:
            return "fail"
        return "info"

    def store_record(metadata, message, table_lines):
        """Append the record being collected to ``parsed_logs``, if there is one."""
        # Keep the record when it has either a message or a table, so a log line
        # whose first line is empty does not lose its table.
        if metadata is None or not (message or table_lines):
            return
        # Remove ASCII table lines from the message part but keep the full table
        # separately.
        cleaned_message = table_border_pattern.sub("", message).strip()
        parsed_logs.append(
            (*metadata, cleaned_message, "\n".join(table_lines).strip())
        )

    with hl.hadoop_open(log_file, "r") as f:
        current_message = ""
        current_table = []
        current_metadata = None  # Stores log metadata before a table appears.

        for line in f:
            match = log_pattern.match(line)

            if match:
                # A new log line closes the record collected so far.
                store_record(current_metadata, current_message, current_table)

                # Start new log message.
                log_level, module, function_name, line_number, message = (
                    match.groups()
                )
                source = f"{module}.{function_name} {line_number}"
                validity_check = function_mapping.get(function_name, function_name)
                category = categorize(log_level, message)

                # Reset tracking.
                current_message = message
                current_table = []
                current_metadata = (validity_check, category, source)

            elif "+----" in line or "| locus" in line:  # Table start detection.
                current_table.append(line.strip())  # Add table row.
            elif current_table:  # If already collecting a table.
                current_table.append(line.strip())

        # Store the last record; no further log line follows to trigger it.
        store_record(current_metadata, current_message, current_table)

    return parsed_logs
96+
97+
98+
def generate_html_report(parsed_logs, output_file):
    """Generate an HTML report with sortable and filterable columns, with expandable tables for results.

    All log-derived text (validity check, status, source, message and table) is
    HTML-escaped before being written, so characters such as ``<``, ``>`` and ``&``
    are displayed literally instead of being interpreted as markup.

    :param parsed_logs: List of (validity_check, category, source, message, table) tuples, one per log record; `table` is an empty string when the record has no table.
    :param output_file: Path of the HTML file to write; opened with `hl.hadoop_open`.
    :return: None.
    """
    # Imported locally so the function does not depend on a module-level import.
    from html import escape

    def esc(value):
        """Return `value` as text that is safe inside HTML content and attributes."""
        return escape(str(value), quote=True)

    html_parts = [
        """
<html>
<head>
    <style>
        body { font-family: Arial, sans-serif; }
        table { width: 100%; border-collapse: collapse; }
        th, td { border: 1px solid black; padding: 8px; text-align: left; vertical-align: top; }
        th { background-color: #f2f2f2; cursor: pointer; }
        .pass { color: #0d1cb6; }
        .fail { color: #D42736; font-weight: bold;}
        .warn { color: #DAA520; }
        .info { color: black; }
        .hidden-table { display: none; }
        .toggle-btn {
            cursor: pointer;
            color: blue;
            text-decoration: underline;
            float: right; /* Moves "View Table" to the right */
            margin-left: 15px; /* Adds spacing between message and button */
            font-weight: normal;
        }
        .checkbox-container {
            display: inline-block;
            margin-left: 20px;
        }
        pre {
            text-align: left;
            white-space: pre-wrap;
            font-family: monospace;
            background: #f8f8f8;
            padding: 10px;
            border-radius: 5px;
            overflow-x: auto;
            width: 100%;
            display: block;
            color: black; /* Ensure table text is plain black */
            font-weight: normal; /* Ensure no bolding */
        }
    </style>
    <script>
        function toggleTable(id) {
            var tableDiv = document.getElementById(id);
            tableDiv.style.display = (tableDiv.style.display === "none" || tableDiv.style.display === "") ? "block" : "none";
        }

        function toggleAllTables() {
            var tables = document.getElementsByClassName("hidden-table");
            var checkbox = document.getElementById("toggleAll");
            var showAll = checkbox.checked;

            for (var i = 0; i < tables.length; i++) {
                tables[i].style.display = showAll ? "block" : "none";
            }
        }

        function sortTable(n) {
            var table, rows, switching, i, x, y, shouldSwitch, dir, switchcount = 0;
            table = document.getElementById("logTable");
            switching = true;
            dir = "asc";
            while (switching) {
                switching = false;
                rows = table.rows;
                for (i = 1; i < (rows.length - 1); i++) {
                    shouldSwitch = false;
                    x = rows[i].getElementsByTagName("TD")[n].innerHTML.toLowerCase();
                    y = rows[i + 1].getElementsByTagName("TD")[n].innerHTML.toLowerCase();
                    if ((dir == "asc" && x > y) || (dir == "desc" && x < y)) {
                        shouldSwitch = true;
                        break;
                    }
                }
                if (shouldSwitch) {
                    rows[i].parentNode.insertBefore(rows[i + 1], rows[i]);
                    switching = true;
                    switchcount++;
                } else {
                    if (switchcount === 0 && dir === "asc") {
                        dir = "desc";
                        switching = true;
                    }
                }
            }
        }

        function filterTable() {
            var validityFilter = document.getElementById("functionFilter").value.toLowerCase();
            var statusFilter = document.getElementById("statusFilter").value.toLowerCase();
            var table, tr, i;
            table = document.getElementById("logTable");
            tr = table.getElementsByTagName("tr");

            for (i = 1; i < tr.length; i++) {
                // textContent (not innerHTML) so escaped characters compare equal to the option value.
                var validityCheck = tr[i].getElementsByTagName("td")[0].textContent.trim().toLowerCase();
                var status = tr[i].getElementsByTagName("td")[1].textContent.trim().toLowerCase();

                if ((validityFilter === "all" || validityCheck === validityFilter) &&
                    (statusFilter === "all" || status === statusFilter)) {
                    tr[i].style.display = "";
                } else {
                    tr[i].style.display = "none";
                }
            }
        }
    </script>
</head>
<body>
    <h2>Log Report</h2>
    <label for="functionFilter">Filter by Validity Check:</label>
    <select id="functionFilter" onchange="filterTable()">
        <option value="all">All</option>
"""
    ]

    # Distinct values that populate the two filter drop-downs.
    validity_checks = {record[0] for record in parsed_logs}
    statuses = {record[1] for record in parsed_logs}

    html_parts.extend(
        f'<option value="{esc(validity_check)}">{esc(validity_check)}</option>'
        for validity_check in sorted(validity_checks)
    )

    html_parts.append(
        """
    </select>
    <label for="statusFilter">Filter by Status:</label>
    <select id="statusFilter" onchange="filterTable()">
        <option value="all">All</option>
"""
    )

    html_parts.extend(
        f'<option value="{esc(status)}">{esc(status.upper())}</option>'
        for status in sorted(statuses)
    )

    html_parts.append(
        """
    </select>

    <!-- Checkbox to show/hide all tables -->
    <span class="checkbox-container">
        <input type="checkbox" id="toggleAll" onclick="toggleAllTables()">
        <label for="toggleAll">Show All Tables</label>
    </span>

    <table id="logTable">
        <tr>
            <th onclick="sortTable(0)">Validity Check</th>
            <th onclick="sortTable(1)">Status</th>
            <th onclick="sortTable(2)">Source</th>
            <th onclick="sortTable(3)">Message</th>
        </tr>
"""
    )

    for i, (validity_check, category, source, message, table) in enumerate(
        parsed_logs
    ):
        # The row index gives every expandable table a unique element id.
        table_id = f"table_{i}"
        table_button = (
            f'<span class="toggle-btn" onclick="toggleTable(\'{table_id}\')">View Table</span>'
            if table
            else ""
        )

        html_parts.append(
            f'<tr class="{esc(category)}">'
            f"<td>{esc(validity_check)}</td>"
            f'<td class="{esc(category)}">{esc(category.upper())}</td>'
            f"<td>{esc(source)}</td>"
            f"<td>{esc(message)} {table_button}"
        )

        if table:
            # Escaping keeps angle-bracketed text in the table visible inside <pre>.
            html_parts.append(
                f'<div id="{table_id}" class="hidden-table"><pre>{esc(table)}</pre></div>'
            )

        html_parts.append("</td></tr>")

    html_parts.append("</table></body></html>")

    with hl.hadoop_open(output_file, "w") as f:
        f.write("".join(html_parts))

gnomad/assessment/validity_checks.py

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -12,18 +12,19 @@
1212
from gnomad.resources.grch38.gnomad import CURRENT_MAJOR_RELEASE, POPS, SEXES
1313
from gnomad.utils.vcf import HISTS, SORT_ORDER, make_label_combos
1414

15-
# Save original LogRecord factory.
15+
# Save original LogRecord factory so the custom factory can delegate to it.
1616
old_factory = logging.getLogRecordFactory()
1717

1818

19-
# Define custom factory that appends function names to logger.
19+
# Define custom factory that appends function names to logger so we can
20+
# parse them for the validation's output table.
2021
def custom_record_factory(suffix):
2122
"""Return a custom LogRecord factory that appends a given suffix to function names."""
2223

2324
def factory(*args, **kwargs):
2425
# Create original log record.
2526
record = old_factory(*args, **kwargs)
26-
# Append suffix tooriginal log record.
27+
# Append suffix to original log record for future parsing.
2728
record.funcName = f"{record.funcName}.{suffix}"
2829
return record
2930

@@ -99,6 +100,8 @@ def generic_field_check(
99100
if cond_expr is not None:
100101
ht_filtered = ht.select(_fail=cond_expr, **display_fields)
101102
ht_filtered = ht_filtered.filter(ht_filtered._fail).drop("_fail")
103+
# Use StringIO to capture the table from show() for display in the final
104+
# output table.
102105
log_stream = io.StringIO()
103106
with redirect_stdout(log_stream):
104107
ht_filtered.show(width=200)
@@ -175,9 +178,12 @@ def generate_field_check_expr(
175178
right_expr: Union[hl.expr.NumericExpression, hl.expr.StringExpression],
176179
operator: str,
177180
) -> hl.expr.BooleanExpression:
178-
"""Generate a Hail expression to check field comparisons while handling missing values.
181+
"""
182+
Generate a Hail expression to check field comparisons while handling missing values.
179183
180-
If both fields are missing, the retured expression will be False. If only one field is missing, the expression will be True. If both fields are defined and not equal, the expression will be True.
184+
If both fields are missing, the returned expression will be False. If only one field
185+
is missing, the expression will be True. If both fields are defined and not equal,
186+
the expression will be True.
181187
182188
:param left_expr: Left expression field for comparison.
183189
:param right_expr: Right expression field for comparison.

0 commit comments

Comments
 (0)