|
| 1 | +"""Script to parse logs from validity check output.""" |
| 2 | + |
| 3 | +import re |
| 4 | + |
| 5 | +import hail as hl |
| 6 | + |
| 7 | + |
# Matches lines such as "INFO (package.module.function 123): message" and
# captures the log level, module, function name, line number, and message.
_LOG_PATTERN = re.compile(r"^(INFO|WARNING|ERROR) \(([^)]+)\.(\w+) (\d+)\): (.*)")

# ASCII table border that ends the message line itself.
_TABLE_BORDER_PATTERN = re.compile(r"\+\-.*\+$")

# Maps the name of the function that emitted a log line to the validity check
# label used in the report. Unmapped function names are used as-is.
_FUNCTION_MAPPING = {
    "validate_config": "general info",
    "validate_ht_fields": "general info",
    "main": "general info",
    "validate_federated_data": "general info",
    "summarize_variants": "variant summary",
    "sum_group_callstats": "group summations",
    "check_missingness": "missingness",
    "compute_missingness": "missingness",
    "make_group_sum_expr_dict": "group summations",
    "check_sex_chr_metrics": "XY check",
    "check_raw_and_adj_callstats": "raw/adj check",
}


def _categorize(log_level, message):
    """Return the status category ("pass", "fail", "warn", or "info") for a log line."""
    if log_level == "WARNING":
        return "warn"
    if log_level == "ERROR":
        return "fail"

    # INFO lines are classified by keyword. "passed" is checked first, so a
    # message containing both "passed" and "fail" is reported as "pass".
    lowered = message.lower()
    if "passed" in lowered:
        return "pass"
    # "fail" is a substring of "failed", so a single test covers both words.
    if "fail" in lowered:
        return "fail"
    return "info"


def _build_entry(metadata, message, table_lines):
    """Combine the metadata, cleaned message, and collected table lines into one tuple."""
    # Drop a table border from the message text; the full table is kept
    # separately as the last element of the entry.
    cleaned_message = _TABLE_BORDER_PATTERN.sub("", message).strip()
    return (*metadata, cleaned_message, "\n".join(table_lines).strip())


def _parse_log_lines(lines):
    """Parse an iterable of log lines into a list of report entries."""
    parsed_logs = []
    metadata = None  # (validity_check, category, source) of the entry being built.
    message = ""
    table_lines = []

    for line in lines:
        match = _LOG_PATTERN.match(line)

        if match:
            # A new log statement closes the previous entry. Entries whose
            # message is empty are not stored.
            if message and metadata:
                parsed_logs.append(_build_entry(metadata, message, table_lines))

            log_level, module, function_name, line_number, message = match.groups()
            metadata = (
                _FUNCTION_MAPPING.get(function_name, function_name),
                _categorize(log_level, message),
                f"{module}.{function_name} {line_number}",
            )
            table_lines = []
        elif table_lines or "+----" in line or "| locus" in line:
            # A table starts at a border or a "| locus" header row. Once a
            # table has started, every following line is collected until the
            # next log statement. Other non-matching lines are ignored.
            table_lines.append(line.strip())

    # Store the final entry, which has no following log statement to close it.
    if message and metadata:
        parsed_logs.append(_build_entry(metadata, message, table_lines))

    return parsed_logs


def parse_log_file(log_file):
    """Parse a log file and categorize its messages for formatting.

    Each line of the form ``LEVEL (module.function lineno): message`` starts a new
    entry. Lines that follow it are attached to that entry as a table once a line
    containing ``+----`` or ``| locus`` is seen; other lines that are not log
    statements are ignored.

    :param log_file: Path to the log file containing python logging output (such as
        INFO and WARNING statements). The file is opened with ``hl.hadoop_open``.
    :return: List of tuples ``(validity_check, category, source, message, table)``,
        where ``category`` is one of "pass", "fail", "warn", or "info" and ``table``
        is an empty string when no table followed the message.
    """
    # Parse inside the context manager because lines are read lazily.
    with hl.hadoop_open(log_file, "r") as f:
        return _parse_log_lines(f)
| 96 | + |
| 97 | + |
# Static start of the report: styles, scripts, and the opening of the validity
# check filter. The <option> elements for each validity check follow it.
_REPORT_HEAD = """
<html>
<head>
<style>
    body { font-family: Arial, sans-serif; }
    table { width: 100%; border-collapse: collapse; }
    th, td { border: 1px solid black; padding: 8px; text-align: left; vertical-align: top; }
    th { background-color: #f2f2f2; cursor: pointer; }
    .pass { color: #0d1cb6; }
    .fail { color: #D42736; font-weight: bold;}
    .warn { color: #DAA520; }
    .info { color: black; }
    .hidden-table { display: none; }
    .toggle-btn {
        cursor: pointer;
        color: blue;
        text-decoration: underline;
        float: right; /* Moves "View Table" to the right */
        margin-left: 15px; /* Adds spacing between message and button */
        font-weight: normal;
    }
    .checkbox-container {
        display: inline-block;
        margin-left: 20px;
    }
    pre {
        text-align: left;
        white-space: pre-wrap;
        font-family: monospace;
        background: #f8f8f8;
        padding: 10px;
        border-radius: 5px;
        overflow-x: auto;
        width: 100%;
        display: block;
        color: black; /* Ensure table text is plain black */
        font-weight: normal; /* Ensure no bolding */
    }
</style>
<script>
    function toggleTable(id) {
        var tableDiv = document.getElementById(id);
        tableDiv.style.display = (tableDiv.style.display === "none" || tableDiv.style.display === "") ? "block" : "none";
    }

    function toggleAllTables() {
        var tables = document.getElementsByClassName("hidden-table");
        var checkbox = document.getElementById("toggleAll");
        var showAll = checkbox.checked;

        for (var i = 0; i < tables.length; i++) {
            tables[i].style.display = showAll ? "block" : "none";
        }
    }

    function sortTable(n) {
        var table, rows, switching, i, x, y, shouldSwitch, dir, switchcount = 0;
        table = document.getElementById("logTable");
        switching = true;
        dir = "asc";
        while (switching) {
            switching = false;
            rows = table.rows;
            for (i = 1; i < (rows.length - 1); i++) {
                shouldSwitch = false;
                x = rows[i].getElementsByTagName("TD")[n].innerHTML.toLowerCase();
                y = rows[i + 1].getElementsByTagName("TD")[n].innerHTML.toLowerCase();
                if ((dir == "asc" && x > y) || (dir == "desc" && x < y)) {
                    shouldSwitch = true;
                    break;
                }
            }
            if (shouldSwitch) {
                rows[i].parentNode.insertBefore(rows[i + 1], rows[i]);
                switching = true;
                switchcount++;
            } else {
                if (switchcount === 0 && dir === "asc") {
                    dir = "desc";
                    switching = true;
                }
            }
        }
    }

    function filterTable() {
        var validityFilter = document.getElementById("functionFilter").value.toLowerCase();
        var statusFilter = document.getElementById("statusFilter").value.toLowerCase();
        var table, tr, i;
        table = document.getElementById("logTable");
        tr = table.getElementsByTagName("tr");

        for (i = 1; i < tr.length; i++) {
            // Compare decoded text rather than markup so that cells containing
            // escaped characters still match the selected option value.
            var validityCheck = tr[i].getElementsByTagName("td")[0].textContent.toLowerCase();
            var status = tr[i].getElementsByTagName("td")[1].textContent.toLowerCase();

            if ((validityFilter === "all" || validityCheck === validityFilter) &&
                (statusFilter === "all" || status === statusFilter)) {
                tr[i].style.display = "";
            } else {
                tr[i].style.display = "none";
            }
        }
    }
</script>
</head>
<body>
<h2>Log Report</h2>
<label for="functionFilter">Filter by Validity Check:</label>
<select id="functionFilter" onchange="filterTable()">
    <option value="all">All</option>
"""

# Closes the validity check filter and opens the status filter.
_REPORT_STATUS_FILTER = """
</select>
<label for="statusFilter">Filter by Status:</label>
<select id="statusFilter" onchange="filterTable()">
    <option value="all">All</option>
"""

# Closes the status filter, adds the "show all" checkbox, and opens the log table.
_REPORT_TABLE_HEAD = """
</select>

<!-- Checkbox to show/hide all tables -->
<span class="checkbox-container">
    <input type="checkbox" id="toggleAll" onclick="toggleAllTables()">
    <label for="toggleAll">Show All Tables</label>
</span>

<table id="logTable">
    <tr>
        <th onclick="sortTable(0)">Validity Check</th>
        <th onclick="sortTable(1)">Status</th>
        <th onclick="sortTable(2)">Source</th>
        <th onclick="sortTable(3)">Message</th>
    </tr>
"""


def _render_html_report(parsed_logs):
    """Return the complete HTML report for the parsed log entries as a string."""
    # Imported locally so this block does not depend on the module's import list.
    from html import escape

    validity_checks = sorted({entry[0] for entry in parsed_logs})
    statuses = sorted({entry[1] for entry in parsed_logs})

    # Collect fragments and join once instead of repeatedly extending a string.
    parts = [_REPORT_HEAD]
    parts.extend(
        f'<option value="{escape(check)}">{escape(check)}</option>'
        for check in validity_checks
    )
    parts.append(_REPORT_STATUS_FILTER)
    parts.extend(
        f'<option value="{escape(status)}">{escape(status.upper())}</option>'
        for status in statuses
    )
    parts.append(_REPORT_TABLE_HEAD)

    for i, (validity_check, category, source, message, table) in enumerate(parsed_logs):
        table_id = f"table_{i}"
        table_button = (
            f"<span class=\"toggle-btn\" onclick=\"toggleTable('{table_id}')\">View Table</span>"
            if table
            else ""
        )

        # All text taken from the log is escaped so that characters such as
        # "<" and "&" are displayed literally instead of being parsed as markup.
        parts.append(
            f'<tr class="{escape(category)}">'
            f"<td>{escape(validity_check)}</td>"
            f'<td class="{escape(category)}">{escape(category.upper())}</td>'
            f"<td>{escape(source)}</td>"
            f"<td>{escape(message)} {table_button}"
        )

        if table:
            parts.append(
                f'<div id="{table_id}" class="hidden-table"><pre>{escape(table)}</pre></div>'
            )

        parts.append("</td></tr>")

    parts.append("</table></body></html>")
    return "".join(parts)


def generate_html_report(parsed_logs, output_file):
    """Generate an HTML report with sortable and filterable columns, with expandable tables for results.

    :param parsed_logs: List of tuples ``(validity_check, category, source, message,
        table)``; ``table`` is shown in a collapsible section when it is non-empty.
    :param output_file: Path the HTML report is written to. The file is opened with
        ``hl.hadoop_open``.
    :return: None.
    """
    with hl.hadoop_open(output_file, "w") as f:
        f.write(_render_html_report(parsed_logs))
0 commit comments