|
| 1 | +#!/usr/bin/env python3 |
| 2 | +"""Merge perf regression info from multiple YAML files into an HTML report.""" |
| 3 | + |
| 4 | +import argparse |
| 5 | +from html import escape as escape_html |
| 6 | + |
| 7 | +import yaml |
| 8 | + |
# Metrics where larger is better.
# Every entry is a "d_"-prefixed result key; _get_metric_keys() strips that
# prefix to derive the matching baseline/threshold key names.
# NOTE(review): the *_tpot entries look like per-token latencies, which would
# usually be "smaller is better" - confirm they belong in this list. In the
# code visible here the two metric lists are only concatenated, so the split
# does not change this script's output.
MAXIMIZE_METRICS = [
    "d_seq_throughput",
    "d_token_throughput",
    "d_total_token_throughput",
    "d_user_throughput",
    "d_mean_tpot",
    "d_median_tpot",
    "d_p99_tpot",
]
| 19 | + |
# Metrics where smaller is better.
# Every entry is a "d_"-prefixed result key; _get_metric_keys() strips that
# prefix to derive the matching baseline/threshold key names.
MINIMIZE_METRICS = [
    "d_mean_ttft",
    "d_median_ttft",
    "d_p99_ttft",
    "d_mean_itl",
    "d_median_itl",
    "d_p99_itl",
    "d_mean_e2el",
    "d_median_e2el",
    "d_p99_e2el",
]
| 32 | + |
| 33 | + |
def _get_metric_keys():
    """Collect every metric-derived key name.

    For each entry of MAXIMIZE_METRICS and MINIMIZE_METRICS the result holds
    the metric key itself plus its baseline, post-merge threshold and
    pre-merge threshold variants. Callers use the set to tell metric keys
    apart from config keys.

    Returns:
        set: All metric-related key names.
    """
    derived_prefixes = (
        "d_baseline_",
        "d_threshold_post_merge_",
        "d_threshold_pre_merge_",
    )
    keys = set()
    for name in (*MAXIMIZE_METRICS, *MINIMIZE_METRICS):
        keys.add(name)
        bare_name = name[2:]  # drop the leading "d_"
        keys.update(prefix + bare_name for prefix in derived_prefixes)
    return keys
| 44 | + |
| 45 | + |
def _get_regression_content(data):
    """Build the text lines displayed for one regression entry.

    The output has two sections. "=== Regression Info ===" is emitted only
    when the entry has an "s_regression_info" key and lists its
    comma-separated parts, one per line. "=== Config ===" is always emitted
    and lists every remaining non-metric key as '"key": value', sorted by key.

    Args:
        data: Mapping holding one regression entry as loaded from YAML.

    Returns:
        list[str]: The lines, unescaped; the caller handles HTML escaping.
    """
    lines = []
    if "s_regression_info" in data:
        lines.append("=== Regression Info ===")
        regression_info = data["s_regression_info"]
        # An empty "s_regression_info:" in YAML loads as None; treat that as
        # "no details" instead of crashing on .split(). str() guards against
        # any other non-string scalar.
        if regression_info is not None:
            lines.extend(str(regression_info).split(","))

    # The regression text already has its own section, so keep it out of the
    # config listing together with all metric keys.
    excluded_keys = _get_metric_keys() | {"s_regression_info"}

    lines.append("")
    lines.append("=== Config ===")
    # key=str leaves the order of string keys unchanged but keeps sorting
    # well-defined if YAML produced keys of mixed types (e.g. ints and strs).
    config_keys = sorted((key for key in data if key not in excluded_keys), key=str)
    lines.extend(f'"{key}": {data[key]}' for key in config_keys)

    return lines
| 67 | + |
| 68 | + |
def merge_regression_data(input_files):
    """Load regression entries from a comma-separated list of YAML paths.

    Each path is expected to look like
    ``.../{stage_name}/{folder_name}/regression_data.yaml``. Paths with fewer
    than three components, files whose top-level YAML value is not a list,
    and files without any dict entry containing "s_test_case_name" are
    skipped silently. Files that cannot be opened, decoded or parsed are
    reported with a warning and counted as load failures.

    Args:
        input_files: Comma-separated string of YAML file paths; blank items
            are ignored.

    Returns:
        dict: ``{stage_name: {folder_name: [entry, ...]}}``.

    Raises:
        RuntimeError: If at least one path was given and every one of them
            failed to load.
    """
    stripped = (item.strip() for item in input_files.split(","))
    paths = [item for item in stripped if item]

    merged = {}
    failed = 0

    for path in paths:
        # Path format: .../{stage_name}/{folder_name}/regression_data.yaml
        parts = path.replace("\\", "/").split("/")
        if len(parts) < 3:
            continue
        stage_name, folder_name = parts[-3], parts[-2]

        try:
            with open(path, "r", encoding="utf-8") as handle:
                loaded = yaml.safe_load(handle)
        except (OSError, yaml.YAMLError, UnicodeDecodeError) as e:
            failed += 1
            print(f"Warning: Failed to load {path}: {e}")
            continue

        # Covers the empty-document case too: None is not a list.
        if not isinstance(loaded, list):
            continue

        entries = [
            item for item in loaded if isinstance(item, dict) and "s_test_case_name" in item
        ]
        if not entries:
            continue

        merged.setdefault(stage_name, {}).setdefault(folder_name, []).extend(entries)

    # Fail fast if caller provided inputs but none were readable/parseable.
    # (Keeps "no regressions found" working when there are no input paths.)
    if paths and not merged and failed == len(paths):
        raise RuntimeError("Failed to load any regression YAML inputs; cannot generate report.")

    return merged
| 117 | + |
| 118 | + |
def generate_html(regression_dict, output_file):
    """Render the merged regression data as a standalone HTML report.

    One collapsible entry is produced per regression, grouped by stage. All
    dynamic text (stage names, test names, content lines) is HTML-escaped.

    Args:
        regression_dict: ``{stage_name: {folder_name: [entry, ...]}}`` as
            returned by merge_regression_data().
        output_file: Path of the HTML file to write; it is overwritten.

    Side effects:
        Writes ``output_file`` as UTF-8 and prints a one-line summary.
    """
    # CSS braces are doubled because the template goes through str.format().
    # The charset declaration matches the UTF-8 encoding used when writing the
    # file; without it browsers may guess another encoding for non-ASCII text.
    html_template = """
    <!DOCTYPE html>
    <html>
    <head>
        <meta charset="utf-8">
        <title>Perf Regression Summary</title>
        <style>
            body {{ font-family: Arial, sans-serif; margin: 10px; }}
            .suite-container {{
                margin-bottom: 20px;
                border: 1px solid #ddd;
                border-radius: 4px;
            }}
            .suite-header {{
                padding: 10px;
                background: #f8f9fa;
                border-bottom: 1px solid #ddd;
            }}
            .summary {{ margin-bottom: 10px; }}
            .regression {{ color: #d93025; }}
            .testcase {{
                border-left: 4px solid #d93025;
                margin: 5px 0;
                background: white;
            }}
            .test-details {{
                padding: 10px;
                background: #f5f5f5;
                border-radius: 3px;
            }}
            pre {{
                margin: 0;
                white-space: pre-wrap;
                word-wrap: break-word;
                background: #2b2b2b;
                color: #cccccc;
                padding: 10px;
                counter-reset: line;
            }}
            pre + pre {{
                border-top: none;
                padding-top: 0;
            }}
            pre span {{
                display: block;
                position: relative;
                padding-left: 4em;
            }}
            pre span:before {{
                counter-increment: line;
                content: counter(line);
                position: absolute;
                left: 0;
                width: 3em;
                text-align: right;
                color: #666;
                padding-right: 1em;
            }}
            details summary {{
                cursor: pointer;
                outline: none;
            }}
            details[open] summary {{
                margin-bottom: 10px;
            }}
        </style>
    </head>
    <body>
        <h2>Perf Regression Summary</h2>
        {test_suites}
    </body>
    </html>
    """

    all_suites_html = []
    total_tests = 0

    for stage_name, folder_dict in regression_dict.items():
        # Count total tests for this stage
        tests_count = sum(len(data_list) for data_list in folder_dict.values())
        total_tests += tests_count

        # Generate summary for the suite
        summary = f"""
            <div class="suite-header">
                <h3>Stage: {escape_html(stage_name)}</h3>
                <p><span class="regression">Regression Tests: {tests_count}</span></p>
            </div>
        """

        # Generate test case details for the suite
        test_cases_html = []

        for folder_name, data_list in folder_dict.items():
            for data in data_list:
                test_case_name = data.get("s_test_case_name", "N/A")
                test_name = f"perf/test_perf_sanity.py::test_e2e[{folder_name}] - {test_case_name}"

                # One <span> per line: the stylesheet numbers lines through a
                # CSS counter on "pre span".
                content_lines = _get_regression_content(data)
                content_html = "".join(
                    f"<span>{escape_html(line)}</span>" for line in content_lines
                )

                test_cases_html.append(f"""
                    <div class="testcase">
                        <details class="test-details">
                            <summary>{escape_html(test_name)}</summary>
                            <pre>{content_html}</pre>
                        </details>
                    </div>
                """)

        # Combine summary and test cases for this suite
        joined_test_cases = " ".join(test_cases_html)
        all_suites_html.append(f"""
            <div class="suite-container">
                {summary}
                <div class="test-cases">
                    {joined_test_cases}
                </div>
            </div>
        """)

    # Generate complete HTML
    html_content = html_template.format(test_suites="\n".join(all_suites_html))

    # Write to file
    with open(output_file, "w", encoding="utf-8") as f:
        f.write(html_content)

    print(f"Generated HTML report with {total_tests} regression entries: {output_file}")
| 258 | + |
| 259 | + |
def main():
    """Parse the command line, merge the YAML inputs and write the HTML report."""
    cli = argparse.ArgumentParser(
        description="Merge perf regression info from YAML files into an HTML report."
    )
    cli.add_argument(
        "--input-files",
        type=str,
        required=True,
        help="Comma-separated list of YAML file paths",
    )
    cli.add_argument(
        "--output-file",
        type=str,
        required=True,
        help="Output HTML file path",
    )
    options = cli.parse_args()

    merged = merge_regression_data(options.input_files)
    generate_html(merged, options.output_file)
| 272 | + |
| 273 | + |
# Run the report generator only when executed as a script, not on import.
if __name__ == "__main__":
    main()
0 commit comments