Skip to content

Commit e97af45

Browse files
authored
[TRTLLM-10300][feat] Upload regression info to artifactory (NVIDIA#10599)
Signed-off-by: Chenfei Zhang <chenfeiz@nvidia.com>
1 parent a6a63f5 commit e97af45

5 files changed

Lines changed: 345 additions & 8 deletions

File tree

jenkins/L0_MergeRequest.groovy

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -864,6 +864,30 @@ def collectTestResults(pipeline, testFilter)
864864

865865
junit(testResults: '**/results*.xml', allowEmptyResults : true)
866866
} // Collect test result stage
867+
stage("Collect Perf Regression Result") {
868+
def yamlFiles = sh(
869+
returnStdout: true,
870+
script: 'find . -type f -name "regression_data.yaml" 2>/dev/null || true'
871+
).trim()
872+
echo "Regression data yaml files: ${yamlFiles}"
873+
if (yamlFiles) {
874+
def yamlFileList = yamlFiles.split(/\s+/).collect { it.trim() }.findAll { it }.join(",")
875+
echo "Found regression data files: ${yamlFileList}"
876+
trtllm_utils.llmExecStepWithRetry(pipeline, script: "apk add python3")
877+
trtllm_utils.llmExecStepWithRetry(pipeline, script: "apk add py3-pip")
878+
trtllm_utils.llmExecStepWithRetry(pipeline, script: "pip3 config set global.break-system-packages true")
879+
trtllm_utils.llmExecStepWithRetry(pipeline, script: "pip3 install pyyaml")
880+
sh """
881+
python3 llm/jenkins/scripts/perf/perf_regression.py \
882+
--input-files=${yamlFileList} \
883+
--output-file=perf_regression.html
884+
"""
885+
trtllm_utils.uploadArtifacts("perf_regression.html", "${UPLOAD_PATH}/test-results/")
886+
echo "Perf regression report: https://urm.nvidia.com/artifactory/${UPLOAD_PATH}/test-results/perf_regression.html"
887+
} else {
888+
echo "No regression_data.yaml files found."
889+
}
890+
} // Collect Perf Regression Result stage
867891
stage("Rerun Report") {
868892
sh "rm -rf rerun && mkdir -p rerun"
869893
sh "find . -type f -wholename '*/rerun_results.xml' -exec sh -c 'mv \"{}\" \"rerun/\$(basename \$(dirname \"{}\"))_rerun_results.xml\"' \\; || true"

jenkins/L0_Test.groovy

Lines changed: 23 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -124,6 +124,7 @@ def uploadResults(def pipeline, SlurmCluster cluster, String nodeName, String st
124124

125125
def hasTimeoutTest = false
126126
def downloadResultSucceed = false
127+
def downloadPerfResultSucceed = false
127128

128129
pipeline.stage('Submit Test Result') {
129130
sh "mkdir -p ${stageName}"
@@ -146,8 +147,28 @@ EOF_TIMEOUT_XML
146147
def resultsFilePath = "/home/svc_tensorrt/bloom/scripts/${nodeName}/results.xml"
147148
downloadResultSucceed = Utils.exec(pipeline, script: "sshpass -p '${remote.passwd}' scp -P ${remote.port} -r -p ${COMMON_SSH_OPTIONS} ${remote.user}@${remote.host}:${resultsFilePath} ${stageName}/", returnStatus: true, numRetries: 3) == 0
148149

149-
echo "hasTimeoutTest: ${hasTimeoutTest}, downloadResultSucceed: ${downloadResultSucceed}"
150-
if (hasTimeoutTest || downloadResultSucceed) {
150+
// Download perf test results
151+
def perfResultsBasePath = "/home/svc_tensorrt/bloom/scripts/${nodeName}"
152+
def folderListOutput = Utils.exec(
153+
pipeline,
154+
script: Utils.sshUserCmd(
155+
remote,
156+
"\"find '${perfResultsBasePath}' -maxdepth 1 -type d \\( -name 'aggr*' -o -name 'disagg*' \\) -printf '%f\\n' || true\""
157+
),
158+
returnStdout: true,
159+
numRetries: 3
160+
)?.trim() ?: ""
161+
def perfFolders = folderListOutput.split(/\s+/).collect { it.trim().replaceAll(/\/$/, '') }.findAll { it }
162+
echo "Perf Result Folders: ${perfFolders}"
163+
if (perfFolders) {
164+
def scpSources = perfFolders.size() == 1
165+
? "${remote.user}@${remote.host}:${perfResultsBasePath}/${perfFolders[0]}"
166+
: "${remote.user}@${remote.host}:{${perfFolders.collect { "${perfResultsBasePath}/${it}" }.join(',')}}"
167+
downloadPerfResultSucceed = Utils.exec(pipeline, script: "sshpass -p '${remote.passwd}' scp -P ${remote.port} -r -p ${COMMON_SSH_OPTIONS} ${scpSources} ${stageName}/", returnStatus: true, numRetries: 3) == 0
168+
}
169+
170+
echo "hasTimeoutTest: ${hasTimeoutTest}, downloadResultSucceed: ${downloadResultSucceed}, downloadPerfResultSucceed: ${downloadPerfResultSucceed}"
171+
if (hasTimeoutTest || downloadResultSucceed || downloadPerfResultSucceed) {
151172
sh "ls ${stageName}"
152173
echo "Upload test results."
153174
sh "tar -czvf results-${stageName}.tar.gz ${stageName}/"
Lines changed: 275 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,275 @@
1+
#!/usr/bin/env python3
2+
"""Merge perf regression info from multiple YAML files into an HTML report."""
3+
4+
import argparse
5+
from html import escape as escape_html
6+
7+
import yaml
8+
9+
# Metrics where larger is better
# NOTE(review): the *_tpot entries look like per-token latency metrics, which
# would normally be smaller-is-better -- confirm the intended direction.
# Within the visible script both lists are only used to derive metric key
# names, so the grouping does not change the generated report.
10+
MAXIMIZE_METRICS = [
11+
"d_seq_throughput",
12+
"d_token_throughput",
13+
"d_total_token_throughput",
14+
"d_user_throughput",
15+
"d_mean_tpot",
16+
"d_median_tpot",
17+
"d_p99_tpot",
18+
]
19+
20+
# Metrics where smaller is better
21+
MINIMIZE_METRICS = [
22+
"d_mean_ttft",
23+
"d_median_ttft",
24+
"d_p99_ttft",
25+
"d_mean_itl",
26+
"d_median_itl",
27+
"d_p99_itl",
28+
"d_mean_e2el",
29+
"d_median_e2el",
30+
"d_p99_e2el",
31+
]
32+
33+
34+
def _get_metric_keys():
    """Return the set of all metric-related key names.

    For every metric in MAXIMIZE_METRICS and MINIMIZE_METRICS the set holds
    the metric key itself plus its baseline, post-merge threshold and
    pre-merge threshold companions. Callers use the set to tell metric keys
    apart from config keys.
    """
    companion_prefixes = (
        "d_baseline_",
        "d_threshold_post_merge_",
        "d_threshold_pre_merge_",
    )
    all_metrics = MAXIMIZE_METRICS + MINIMIZE_METRICS

    keys = set(all_metrics)
    # name[2:] drops the leading "d_" so the companion prefix can be applied.
    keys.update(
        f"{prefix}{name[2:]}" for name in all_metrics for prefix in companion_prefixes
    )
    return keys
44+
45+
46+
def _get_regression_content(data):
    """Build the text lines displayed for one regression entry.

    Args:
        data: Mapping for a single regression entry, as loaded from YAML.

    Returns:
        A list of strings. When ``s_regression_info`` is present and not
        null, the list starts with a "=== Regression Info ===" header
        followed by the comma-separated pieces of that value. It always ends
        with a blank line, a "=== Config ===" header, and one ``"key": value``
        line per non-metric key, ordered by key.
    """
    lines = []

    regression_info = data.get("s_regression_info")
    if regression_info is not None:
        lines.append("=== Regression Info ===")
        # YAML can produce a non-string scalar here (number, bool); coerce it
        # so that split() cannot raise AttributeError.
        lines.extend(str(regression_info).split(","))

    metric_keys = _get_metric_keys()

    lines.append("")
    lines.append("=== Config ===")
    # key=str keeps the ordering of all-string keys unchanged while avoiding
    # a TypeError if the YAML mapping mixes key types.
    config_keys = sorted(
        (
            key
            for key in data
            if key not in metric_keys and key != "s_regression_info"
        ),
        key=str,
    )
    lines.extend(f'"{key}": {data[key]}' for key in config_keys)

    return lines
67+
68+
69+
def merge_regression_data(input_files):
70+
"""Read all yaml file paths and merge regression data."""
71+
yaml_files = [f.strip() for f in input_files.split(",") if f.strip()]
72+
73+
regression_dict = {}
74+
load_failures = 0
75+
76+
for yaml_file in yaml_files:
77+
try:
78+
# Path format: .../{stage_name}/{folder_name}/regression_data.yaml
79+
path_parts = yaml_file.replace("\\", "/").split("/")
80+
if len(path_parts) < 3:
81+
continue
82+
83+
stage_name = path_parts[-3]
84+
folder_name = path_parts[-2]
85+
86+
with open(yaml_file, "r", encoding="utf-8") as f:
87+
content = yaml.safe_load(f)
88+
if content is None or not isinstance(content, list):
89+
continue
90+
91+
filtered_data = [
92+
d for d in content if isinstance(d, dict) and "s_test_case_name" in d
93+
]
94+
95+
if not filtered_data:
96+
continue
97+
98+
if stage_name not in regression_dict:
99+
regression_dict[stage_name] = {}
100+
101+
if folder_name not in regression_dict[stage_name]:
102+
regression_dict[stage_name][folder_name] = []
103+
104+
regression_dict[stage_name][folder_name].extend(filtered_data)
105+
106+
except (OSError, yaml.YAMLError, UnicodeDecodeError) as e:
107+
load_failures += 1
108+
print(f"Warning: Failed to load {yaml_file}: {e}")
109+
continue
110+
111+
# Fail fast if caller provided inputs but none were readable/parseable.
112+
# (Keeps "no regressions found" working when yaml_files is empty.)
113+
if yaml_files and not regression_dict and load_failures == len(yaml_files):
114+
raise RuntimeError("Failed to load any regression YAML inputs; cannot generate report.")
115+
116+
return regression_dict
117+
118+
119+
def generate_html(regression_dict, output_file):
120+
"""Generate HTML report from regression data."""
121+
html_template = """
122+
<!DOCTYPE html>
123+
<html>
124+
<head>
125+
<title>Perf Regression Summary</title>
126+
<style>
127+
body {{ font-family: Arial, sans-serif; margin: 10px; }}
128+
.suite-container {{
129+
margin-bottom: 20px;
130+
border: 1px solid #ddd;
131+
border-radius: 4px;
132+
}}
133+
.suite-header {{
134+
padding: 10px;
135+
background: #f8f9fa;
136+
border-bottom: 1px solid #ddd;
137+
}}
138+
.summary {{ margin-bottom: 10px; }}
139+
.regression {{ color: #d93025; }}
140+
.testcase {{
141+
border-left: 4px solid #d93025;
142+
margin: 5px 0;
143+
background: white;
144+
}}
145+
.test-details {{
146+
padding: 10px;
147+
background: #f5f5f5;
148+
border-radius: 3px;
149+
}}
150+
pre {{
151+
margin: 0;
152+
white-space: pre-wrap;
153+
word-wrap: break-word;
154+
background: #2b2b2b;
155+
color: #cccccc;
156+
padding: 10px;
157+
counter-reset: line;
158+
}}
159+
pre + pre {{
160+
border-top: none;
161+
padding-top: 0;
162+
}}
163+
pre span {{
164+
display: block;
165+
position: relative;
166+
padding-left: 4em;
167+
}}
168+
pre span:before {{
169+
counter-increment: line;
170+
content: counter(line);
171+
position: absolute;
172+
left: 0;
173+
width: 3em;
174+
text-align: right;
175+
color: #666;
176+
padding-right: 1em;
177+
}}
178+
details summary {{
179+
cursor: pointer;
180+
outline: none;
181+
}}
182+
details[open] summary {{
183+
margin-bottom: 10px;
184+
}}
185+
</style>
186+
</head>
187+
<body>
188+
<h2>Perf Regression Summary</h2>
189+
{test_suites}
190+
</body>
191+
</html>
192+
"""
193+
194+
all_suites_html = []
195+
total_tests = 0
196+
197+
for stage_name in regression_dict:
198+
folder_dict = regression_dict[stage_name]
199+
# Count total tests for this stage
200+
tests_count = sum(len(data_list) for data_list in folder_dict.values())
201+
total_tests += tests_count
202+
203+
# Generate summary for the suite
204+
summary = f"""
205+
<div class="suite-header">
206+
<h3>Stage: {escape_html(stage_name)}</h3>
207+
<p><span class="regression">Regression Tests: {tests_count}</span></p>
208+
</div>
209+
"""
210+
211+
# Generate test case details for the suite
212+
test_cases_html = []
213+
214+
for folder_name, data_list in folder_dict.items():
215+
for data in data_list:
216+
test_case_name = data.get("s_test_case_name", "N/A")
217+
test_name = f"perf/test_perf_sanity.py::test_e2e[{folder_name}] - {test_case_name}"
218+
219+
# Get content lines
220+
content_lines = _get_regression_content(data)
221+
content_html = "".join(
222+
f"<span>{escape_html(line)}</span>" for line in content_lines
223+
)
224+
225+
details = f"""
226+
<details class="test-details">
227+
<summary>{escape_html(test_name)}</summary>
228+
<pre>{content_html}</pre>
229+
</details>
230+
"""
231+
232+
test_case_html = f"""
233+
<div class="testcase">
234+
{details}
235+
</div>
236+
"""
237+
test_cases_html.append(test_case_html)
238+
239+
# Combine summary and test cases for this suite
240+
suite_html = f"""
241+
<div class="suite-container">
242+
{summary}
243+
<div class="test-cases">
244+
{" ".join(test_cases_html)}
245+
</div>
246+
</div>
247+
"""
248+
all_suites_html.append(suite_html)
249+
250+
# Generate complete HTML
251+
html_content = html_template.format(test_suites="\n".join(all_suites_html))
252+
253+
# Write to file
254+
with open(output_file, "w", encoding="utf-8") as f:
255+
f.write(html_content)
256+
257+
print(f"Generated HTML report with {total_tests} regression entries: {output_file}")
258+
259+
260+
def main():
261+
parser = argparse.ArgumentParser(
262+
description="Merge perf regression info from YAML files into an HTML report."
263+
)
264+
parser.add_argument(
265+
"--input-files", type=str, required=True, help="Comma-separated list of YAML file paths"
266+
)
267+
parser.add_argument("--output-file", type=str, required=True, help="Output HTML file path")
268+
args = parser.parse_args()
269+
270+
regression_dict = merge_regression_data(args.input_files)
271+
generate_html(regression_dict, args.output_file)
272+
273+
274+
if __name__ == "__main__":
275+
main()

tests/integration/defs/perf/open_search_db_utils.py

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
import time
2323
from datetime import datetime
2424

25+
import yaml
2526
from defs.trt_test_alternative import print_info, print_warning
2627

2728
_project_root = os.path.abspath(
@@ -609,11 +610,14 @@ def _print_regression_data(data, print_func=None):
609610
print_func(f'"{key}": {value}')
610611

611612

612-
def check_perf_regression(new_data_dict, fail_on_regression=False):
613+
def check_perf_regression(new_data_dict,
614+
fail_on_regression=False,
615+
output_dir=None):
613616
"""
614617
Check performance regression by printing regression data from new_data_dict.
615618
If fail_on_regression is True, raises RuntimeError when regressions are found.
616619
(This is a temporary feature to fail regression tests. We are observing the stability and will fail them by default soon.)
620+
If output_dir is provided, saves regression data to regression_data.yaml.
617621
"""
618622
# Filter regression data from new_data_dict
619623
regressive_data_list = [
@@ -630,6 +634,15 @@ def check_perf_regression(new_data_dict, fail_on_regression=False):
630634
if not data.get("b_is_post_merge", False)
631635
]
632636

637+
# Save regression data to yaml file if output_dir is provided
638+
if output_dir is not None and len(regressive_data_list) > 0:
639+
regression_data_file = os.path.join(output_dir, "regression_data.yaml")
640+
with open(regression_data_file, 'w') as f:
641+
yaml.dump(regressive_data_list, f, default_flow_style=False)
642+
print_info(
643+
f"Saved {len(regressive_data_list)} regression data to {regression_data_file}"
644+
)
645+
633646
# Print pre-merge regression data with print_warning
634647
if len(pre_merge_regressions) > 0:
635648
print_warning(

0 commit comments

Comments
 (0)