2828# Lists the Github workflows we want to track. Maps the Github job name to
2929# the metric name prefix in grafana.
3030# This metric name is also used as a key in the job->name map.
GITHUB_WORKFLOW_TO_TRACK = {
    "CI Checks": "github_llvm_premerge_checks",
    "Build and Test libc++": "github_libcxx_premerge_checks",
}
3235
3336# Lists the Github jobs to track for a given workflow. The key is the stable
3437# name (metric name) of the workflow (see GITHUB_WORKFLOW_TO_TRACK).
3841 "github_llvm_premerge_checks" : {
3942 "Build and Test Linux" : "premerge_linux" ,
4043 "Build and Test Windows" : "premerge_windows" ,
41- }
44+ },
45+ "github_libcxx_premerge_checks" : {
46+ "stage1" : "premerge_libcxx_stage1" ,
47+ "stage2" : "premerge_libcxx_stage2" ,
48+ "stage3" : "premerge_libcxx_stage3" ,
49+ },
4250}
4351
4452# The number of workflows to pull when sampling Github workflows.
6270# by trial and error).
6371GRAFANA_METRIC_MAX_AGE_MN = 120
6472
65-
# One entry per tracked GitHub job, shipped to Grafana as a job metric.
@dataclass
class JobMetrics:
    # Stable metric name: workflow metric prefix + per-job name suffix.
    job_name: str
    # Seconds the job spent queued before starting.
    queue_time: int
    # Seconds the job spent running.
    run_time: int
    # Job result code; presumably 1 on success and 0 otherwise — TODO
    # confirm against the job_result producer (not visible in this chunk).
    status: int
    # Creation, start, and completion timestamps, all in nanoseconds.
    # They share one unit so the libc++ aggregate math can mix them freely;
    # only completed_at_ns is sent to Grafana as the event timestamp.
    created_at_ns: int
    started_at_ns: int
    completed_at_ns: int
    # Id of the GitHub workflow run this job belongs to.
    workflow_id: int
    # Name of the GitHub workflow (e.g. "Build and Test libc++").
    workflow_name: str
@@ -81,6 +90,159 @@ class GaugeMetric:
8190 time_ns : int
8291
8392
# Aggregate over a group of libc++ JobMetrics entries (one stage of one
# workflow run); produced by _construct_aggregate.
@dataclass
class AggregateMetric:
    # Name of the aggregate, e.g.
    # "github_libcxx_premerge_checks_stage1_aggregate".
    aggregate_name: str
    # Seconds from the first job's creation to the last job's start.
    aggregate_queue_time: int
    # Seconds from the first job's start to the last job's completion.
    aggregate_run_time: int
    # Logical 'and' of the grouped jobs' statuses.
    aggregate_status: int
    # Latest completion time of the group, in nanoseconds (used as the
    # Grafana event timestamp).
    completed_at_ns: int
    # Id of the GitHub workflow run the grouped jobs belong to.
    workflow_id: int
101+
102+
def _construct_aggregate(ag_name: str, job_list: list[JobMetrics]) -> AggregateMetric:
    """Create a libc++ AggregateMetric from a list of libc++ JobMetrics.

    How aggregates are computed:
      queue time: Time from when the first job in the group is created until
        the last job in the group has started.
      run time: Time from when the first job in the group starts running
        until the last job in the group finishes running.
      status: Logical 'and' of all the job statuses in the group.

    Args:
      ag_name: The name for this particular AggregateMetric.
      job_list: The list of JobMetrics to be combined into the
        AggregateMetric. The input list should contain all (and only!) the
        libc++ JobMetrics for a particular stage and a particular
        workflow_id, and must be non-empty.

    Returns:
      Returns the AggregateMetric constructed from the inputs.
    """
    # The original hand-rolled min/max loop also tracked an
    # "earliest_complete" value that was never used; it is dropped here.
    earliest_create = min(job.created_at_ns for job in job_list)
    earliest_start = min(job.started_at_ns for job in job_list)
    latest_start = max(job.started_at_ns for job in job_list)
    latest_complete = max(job.completed_at_ns for job in job_list)
    # Statuses are 0/1, so the and-chain is equivalent to all(): the
    # aggregate succeeds only if every job in the group succeeded.
    ag_status = int(all(job.status for job in job_list))

    # Grafana durations are reported in seconds, while the timestamps are
    # kept in nanoseconds, hence the conversion.
    ag_queue_time = (latest_start - earliest_create) / 1_000_000_000
    ag_run_time = (latest_complete - earliest_start) / 1_000_000_000
    return AggregateMetric(
        ag_name,
        ag_queue_time,
        ag_run_time,
        ag_status,
        latest_complete,
        job_list[0].workflow_id,
    )
156+
157+
def create_and_append_libcxx_aggregates(workflow_metrics: list[JobMetrics]):
    """Find libc++ JobMetric entries and create aggregate metrics for them.

    Group the libc++ JobMetric entries by workflow id, and within each
    workflow id group them by stage. Call _construct_aggregate to create an
    aggregate metric for each non-empty stage of each unique workflow id,
    and append each aggregate metric to the input workflow_metrics list.

    Args:
      workflow_metrics: A list of metrics entries collected so far. Only
        JobMetrics entries are examined; AggregateMetric entries are
        appended to this list in place.
    """
    # Separate the libc++ jobs by workflow_id. Only JobMetrics entries are
    # relevant (the list may also hold other metric types).
    jobs_per_workflow: dict[int, list[JobMetrics]] = {}
    for job in workflow_metrics:
        if not isinstance(job, JobMetrics):
            continue
        # Only want libc++ jobs.
        if job.workflow_name != "Build and Test libc++":
            continue
        jobs_per_workflow.setdefault(job.workflow_id, []).append(job)

    # For each workflow run, partition its jobs into stages and aggregate
    # each non-empty stage. Substring membership (rather than the original
    # `find(...) > 0`) also matches a job name that *starts* with the stage
    # string.
    for job_list in jobs_per_workflow.values():
        for stage in ("stage1", "stage2", "stage3"):
            stage_jobs = [job for job in job_list if stage in job.job_name]
            if stage_jobs:
                aggregate = _construct_aggregate(
                    f"github_libcxx_premerge_checks_{stage}_aggregate",
                    stage_jobs,
                )
                workflow_metrics.append(aggregate)
214+
215+
def clean_up_libcxx_job_name(old_name: str) -> str:
    """Convert libc++ job names to generically legal strings.

    Take a name like 'stage1 (generic-cxx03, clang-22, clang++-22)'
    and convert it to 'stage1_generic_cxx03__clang_22__clangxx_22'.
    (Remove parentheses; replace commas, hyphens and spaces with
    underscores; replace '+' with 'x'.)

    Args:
      old_name: A string with the full name of the libc++ test that was run.

    Returns:
      Returns the input string with characters that might not be acceptable
      in some identifier strings replaced with safer characters. A name
      without any parentheses is returned unchanged.
    """
    # Names should have exactly one set of parentheses, so break on the
    # first one. partition (unlike the original split, which raised
    # ValueError on a second '(') tolerates malformed names, and an empty
    # middle value means there were no parentheses at all.
    stage, paren, remainder = old_name.partition("(")
    if not paren:
        return old_name
    stage = stage.strip()
    # removesuffix also avoids the original IndexError on an empty remainder
    # (a name ending in '(').
    remainder = remainder.removesuffix(")")
    # Replace all the unsafe characters in a single pass.
    remainder = remainder.translate(
        str.maketrans({"-": "_", ",": "_", " ": "_", "+": "x"})
    )
    return stage + "_" + remainder
245+
84246def github_get_metrics (
85247 github_repo : github .Repository , last_workflows_seen_as_completed : set [int ]
86248) -> tuple [list [JobMetrics ], int ]:
@@ -146,6 +308,10 @@ def github_get_metrics(
146308 if task .name not in GITHUB_WORKFLOW_TO_TRACK :
147309 continue
148310
311+ libcxx_testing = False
312+ if task .name == "Build and Test libc++" :
313+ libcxx_testing = True
314+
149315 if task .status == "completed" :
150316 workflow_seen_as_completed .add (task .id )
151317
@@ -155,11 +321,19 @@ def github_get_metrics(
155321
156322 name_prefix = GITHUB_WORKFLOW_TO_TRACK [task .name ]
157323 for job in task .jobs ():
324+ if libcxx_testing :
325+ # We're not running macos or windows libc++ tests on our
326+ # infrastructure.
327+ if job .name .find ("macos" ) != - 1 or job .name .find ("windows" ) != - 1 :
328+ continue
158329 # This job is not interesting to us.
159- if job .name not in GITHUB_JOB_TO_TRACK [name_prefix ]:
330+ elif job .name not in GITHUB_JOB_TO_TRACK [name_prefix ]:
160331 continue
161332
162- name_suffix = GITHUB_JOB_TO_TRACK [name_prefix ][job .name ]
333+ if libcxx_testing :
334+ name_suffix = clean_up_libcxx_job_name (job .name )
335+ else :
336+ name_suffix = GITHUB_JOB_TO_TRACK [name_prefix ][job .name ]
163337 metric_name = name_prefix + "_" + name_suffix
164338
165339 if task .status != "completed" :
@@ -208,21 +382,32 @@ def github_get_metrics(
208382 continue
209383
210384 logging .info (f"Adding a job metric for job { job .id } in workflow { task .id } " )
211- # The timestamp associated with the event is expected by Grafana to be
212- # in nanoseconds.
385+ # The completed_at_ns timestamp associated with the event is
386+ # expected by Grafana to be in nanoseconds. Because we do math using
387+ # all three times (when creating libc++ aggregates), we need them
388+ # all to be in nanoseconds, even though created_at and started_at
389+ # are not returned to Grafana.
390+ created_at_ns = int (created_at .timestamp ()) * 10 ** 9
391+ started_at_ns = int (started_at .timestamp ()) * 10 ** 9
213392 completed_at_ns = int (completed_at .timestamp ()) * 10 ** 9
214393 workflow_metrics .append (
215394 JobMetrics (
216395 metric_name ,
217396 queue_time .seconds ,
218397 run_time .seconds ,
219398 job_result ,
399+ created_at_ns ,
400+ started_at_ns ,
220401 completed_at_ns ,
221402 task .id ,
222403 task .name ,
223404 )
224405 )
225406
407+ # Finished collecting the JobMetrics for all jobs; now create the
408+ # aggregates for any libc++ jobs.
409+ create_and_append_libcxx_aggregates (workflow_metrics )
410+
226411 for name , value in queued_count .items ():
227412 workflow_metrics .append (
228413 GaugeMetric (f"workflow_queue_size_{ name } " , value , time .time_ns ())
@@ -278,6 +463,11 @@ def upload_metrics(workflow_metrics, metrics_userid, api_key):
278463 metrics_batch .append (
279464 f"{ name } queue_time={ workflow_metric .queue_time } ,run_time={ workflow_metric .run_time } ,status={ workflow_metric .status } { workflow_metric .completed_at_ns } "
280465 )
466+ elif isinstance (workflow_metric , AggregateMetric ):
467+ name = workflow_metric .aggregate_name .lower ().replace (" " , "_" )
468+ metrics_batch .append (
469+ f"{ name } queue_time={ workflow_metric .aggregate_queue_time } ,run_time={ workflow_metric .aggregate_run_time } ,status={ workflow_metric .aggregate_status } { workflow_metric .completed_at_ns } "
470+ )
281471 else :
282472 raise ValueError (
283473 f"Unsupported object type { type (workflow_metric )} : { str (workflow_metric )} "
0 commit comments