Skip to content

Commit c3ae2ab

Browse files
authored
[CI] Collect labels and time merged for LLVM pull requests (llvm#740)
Begin collecting data for when pull requests are merged into main, as well as what labels those pull requests are tagged with. For the time being, all pull requests we're concerned with should have their `mergedAt` field set. However, it is likely that, sometime in the near future, we will also be interested in pull requests that have not been merged.
1 parent ad4845c commit c3ae2ab

File tree

3 files changed

+48
-4
lines changed

3 files changed

+48
-4
lines changed

premerge/bigquery_schema/llvm_pull_requests_table_schema.json

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,12 @@
1111
"mode": "NULLABLE",
1212
"description": "Time the pull request was created at, as a Unix timestamp."
1313
},
14+
{
15+
"name": "merged_at_timestamp_seconds",
16+
"type": "INTEGER",
17+
"mode": "NULLABLE",
18+
"description": "Time the pull request was merged at, as a Unix timestamp."
19+
},
1420
{
1521
"name": "pull_request_number",
1622
"type": "INTEGER",
@@ -22,5 +28,11 @@
2228
"type": "STRING",
2329
"mode": "NULLABLE",
2430
"description": "SHA of the commit associated with the pull request."
31+
},
32+
{
33+
"name": "labels",
34+
"type": "STRING",
35+
"mode": "REPEATED",
36+
"description": "Labels associated with the pull request."
2537
}
2638
]

premerge/ops-container/process_llvm_commits.py

Lines changed: 23 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,12 @@
4545
author {{ login }}
4646
number
4747
createdAt
48+
mergedAt
49+
labels(first: 25) {{
50+
nodes {{
51+
name
52+
}}
53+
}}
4854
reviews(last: 100) {{
4955
nodes {{
5056
state
@@ -66,7 +72,9 @@ class LLVMCommitData:
6672
commit_sha: str
6773
commit_timestamp_seconds: int
6874
diff: list[dict[str, int | str]]
69-
commit_author: str | None = None # Username of author is unknown until API call
75+
commit_author: str | None = (
76+
None # Username of author is unknown until API call
77+
)
7078
associated_pull_request: int | None = None
7179
is_revert: bool = False
7280
pull_request_reverted: int | None = None
@@ -78,7 +86,9 @@ class LLVMPullRequestData:
7886
pull_request_number: int
7987
pull_request_author: str
8088
pull_request_timestamp_seconds: int
89+
merged_at_timestamp_seconds: int
8190
associated_commit: str
91+
labels: list[str]
8292

8393

8494
@dataclasses.dataclass
@@ -255,16 +265,26 @@ def extract_pull_request_data(
255265
)
256266

257267
# Convert ISO timestamp to Unix timestamp, in seconds
258-
unix_timestamp = int(
268+
create_unix_timestamp = int(
259269
datetime.datetime.fromisoformat(pull_request["createdAt"]).timestamp()
260270
)
271+
if pull_request["mergedAt"] is not None:
272+
merge_unix_timestamp = int(
273+
datetime.datetime.fromisoformat(pull_request["mergedAt"]).timestamp()
274+
)
275+
else:
276+
merge_unix_timestamp = None
261277

262278
pull_request_data.append(
263279
LLVMPullRequestData(
264280
pull_request_number=pull_request["number"],
265281
pull_request_author=author_login,
266-
pull_request_timestamp_seconds=unix_timestamp,
282+
pull_request_timestamp_seconds=create_unix_timestamp,
283+
merged_at_timestamp_seconds=merge_unix_timestamp,
267284
associated_commit=commit_sha.removeprefix("commit_"),
285+
labels=[
286+
label["name"] for label in pull_request["labels"]["nodes"]
287+
],
268288
)
269289
)
270290

@@ -354,7 +374,6 @@ def query_github_graphql_api(
354374
},
355375
json={"query": query},
356376
)
357-
358377
# Exit if API call fails
359378
# A failed API call means a large batch of data is missing and will not be
360379
# reflected in the dashboard. The dashboard will silently misrepresent

premerge/ops-container/process_llvm_commits_test.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,8 +73,10 @@ def _create_pull_request_api_data(
7373
self,
7474
pull_request_number: int,
7575
created_at: str | None = None,
76+
merged_at: str | None = None,
7677
pull_request_author: str | None = None,
7778
reviews: list[dict[str, Any]] | None = None,
79+
labels: list[str] | None = None,
7880
) -> dict[str, Any]:
7981
"""Create a GitHub API response for a pull request."""
8082
return {
@@ -83,7 +85,9 @@ def _create_pull_request_api_data(
8385
{'login': pull_request_author} if pull_request_author else None
8486
),
8587
'createdAt': created_at,
88+
'mergedAt': merged_at,
8689
'reviews': {'nodes': reviews or []},
90+
'labels': {'nodes': [{'name': label} for label in labels or []]},
8791
}
8892

8993
def _create_review_api_data(
@@ -321,10 +325,14 @@ def test_extract_pull_request_data(self):
321325
"""Test extracting pull request data from GitHub API data."""
322326
created_at = datetime.datetime(2020, 1, 1, tzinfo=datetime.timezone.utc)
323327
created_at_iso = created_at.isoformat()
328+
merged_at = datetime.datetime(2020, 1, 2, tzinfo=datetime.timezone.utc)
329+
merged_at_iso = merged_at.isoformat()
324330
pull_request_api_data = self._create_pull_request_api_data(
325331
pull_request_number=12345,
326332
created_at=created_at_iso,
333+
merged_at=merged_at_iso,
327334
pull_request_author='pull_request_author',
335+
labels=['llvm:test-label'],
328336
)
329337
commit_api_data = {
330338
'commit_abcdef': self._create_commit_api_data(
@@ -345,7 +353,12 @@ def test_extract_pull_request_data(self):
345353
pull_request_data[0].pull_request_timestamp_seconds,
346354
created_at.timestamp(),
347355
)
356+
self.assertEqual(
357+
pull_request_data[0].merged_at_timestamp_seconds,
358+
merged_at.timestamp(),
359+
)
348360
self.assertEqual(pull_request_data[0].associated_commit, 'abcdef')
361+
self.assertIn('llvm:test-label', pull_request_data[0].labels)
349362

350363
def test_extract_pull_request_data_with_missing_author(self):
351364
"""Test extracting pull request data from GitHub API data."""

0 commit comments

Comments
 (0)