Commit 0854a9f

CASSANDRA-18399: Add a script to prepare merge commands
1 parent 814efae commit 0854a9f

11 files changed: +1313 -0 lines changed

dev/scripts/__init__.py

Lines changed: 5 additions & 0 deletions
@@ -0,0 +1,5 @@
import os
import sys

PROJECT_PATH = os.path.dirname(os.path.abspath(__file__))
sys.path.append(PROJECT_PATH)
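This __init__ appends dev/scripts itself to sys.path, apparently so that the scripts added below can use flat imports such as from lib.circleci_utils import *. A hypothetical illustration, not part of the commit, assuming the repository root is on sys.path:

import dev.scripts                                # runs the __init__ above, appending dev/scripts to sys.path
from lib.circleci_utils import get_failed_tests   # the flat "lib" import now resolves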
Lines changed: 129 additions & 0 deletions
@@ -0,0 +1,129 @@
# https://app.circleci.com/pipelines/github/jacek-lewandowski/cassandra/1252/workflows/b10132a7-1b4f-44d0-8808-f19a3b5fde69/jobs/63797
# https://circleci.com/api/v2/project/gh/jacek-lewandowski/cassandra/63797/tests
# {
# "items": [
# {
# "classname": "org.apache.cassandra.distributed.test.LegacyCASTest",
# "name": "testRepairIncompletePropose-_jdk17",
# "result": "success",
# "message": "",
# "run_time": 15.254,
# "source": "unknown"
# }
# ,{
# "classname": "org.apache.cassandra.distributed.test.NativeTransportEncryptionOptionsTest",
# "name": "testEndpointVerificationEnabledIpNotInSAN-cassandra.testtag_IS_UNDEFINED",
# "result": "failure",
# "message": "junit.framework.AssertionFailedError: Forked Java VM exited abnormally. Please note the time in the report does not reflect the time until the VM exit.\n\tat jdk.internal.reflect.GeneratedMethodAccessor4.invoke(Unknown Source)\n\tat java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\n\tat java.base/java.util.Vector.forEach(Vector.java:1365)\n\tat jdk.internal.reflect.GeneratedMethodAccessor4.invoke(Unknown Source)\n\tat java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\n\tat jdk.internal.reflect.GeneratedMethodAccessor4.invoke(Unknown Source)\n\tat java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\n\tat java.base/java.util.Vector.forEach(Vector.java:1365)\n\tat jdk.internal.reflect.GeneratedMethodAccessor4.invoke(Unknown Source)\n\tat java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\n\tat java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\n\tat java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:77)\n\tat java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\n\tat java.base/java.util.Vector.forEach(Vector.java:1365)\n\tat java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\n\tat java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:77)\n\tat java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\n\tat org.apache.cassandra.anttasks.TestHelper.execute(TestHelper.java:53)\n\tat jdk.internal.reflect.GeneratedMethodAccessor4.invoke(Unknown Source)\n\tat java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\n\tat java.base/java.util.Vector.forEach(Vector.java:1365)\n\tat jdk.internal.reflect.GeneratedMethodAccessor4.invoke(Unknown Source)\n\tat java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\n\tat jdk.internal.reflect.GeneratedMethodAccessor4.invoke(Unknown Source)\n\tat java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)",
# "run_time": 0.001,
# "source": "unknown"
# }
# ]
# }
import csv

# So here is the plan:
# I have a link to the pipeline: https://app.circleci.com/pipelines/github/jacek-lewandowski/cassandra/1252
# The program goes through all the workflow jobs and lists the failed tests along with the workflow, job, etc.
# Then:
# - separate failures into 3 groups:
# 1. flaky - if a test was repeated in multiple jobs and failed in some of them
# 2. failure - if a test was repeated in multiple jobs and failed in all of them
# 3. suspected - if a test was not repeated

# Then for each failure list Jira tickets that mention the test name.

# Having that information, let the user decide what to do with each failure:
# - select a jira ticket
# - create a new ticket
# - do not associate with any ticket
# - report on the PR

# Eventually, the user can create a script which performs the planned operations

from lib.circleci_utils import *

class TestFailure(NamedTuple):
    file: str
    classname: str
    name: str
    jobs_comp: str
    jobs_list: list

class TestFailureComparison(NamedTuple):
    file: str
    classname: str
    name: str
    feature_jobs: set
    base_jobs: set
    jobs_comp: str

if len(sys.argv) != 4 and len(sys.argv) != 6:
    print("Usage: %s <repo> <workflow_id> <output.csv>" % sys.argv[0])
    print("Usage: %s <feature repo> <feature workflow id> <base repo> <base workflow id> <output.csv>" % sys.argv[0])
    sys.exit(1)

if len(sys.argv) == 4:
    repo = sys.argv[1]
    workflow_id = sys.argv[2]
    output_file = sys.argv[3]
    failed_tests_dict = get_failed_tests(repo, workflow_id)
    failed_tests = []
    for file in failed_tests_dict:
        for classname in failed_tests_dict[file]:
            for name in failed_tests_dict[file][classname]:
                jobs = list(failed_tests_dict[file][classname][name])
                jobs.sort()
                # join the sorted job list so the "jobs" column is deterministic
                failed_tests.append(TestFailure(file, classname, name, ",".join(jobs), jobs))

    # sort failed tests by jobs, file, classname, name
    failed_tests.sort(key=lambda test: (test.jobs_comp, test.file, test.classname, test.name))

    # save failed_tests to csv file
    with open(output_file, 'w') as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(['file', 'classname', 'name', 'jobs'])
        for test in failed_tests:
            writer.writerow([test.file, test.classname, test.name, test.jobs_comp])

else:
    feature_repo = sys.argv[1]
    feature_workflow_id = sys.argv[2]
    base_repo = sys.argv[3]
    base_workflow_id = sys.argv[4]
    output_file = sys.argv[5]
    feature_failed_tests_dict = get_failed_tests(feature_repo, feature_workflow_id)
    base_failed_tests_dict = get_failed_tests(base_repo, base_workflow_id)

    failed_tests = []
    all_files = set(feature_failed_tests_dict.keys()).union(set(base_failed_tests_dict.keys()))
    for file in all_files:
        feature_classnames = feature_failed_tests_dict[file] if file in feature_failed_tests_dict else {}
        base_classnames = base_failed_tests_dict[file] if file in base_failed_tests_dict else {}
        all_classnames = set(feature_classnames.keys()).union(set(base_classnames.keys()))
        for classname in all_classnames:
            feature_names = feature_classnames[classname] if classname in feature_classnames else {}
            base_names = base_classnames[classname] if classname in base_classnames else {}
            all_names = set(feature_names.keys()).union(set(base_names.keys()))
            for name in all_names:
                feature_jobs = feature_names[name] if name in feature_names else set()
                base_jobs = base_names[name] if name in base_names else set()
                jobs_comp = list(feature_jobs.union(base_jobs))
                jobs_comp.sort()
                failed_tests.append(TestFailureComparison(file, classname, name, feature_jobs, base_jobs, ",".join(jobs_comp)))

    # sort failed tests by jobs, file, classname, name
    failed_tests.sort(key=lambda test: (test.jobs_comp, test.file, test.classname, test.name))

    # save failed_tests to csv file
    with open(output_file, 'w') as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(['file', 'classname', 'name', 'failed in feature only', 'failed in base only', 'failed in both'])
        for test in failed_tests:
            feature_only_jobs = list(test.feature_jobs.difference(test.base_jobs))
            feature_only_jobs.sort()
            base_only_jobs = list(test.base_jobs.difference(test.feature_jobs))
            base_only_jobs.sort()
            common_jobs = list(test.feature_jobs.intersection(test.base_jobs))
            common_jobs.sort()
            writer.writerow([test.file, test.classname, test.name, ",".join(feature_only_jobs), ",".join(base_only_jobs), ",".join(common_jobs)])
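The plan comments above describe splitting failures into flaky, failure, and suspected; this version of the script only lists and compares failures and does not implement that grouping yet. A minimal sketch of the grouping, assuming the caller can also supply the set of jobs that actually ran each test (a hypothetical ran_jobs input; get_failed_tests records only the jobs in which a test failed):

# Hedged sketch, not part of this commit. "failed_jobs" is one per-test set from
# get_failed_tests(...); "ran_jobs" (every job that executed the test) is an assumed input.
def classify_failure(failed_jobs: set, ran_jobs: set) -> str:
    if len(ran_jobs) <= 1:
        return "suspected"   # the test was not repeated
    if failed_jobs == ran_jobs:
        return "failure"     # repeated in multiple jobs and failed in all of them
    return "flaky"           # repeated in multiple jobs and failed only in some of them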

dev/scripts/lib/circleci_utils.py

Lines changed: 128 additions & 0 deletions
@@ -0,0 +1,128 @@
import json
import sys
from enum import Enum
from typing import NamedTuple

import urllib3

class PipelineInfo(NamedTuple):
    id: str
    number: int

def get_pipelines_from_circleci(repo, branch):
    http = urllib3.PoolManager()
    url = "https://circleci.com/api/v2/project/gh/%s/cassandra/pipeline?branch=%s" % (repo, branch)
    r = http.request('GET', url)
    if r.status == 200:
        items = json.loads(r.data.decode('utf-8'))['items']
        return [PipelineInfo(id=item['id'], number=item['number']) for item in items]
    return None

class WorkflowInfo(NamedTuple):
    id: str
    name: str
    status: str

def get_pipeline_workflows(pipeline_id):
    http = urllib3.PoolManager()
    url = "https://circleci.com/api/v2/pipeline/%s/workflow" % (pipeline_id)
    r = http.request('GET', url)
    if r.status == 200:
        items = json.loads(r.data.decode('utf-8'))['items']
        return [WorkflowInfo(id=item['id'], name=item['name'], status=item['status']) for item in items]

class JobType(Enum):
    BUILD = "build"
    APPROVAL = "approval"

class JobStatus(Enum):
    SUCCESS = "success"
    RUNNING = "running"
    NOT_RUN = "not_run"
    FAILED = "failed"
    RETRIED = "retried"
    QUEUED = "queued"
    NOT_RUNNING = "not_running"
    INFRASTRUCTURE_FAIL = "infrastructure_fail"
    TIMEDOUT = "timedout"
    ON_HOLD = "on_hold"
    TERMINATED_UNKNOWN = "terminated-unknown"
    BLOCKED = "blocked"
    CANCELED = "canceled"
    UNAUTHORIZED = "unauthorized"

class JobInfo(NamedTuple):
    id: str
    name: str
    status: JobStatus
    job_number: str
    type: JobType

def job_info_from_json(json):
    return JobInfo(id=json['id'], name=json['name'], status=JobStatus(json['status']), job_number=json['job_number'] if 'job_number' in json else None, type=JobType(json['type']))

def get_workflow_jobs(workflow_id):
    http = urllib3.PoolManager()
    url = "https://circleci.com/api/v2/workflow/%s/job" % (workflow_id)
    r = http.request('GET', url)
    if r.status == 200:
        items = json.loads(r.data.decode('utf-8'))['items']
        print("Found %d jobs" % len(items))
        return [job_info_from_json(item) for item in items]
    return None

def get_failed_jobs(workflow_id):
    jobs = get_workflow_jobs(workflow_id)
    failed_jobs = []
    for job in jobs:
        if job.status == JobStatus.FAILED and job.job_number is not None:
            failed_jobs.append(job)
        else:
            print("Skipping job %s" % str(job))
    return failed_jobs

class TestResult(Enum):
    SUCCESS = "success"
    FAILURE = "failure"
    SKIPPED = "skipped"
    ERROR = "error"
    UNKNOWN = "unknown"

class TestInfo(NamedTuple):
    message: str
    source: str
    run_time: float
    file: str
    result: TestResult
    name: str
    classname: str

def get_job_tests(repo, job_number):
    http = urllib3.PoolManager()
    url = "https://circleci.com/api/v2/project/gh/%s/cassandra/%s/tests" % (repo, job_number)
    r = http.request('GET', url)
    if r.status == 200:
        tests = [TestInfo(t['message'], t['source'], t['run_time'], t['file'] if 'file' in t else "", TestResult(t['result']), t['name'], t['classname']) for t in json.loads(r.data.decode('utf-8'))['items']]
        return tests
    return None


def get_failed_tests(repo, workflow_id):
    failed_jobs = get_failed_jobs(workflow_id)
    failed_tests = {}
    for job in failed_jobs:
        print("Getting tests for job %s" % str(job))
        tests = get_job_tests(repo, job.job_number)
        for test in tests:
            if test.result == TestResult.FAILURE:
                if test.file not in failed_tests:
                    failed_tests[test.file] = {}
                if test.classname not in failed_tests[test.file]:
                    failed_tests[test.file][test.classname] = {}
                # normalize the test name: drop suffixes such as "-_jdk17" and "[...]" parameters
                test_name = test.name.split("-", 2)[0]
                test_name = test_name.split("[", 2)[0]
                if test_name not in failed_tests[test.file][test.classname]:
                    failed_tests[test.file][test.classname][test_name] = set()
                failed_tests[test.file][test.classname][test_name].add(job.name)

    return failed_tests
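Chained together, these helpers turn a repository fork and branch into the nested failure map (file -> classname -> test name -> set of job names) that the analysis script above consumes. A rough usage sketch, not part of the commit, with placeholder repo and branch values:

# Rough usage sketch; "some-fork" and "trunk" are placeholders.
from lib.circleci_utils import get_pipelines_from_circleci, get_pipeline_workflows, get_failed_tests

repo = "some-fork"   # the <repo> component of the gh/<repo>/cassandra project slug
pipelines = get_pipelines_from_circleci(repo, "trunk") or []
if pipelines:
    latest = pipelines[0]   # assume the first returned pipeline is the most recent
    for workflow in get_pipeline_workflows(latest.id) or []:
        failed = get_failed_tests(repo, workflow.id)
        for file in failed:
            for classname in failed[file]:
                for name, jobs in failed[file][classname].items():
                    print(file, classname, name, ",".join(sorted(jobs)))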
