
Monthly Flakiness Report CI #5120


Draft · wants to merge 47 commits into main

Commits (47), all authored by sunmou99:
f3fb2f3  add Monthly Flakiness Report CI (Jun 29, 2023)
a32f166  fix corn (Jun 29, 2023)
3d2a902  fix cron (Jun 29, 2023)
8490e2a  use POSIX cron syntax (Jun 29, 2023)
b573f7f  add test trigger (Jun 29, 2023)
237152b  add runs-on (Jun 29, 2023)
2a19df7  test month calculation (Jun 29, 2023)
6afb5d3  test month calculation (Jun 29, 2023)
141e492  test month calculation (Jun 29, 2023)
9c39213  test month calculation (Jun 29, 2023)
77759dd  last 6 months data (Jun 30, 2023)
f441954  last 6 months data (Jun 30, 2023)
4452e4b  refactor workflow_information.py (Jul 5, 2023)
7f2d49a  fix bug (Jul 5, 2023)
d3913dc  standalong script for monthly summary report (Jul 5, 2023)
c1ca065  add default variable value (Jul 5, 2023)
6d092e9  add logging (Jul 5, 2023)
11753a4  provide monthly summary in json format (Jul 5, 2023)
5defa94  fix job_list variable (Jul 5, 2023)
e41601a  sort by date (Jul 5, 2023)
f608fce  print summary in markdown (Jul 5, 2023)
4f8d956  print summary in markdown (Jul 5, 2023)
86aa5b8  print summary in markdown (Jul 5, 2023)
4863361  print summary in markdown (Jul 5, 2023)
f183dd8  print summary in markdown (Jul 5, 2023)
5e146c6  add report title (Jul 6, 2023)
142722e  fix report format (Jul 6, 2023)
33a3573  fix report format (Jul 6, 2023)
b35f5b6  auto generate report (Jul 6, 2023)
aac4599  auto generate report (Jul 6, 2023)
74e8d45  add more description to the report (Jul 6, 2023)
7169c9b  add more description to the report (Jul 6, 2023)
fb59d37  add more description to the report (Jul 6, 2023)
e0cacae  collecting artifact (Jul 6, 2023)
d8bb165  save and read a dictionary with date objects as keys using JSON (Jul 6, 2023)
0ead30a  repleace unsupported file name (Jul 6, 2023)
04d0c14  read history data in artifact (Jul 6, 2023)
791a41a  import missing lib (Jul 6, 2023)
35f1706  import missing lib (Jul 6, 2023)
16fb4f5  logging.info (Jul 6, 2023)
6bd5e97  extract artifact (Jul 6, 2023)
64a7fcf  test zip file (Jul 6, 2023)
6a376f2  read pervious_summary_file (Jul 7, 2023)
c094305  fix string (Jul 7, 2023)
09db150  fix date object (Jul 7, 2023)
d2d23ad  fix date object (Jul 7, 2023)
d9598cd  fix missing log (Jul 7, 2023)
36 changes: 36 additions & 0 deletions .github/workflows/flakiness-report.yml
@@ -0,0 +1,36 @@
name: Monthly Flakiness Report

on:
  schedule:
    - cron: "0 9 1 * *" # At 9am UTC (1am PST / 2am PDT), on the 1st day of every month
  workflow_dispatch: # allow triggering the workflow manually
  pull_request: # [remove later] allow triggering the workflow with pull_request


jobs:
  monthly_flakiness_report:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v3

      - name: Setup python
        uses: actions/setup-python@v4
        with:
          python-version: '3.7'

      - name: Install prerequisites
        run: pip install requests # argparse is part of the Python standard library

      - name: Collect postsubmit Flakiness summary
        run: python ci/workflow_summary/monthly_summary.py --token ${{github.token}} --run_id ${{github.run_id}} --folder output_logs

      - name: Collect Monthly Test Flakiness logs
        run: python ci/workflow_summary/collect_ci_test_logs.py --token ${{github.token}} --folder output_logs

      - name: Upload output logs
        uses: actions/upload-artifact@v3
        if: ${{ !cancelled() }}
        with:
          name: output_logs
          path: output_logs
          retention-days: 90
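Alongside the cron schedule, the `workflow_dispatch` trigger makes manual runs possible. As a minimal sketch, a run could also be kicked off programmatically through GitHub's workflow-dispatch REST endpoint; the token value and target ref below are placeholders, not values from this PR:

```python
import requests

# Placeholder token; it needs permission to trigger Actions on the repo.
TOKEN = '<personal-access-token>'
URL = ('https://api.github.com/repos/firebase/firebase-android-sdk'
       '/actions/workflows/flakiness-report.yml/dispatches')

response = requests.post(
    URL,
    headers={'Accept': 'application/vnd.github.v3+json',
             'Authorization': f'token {TOKEN}'},
    json={'ref': 'main'},  # branch whose copy of the workflow should run
    timeout=10,
)
response.raise_for_status()  # GitHub answers 204 No Content on success
```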
2 changes: 1 addition & 1 deletion ci/workflow_summary/collect_ci_test_logs.py
@@ -60,7 +60,7 @@ def main():
      success_count = job['success_count']
      failure_count = job['failure_count']

-     log_file_path = os.path.join(file_folder, f'{job_name}.log')
+     log_file_path = os.path.join(file_folder, f'{job_name.replace(":", "_")}.log')
      file_log = open(log_file_path, 'w')
      file_log.write(f'\n{job_name}:\nFailure rate:{failure_rate:.2%} \nTotal count: {total_count} (success: {success_count}, failure: {failure_count})\nFailed jobs:')
      logging.info(f'\n\n{job_name}:\nFailure rate:{failure_rate:.2%} \nTotal count: {total_count} (success: {success_count}, failure: {failure_count})\nFailed jobs:')
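The one-line fix above exists because GitHub job names can contain characters such as ':' that are invalid in file names (notably on Windows). A small sketch of the same idea generalized to the usual set of unsafe characters; `sanitize_filename` is illustrative and not part of this PR:

```python
import re

def sanitize_filename(name: str, replacement: str = '_') -> str:
  """Replace characters that are unsafe in file names across platforms."""
  # ':' breaks Windows paths; the others are reserved on at least one OS.
  return re.sub(r'[<>:"/\\|?*]', replacement, name)

print(sanitize_filename('Unit Tests (matrix) / build:release') + '.log')
# Unit Tests (matrix) _ build_release.log
```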
56 changes: 56 additions & 0 deletions ci/workflow_summary/github.py
@@ -16,6 +16,8 @@

import requests
import logging
import json
import shutil

RETRIES = 3
BACKOFF = 5
@@ -60,3 +62,57 @@ def job_logs(self, token, job_id):
      else:
        logging.info('no log available')
        return ''

  def create_issue(self, token, title, label, body):
    """Create an issue: https://docs.github.com/en/rest/reference/issues#create-an-issue"""
    url = f'{self.github_api_url}/issues'
    headers = {'Accept': 'application/vnd.github.v3+json', 'Authorization': f'token {token}'}
    data = {'title': title, 'labels': [label], 'body': body}
    with requests.post(url, headers=headers, data=json.dumps(data), timeout=TIMEOUT) as response:
      logging.info("create_issue: %s response: %s", url, response)
      return response.json()

  def get_issue_body(self, token, issue_number):
    """Get an issue: https://docs.github.com/en/rest/reference/issues#get-an-issue"""
    url = f'{self.github_api_url}/issues/{issue_number}'
    headers = {'Accept': 'application/vnd.github.v3+json', 'Authorization': f'token {token}'}
    with requests.get(url, headers=headers, timeout=TIMEOUT) as response:
      logging.info("get_issue_body: %s response: %s", url, response)
      return response.json()["body"]

  def update_issue_comment(self, token, issue_number, comment):
    """Update an issue: https://docs.github.com/en/rest/reference/issues#update-an-issue"""
    url = f'{self.github_api_url}/issues/{issue_number}'
    headers = {'Accept': 'application/vnd.github.v3+json', 'Authorization': f'token {token}'}
    with requests.patch(url, headers=headers, data=json.dumps({'body': comment}), timeout=TIMEOUT) as response:
      logging.info("update_issue: %s response: %s", url, response)

  def search_issues_by_label(self, owner, repo, label):
    """https://docs.github.com/en/rest/reference/search#search-issues-and-pull-requests"""
    url = f'https://api.github.com/search/issues?q=repo:{owner}/{repo}+label:"{label}"+is:issue'
    headers = {'Accept': 'application/vnd.github.v3+json'}
    with requests.get(url, headers=headers, timeout=TIMEOUT) as response:
      logging.info("search_issues_by_label: %s response: %s", url, response)
      return response.json()["items"]

  def list_artifacts(self, token, run_id):
    """https://docs.github.com/en/rest/reference/actions#list-workflow-run-artifacts"""
    url = f'{self.github_api_url}/actions/runs/{run_id}/artifacts'
    headers = {'Accept': 'application/vnd.github.v3+json', 'Authorization': f'token {token}'}
    with requests.get(url, headers=headers, timeout=TIMEOUT) as response:
      logging.info("list_artifacts: %s response: %s", url, response)
      return response.json()["artifacts"]

  def download_artifact(self, token, artifact_id, output_path=None):
    """https://docs.github.com/en/rest/reference/actions#download-an-artifact"""
    url = f'{self.github_api_url}/actions/artifacts/{artifact_id}/zip'
    headers = {'Accept': 'application/vnd.github.v3+json', 'Authorization': f'token {token}'}
    with requests.get(url, headers=headers, stream=True, timeout=TIMEOUT_LONG) as response:
      logging.info("download_artifact: %s response: %s", url, response)
      if output_path:
        with open(output_path, 'wb') as file:
          shutil.copyfileobj(response.raw, file)
      elif response.status_code == 200:
        return response.content
    return None
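Together these helpers implement a find-or-create pattern for the report issue. A sketch of how they are expected to chain, mirroring `get_issue_number()` and `update_report()` in `monthly_summary.py` below (the token is a placeholder):

```python
token = '<github-token>'  # placeholder
gh = GitHub('firebase', 'firebase-android-sdk')

# Look for an existing report issue by its label and title.
issues = gh.search_issues_by_label('firebase', 'firebase-android-sdk', 'flakiness-history')
issue_number = next((i['number'] for i in issues
                     if i['title'] == 'Monthly Flakiness History'), None)

if issue_number is None:
  gh.create_issue(token, 'Monthly Flakiness History', 'flakiness-history', '...report body...')
else:
  # Despite the name, this updates the issue body itself, not a comment.
  gh.update_issue_comment(token, issue_number, '...report body...')
```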
221 changes: 221 additions & 0 deletions ci/workflow_summary/monthly_summary.py
@@ -0,0 +1,221 @@
# Copyright 2023 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the 'License');
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an 'AS IS' BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import github
import os
import re
import json
import zipfile
import datetime
import argparse
import logging
import workflow_information


'''A utility that generates a monthly flakiness summary report for the ci_tests.yml workflow and publishes it to a GitHub issue.

Usage:

  python monthly_summary.py --token ${github_token} --run_id ${run_id} --folder ${output_folder}

'''

REPO_OWNER = 'firebase'
REPO_NAME = 'firebase-android-sdk'
EXCLUDE_JOB_LIST = ['Determine changed modules', 'Unit Tests (matrix)', 'Publish Tests Results', 'Unit Test Results', 'Instrumentation Tests', 'Unit Tests']

REPORT_LABEL = 'flakiness-history'
REPORT_TITLE = 'Monthly Flakiness History'

def main():
  logging.getLogger().setLevel(logging.INFO)

  args = parse_cmdline_args()
  token = args.token
  output_folder = os.path.normpath(args.folder)
  if not os.path.exists(output_folder):
    os.makedirs(output_folder)
  gh = github.GitHub(REPO_OWNER, REPO_NAME)

  issue_number = get_issue_number(gh)
  monthly_summary = get_previous_report(gh, token, issue_number)
  monthly_summary = get_latest_monthly_summary(gh, token, monthly_summary, output_folder)
  print(monthly_summary)
  summary_report = markdown_report(monthly_summary, args.run_id)
  print(summary_report)
  update_report(gh, token, issue_number, summary_report)


def get_latest_monthly_summary(gh, token, monthly_summary, output_folder):
  first_day_in_month = datetime.date.today().replace(day=1)
  # With a previous report, only the most recent month must be recalculated;
  # otherwise backfill the last 6 months.
  months = 1 if monthly_summary else 6
  for i in range(months):
    # The day before the current month's first day is the last day of the
    # previous month; this steps the window back one month per iteration.
    last_day_in_month = first_day_in_month - datetime.timedelta(days=1)
    first_day_in_month = last_day_in_month.replace(day=1)

    from_time = datetime.datetime.combine(first_day_in_month, datetime.time.min)
    to_time = datetime.datetime.combine(last_day_in_month, datetime.time.max)
    created = from_time.strftime('%Y-%m-%dT%H:%M:%SZ') + '..' + to_time.strftime('%Y-%m-%dT%H:%M:%SZ')

    workflow_summary = workflow_information.get_workflow_summary(gh=gh, token=token, created=created, workflow_name='ci_tests.yml', event='push', branch='master')
    failure_rate = float(workflow_summary['failure_count'] / workflow_summary['total_count'])
    monthly_summary[last_day_in_month] = {
      'failure_rate': failure_rate,
      'total_count': workflow_summary['total_count'],
      'success_count': workflow_summary['success_count'],
      'failure_count': workflow_summary['failure_count'],
      'failure_jobs': {}
    }

    job_summary = workflow_information.get_job_summary(workflow_summary)
    if i == 0:
      job_summary_file_path = os.path.join(output_folder, 'job_summary.json')
      with open(job_summary_file_path, 'w') as f:
        json.dump(job_summary, f)
      logging.info(f'Job summary has been written to {job_summary_file_path}\n')

    monthly_summary[last_day_in_month]['failure_jobs'] = {
      job_name: {
        'failure_rate': job['failure_rate'],
        'total_count': job['total_count'],
        'success_count': job['success_count'],
        'failure_count': job['failure_count'],
      }
      for job_name, job in job_summary.items() if job['failure_rate'] > 0
    }

  monthly_summary_file_path = os.path.join(output_folder, 'monthly_summary.json')
  with open(monthly_summary_file_path, 'w') as f:
    json.dump({date_to_string(key): value for key, value in monthly_summary.items()}, f)
  logging.info(f'Monthly summary has been written to {monthly_summary_file_path}\n')

  return monthly_summary


def markdown_report(monthly_summary, run_id):
  monthly_summary = dict(sorted(monthly_summary.items(), reverse=True))

  markdown_report = f"## {REPORT_TITLE} \n\n"
  markdown_report += f"**[Click to View and Download the Artifacts for Last Month's Flakiness Logs](https://github.com/firebase/firebase-android-sdk/actions/runs/{run_id})**\n\n"
  markdown_report += "*** \n\n"

  # Column headers: one column per month, newest first
  dates = [date.strftime('%b %Y') for date in sorted(monthly_summary.keys(), reverse=True)]
  markdown_report += "#### Workflow Flakiness History \n\n"
  markdown_report += f"| Workflow | {' | '.join(dates)} |\n"
  markdown_report += "| --- |" + " --- |" * len(dates) + "\n"
  # One row for the workflow: its failure rate for each month
  workflow_data = [f"{summary['failure_rate']:.2%} ({summary['failure_count']}/{summary['total_count']})" for summary in monthly_summary.values()]
  markdown_report += f"| ci_tests.yml | {' | '.join(workflow_data)} |\n\n"
  markdown_report += "*** \n\n"

  # Per-job flakiness table
  markdown_report += "#### Job Flakiness History \n\n"
  markdown_report += "This table presents two categories of job failures: \n"
  markdown_report += "1) jobs that failed in the last month \n"
  markdown_report += "2) jobs that had a high failure rate (exceeding 10% on average) over the past six months \n\n"
  markdown_report += f"| Job | {' | '.join(dates)} |\n"
  markdown_report += "| --- |" + " --- |" * len(dates) + "\n"
  # Jobs that failed in the latest month, sorted by failure rate
  latest_month = next(iter(monthly_summary.values()))
  sorted_jobs = sorted(latest_month['failure_jobs'], key=lambda job: latest_month['failure_jobs'][job]['failure_rate'], reverse=True)
  # Jobs with a high average failure rate over the past 6 months
  all_jobs = {job for summary in list(monthly_summary.values())[:6] for job in summary['failure_jobs']}
  all_jobs.difference_update(set(sorted_jobs))
  avg_failure_rates = {job: sum([summary['failure_jobs'][job]['failure_rate'] for summary in list(monthly_summary.values())[:6] if job in summary['failure_jobs']]) / 6 for job in all_jobs}
  high_failure_jobs = {job: rate for job, rate in avg_failure_rates.items() if rate >= 0.1}
  # Combine last-month failures with high-failure-rate jobs
  all_jobs = sorted_jobs + sorted(high_failure_jobs, key=high_failure_jobs.get, reverse=True)
  # One table row per job, newest month first
  for job_name in all_jobs:
    if job_name in EXCLUDE_JOB_LIST:
      continue
    job_data = []
    for _, one_month_summary in sorted(monthly_summary.items(), reverse=True):
      one_month_job_summary = one_month_summary['failure_jobs'].get(job_name)
      if one_month_job_summary:
        job_data.append(f"{one_month_job_summary['failure_rate']:.2%} ({one_month_job_summary['failure_count']}/{one_month_job_summary['total_count']})")
      else:
        job_data.append('N/A')
    markdown_report += f"| {job_name} | {' | '.join(job_data)} |\n"

  return markdown_report


def get_issue_number(gh):
  issues = gh.search_issues_by_label(REPO_OWNER, REPO_NAME, REPORT_LABEL)
  for issue in issues:
    if issue['title'] == REPORT_TITLE:
      return issue['number']
  return None  # no existing report issue; a new one will be created


def get_previous_report(gh, token, issue_number):
  previous_monthly_summary = {}
  if issue_number:
    issue_body = gh.get_issue_body(token, issue_number)
    logging.info(issue_body)
    # Extract the run_id from the artifact URL embedded in the issue body
    pattern = r"https://github.com/firebase/firebase-android-sdk/actions/runs/(\d+)"
    match = re.search(pattern, issue_body)
    if match:
      run_id = match.group(1)
      artifacts = gh.list_artifacts(token, run_id)
      for artifact in artifacts:
        if artifact['name'] == 'output_logs':
          gh.download_artifact(token, artifact['id'], 'artifact.zip')
          # Extract all files from the downloaded artifact
          with zipfile.ZipFile('artifact.zip', 'r') as zip_ref:
            zip_ref.extractall('artifact')
      previous_summary_file = os.path.join('artifact', 'monthly_summary.json')
      if os.path.exists(previous_summary_file):
        with open(previous_summary_file, 'r') as f:
          loaded_data = json.load(f)
          logging.info(loaded_data)
          previous_monthly_summary = {string_to_date(key): value for key, value in loaded_data.items()}

  return previous_monthly_summary


def update_report(gh, token, issue_number, summary_report):
  if not issue_number:
    gh.create_issue(token, REPORT_TITLE, REPORT_LABEL, summary_report)
  else:
    gh.update_issue_comment(token, issue_number, summary_report)


def date_to_string(date):
  """Convert a date object to a 'YYYY-MM-DD' string (JSON-safe dict key)."""
  return date.strftime('%Y-%m-%d')


def string_to_date(date_string):
  """Convert a 'YYYY-MM-DD' string back to a date object."""
  return datetime.datetime.strptime(date_string, '%Y-%m-%d').date()


def parse_cmdline_args():
  parser = argparse.ArgumentParser()
  parser.add_argument('-t', '--token', required=True, help='GitHub access token')
  parser.add_argument('-i', '--run_id', required=True, help='Workflow run id')
  parser.add_argument('-f', '--folder', required=True, help='Folder for files generated by workflow_information.py. Test logs are also located here.')

  args = parser.parse_args()
  return args


if __name__ == '__main__':
  main()
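Two details of this script are easy to miss: monthly windows are derived by repeatedly stepping to the day before the first of the month, and the summary dictionary is keyed by `datetime.date` objects, which JSON cannot serialize directly (hence `date_to_string`/`string_to_date`). A standalone sketch of both mechanics, standard library only:

```python
import datetime
import json

# Walk back through the last three monthly windows.
first_day = datetime.date.today().replace(day=1)
summary = {}
for _ in range(3):
  last_day = first_day - datetime.timedelta(days=1)  # last day of previous month
  first_day = last_day.replace(day=1)                # first day of previous month
  summary[last_day] = {'failure_rate': 0.0}

# date keys must round-trip through strings to survive JSON.
encoded = json.dumps({d.strftime('%Y-%m-%d'): v for d, v in summary.items()})
decoded = {datetime.datetime.strptime(k, '%Y-%m-%d').date(): v
           for k, v in json.loads(encoded).items()}
assert decoded.keys() == summary.keys()
```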