Skip to content

Commit d71446b

Browse files
committed
fixup! Post CI health reports to Slack
1 parent 6274c23 commit d71446b

File tree

4 files changed

+96
-75
lines changed

4 files changed

+96
-75
lines changed

.github/workflows/reports.yml

+29-11
Original file line numberDiff line numberDiff line change
@@ -35,18 +35,27 @@ jobs:
3535
- name: Generate Slack message
3636
id: message
3737
run: |
38-
for name in health failing-jobs; do
39-
output=$(docker exec trino \
40-
java -Dorg.jline.terminal.dumb=true -jar /usr/bin/trino \
41-
trino://localhost:8080/trinocicd/v2 \
42-
--file /sql/ci-cd/$name.sql \
43-
--output-format=ALIGNED)
38+
output=$(docker exec trino \
39+
java -Dorg.jline.terminal.dumb=true -jar /usr/bin/trino \
40+
trino://localhost:8080/trinocicd/v2 \
41+
--file /sql/ci-cd/health.sql \
42+
--output-format=VERTICAL | tail -n+2)
4443
45-
EOF=$(dd if=/dev/urandom bs=15 count=1 status=none | base64)
46-
echo "$name<<$EOF" >> $GITHUB_OUTPUT
47-
echo '```'"$output"'```' >> $GITHUB_OUTPUT
48-
echo "$EOF" >> $GITHUB_OUTPUT
49-
done
44+
EOF=$(dd if=/dev/urandom bs=15 count=1 status=none | base64)
45+
echo "health<<$EOF" >> $GITHUB_OUTPUT
46+
echo '```'"$output"'```' >> $GITHUB_OUTPUT
47+
echo "$EOF" >> $GITHUB_OUTPUT
48+
49+
output=$(docker exec trino \
50+
java -Dorg.jline.terminal.dumb=true -jar /usr/bin/trino \
51+
trino://localhost:8080/trinocicd/v2 \
52+
--file /sql/ci-cd/failing-jobs.sql \
53+
--output-format=CSV_UNQUOTED)
54+
55+
EOF=$(dd if=/dev/urandom bs=15 count=1 status=none | base64)
56+
echo "failing-jobs<<$EOF" >> $GITHUB_OUTPUT
57+
echo "$output" >> $GITHUB_OUTPUT
58+
echo "$EOF" >> $GITHUB_OUTPUT
5059
- name: Post to a Slack channel
5160
id: slack
5261
uses: slackapi/[email protected]
@@ -86,6 +95,15 @@ jobs:
8695
"type": "mrkdwn",
8796
"text": ${{ toJSON(steps.message.outputs.failing-jobs) }}
8897
}
98+
},
99+
{
100+
"type": "context",
101+
"elements": [
102+
{
103+
"type": "mrkdwn",
104+
"text": "See the complete <https://trinodb.github.io/reports/reports/flaky/|flaky jobs report>"
105+
}
106+
]
89107
}
90108
]
91109
}

sql/ci-cd/failing-jobs.sql

+13-28
Original file line numberDiff line numberDiff line change
@@ -1,60 +1,45 @@
11
-- Recently failing jobs
22
-- Lists failed jobs on master branch in last 24h
3-
WITH
4-
-- bar() function renders colored output, and uses one character rendering (boxes), so has less precision within same width
5-
FUNCTION ascii_bar(value double)
6-
RETURNS varchar
7-
DETERMINISTIC
8-
BEGIN
9-
DECLARE max_width double DEFAULT 20;
10-
DECLARE clamped_value double;
11-
SET clamped_value = greatest(0, least(1, value));
12-
RETURN array_join(
13-
repeat('█',
14-
greatest(0, CAST(floor(max_width * clamped_value) AS integer) - 1)), '')
15-
|| ARRAY[' ', '▏', '▎', '▍', '▌', '▋', '▊', '▉', '█'][cast((clamped_value % (1e0 / max_width)) * max_width * 8 + 1 as int)];
16-
END
173
WITH
184
recent_master_runs AS (
195
SELECT
206
id
217
, check_suite_id
22-
, 'https://github.com/trinodb/trino/actions/runs/' || cast(id as varchar) AS details_url
8+
, format('<https://github.com/trinodb/trino/actions/runs/%d|%d>', id, run_number) AS details_link
239
, conclusion
2410
FROM runs
2511
WHERE owner = 'trinodb' AND repo = 'trino' AND name = 'ci'
26-
-- only include push events, not pull requests, to filter out PR runs from master branch in forks
27-
AND head_branch = 'master' AND event = 'push' AND status = 'completed'
12+
-- do not include pull requests, to filter out PR runs from master branch in forks
13+
AND head_branch = 'master' AND event != 'pull_request' AND status = 'completed'
2814
AND created_at > now() - interval '1' day
2915
)
3016
, failed_recent_master_runs AS (
3117
SELECT
3218
id
3319
, check_suite_id
34-
, details_url
20+
, details_link
3521
FROM recent_master_runs
3622
WHERE conclusion != 'success'
3723
)
3824
, failed_jobs AS (
3925
SELECT
4026
jobs.name
4127
, count(DISTINCT runs.id) AS num_failed_runs
42-
, array_agg(anno.title || chr(10) || anno.message ORDER BY anno.message) AS errors
43-
, array_agg(DISTINCT runs.details_url ORDER BY runs.details_url) AS failed_runs
28+
, array_agg(DISTINCT runs.details_link ORDER BY runs.details_link) AS failed_runs
4429
FROM failed_recent_master_runs runs
4530
JOIN check_runs jobs ON jobs.check_suite_id = runs.check_suite_id AND jobs.conclusion NOT IN ('success', 'skipped')
46-
LEFT JOIN check_run_annotations anno ON anno.check_run_id = jobs.id
4731
GROUP BY jobs.name
4832
)
4933

5034
SELECT
51-
name AS "Job name"
52-
, ascii_bar(1e0 * num_failed_runs / (SELECT count(*) FROM recent_master_runs)) AS "Failure ratio chart"
53-
, round(100e0 * num_failed_runs / (SELECT count(*) FROM recent_master_runs), 1) AS "Failure percent"
54-
, num_failed_runs AS "Number of failed runs"
55-
-- whole report must be under 3k characters to fit into a Slack notification
56-
--, errors AS "Error messages"
57-
, failed_runs AS "Run URLs"
35+
format(
36+
'• %s - %.1f%% (%d/%d)%n %s'
37+
, name
38+
, 100e0 * num_failed_runs / (SELECT count(*) FROM recent_master_runs)
39+
, num_failed_runs
40+
, (SELECT count(*) FROM recent_master_runs)
41+
, array_join(failed_runs, ', ')
42+
) AS "Jobs"
5843
FROM failed_jobs
5944
ORDER BY num_failed_runs DESC, name
6045
;

sql/ci-cd/health.sql

+53-36
Original file line numberDiff line numberDiff line change
@@ -1,46 +1,63 @@
11
-- CI workflow health
2-
WITH
3-
-- bar() function renders colored output, and uses one character rendering (boxes), so has less precision within same width
4-
FUNCTION ascii_bar(value double)
5-
RETURNS varchar
6-
DETERMINISTIC
7-
BEGIN
8-
DECLARE max_width double DEFAULT 20;
9-
DECLARE clamped_value double;
10-
SET clamped_value = greatest(0, least(1, value));
11-
RETURN array_join(
12-
repeat('█',
13-
greatest(0, CAST(floor(max_width * clamped_value) AS integer) - 1)), '')
14-
|| ARRAY[' ', '▏', '▎', '▍', '▌', '▋', '▊', '▉', '█'][cast((clamped_value % (1e0 / max_width)) * max_width * 8 + 1 as int)];
15-
END
2+
-- Percentage of successful runs of the `ci` workflow on the master branch.
3+
WITH FUNCTION vertical_bar(value DOUBLE, day DATE)
4+
RETURNS VARCHAR
5+
DETERMINISTIC
6+
RETURN CASE
7+
-- for weekends, if missing or zero, grey it out
8+
WHEN value = 0 AND day_of_week(day) IN (6,7) THEN ''
9+
-- map [0.0, 1.0] to [1, 9]
10+
ELSE ARRAY[' ', '▁', '▂', '▃', '▄', '▅', '▆', '▇', '█'][cast(value * 8 + 1 as int)]
11+
END
1612
WITH
1713
runs AS (
1814
SELECT
19-
CASE head_branch
20-
WHEN 'master' THEN head_branch
21-
ELSE '[other]'
22-
END AS branch
23-
, created_at
15+
date(created_at) AS created_at
2416
, conclusion
17+
, count(*) AS num_runs
18+
, count(*) FILTER (WHERE conclusion = 'success') AS num_success
2519
FROM runs
26-
WHERE owner = 'trinodb' AND repo = 'trino'
27-
AND name = 'ci' AND created_at >= CURRENT_DATE - INTERVAL '30' DAY
20+
WHERE owner = 'trinodb' AND repo = 'trino' AND name = 'ci'
21+
AND head_branch = 'master' AND event != 'pull_request' AND status = 'completed'
22+
AND created_at >= CURRENT_DATE - INTERVAL '7' DAY
23+
GROUP BY date(created_at), conclusion
24+
)
25+
, days AS (
26+
SELECT seq.day
27+
FROM (SELECT min(created_at) AS first_day , max(created_at) AS last_day FROM runs) range
28+
CROSS JOIN UNNEST (sequence(range.first_day, range.last_day)) seq(day)
29+
)
30+
, daily AS (
31+
SELECT
32+
created_at
33+
, conclusion
34+
, num_runs
35+
, num_success
36+
, 1e0 * num_success / num_runs AS ratio
37+
FROM days
38+
LEFT JOIN runs ON runs.created_at = days.day
2839
)
29-
, intervals(days, label) AS (
30-
VALUES
31-
(INTERVAL '1' DAY, '1 day')
32-
, (INTERVAL '3' DAY, '3 days')
33-
, (INTERVAL '7' DAY, '7 days')
34-
, (INTERVAL '30' DAY, '30 days')
40+
, summary AS (
41+
SELECT
42+
sum(num_runs) AS num_runs
43+
, sum(num_success) AS num_success
44+
, 1e0 * sum(num_success) / sum(num_runs) AS ratio
45+
, array_join(array_agg(vertical_bar(coalesce(ratio, 0), created_at) ORDER BY created_at DESC), '') AS chart
46+
FROM daily
47+
)
48+
, latest AS (
49+
SELECT
50+
num_runs
51+
, num_success
52+
, 1e0 * num_success / num_runs AS ratio
53+
FROM runs
54+
ORDER BY created_at DESC
55+
LIMIT 1
3556
)
3657
SELECT
37-
branch AS "Branch"
38-
, intervals.label AS "Interval"
39-
, ascii_bar(1e0 * count(1) FILTER (WHERE conclusion = 'success') / count(1)) AS "Success ratio chart"
40-
, round(100e0 * count(1) FILTER (WHERE conclusion = 'success') / count(1), 1) AS "Success percent"
41-
, count(1) FILTER (WHERE created_at > now() - intervals.days) AS "Number of runs"
42-
FROM intervals
43-
JOIN runs ON runs.created_at > now() - intervals.days
44-
GROUP BY branch, intervals.days, intervals.label
45-
ORDER BY branch DESC, intervals.days
58+
format('%.1f%% (%d/%d)', 100e0 * latest.ratio, latest.num_success, latest.num_runs) AS "Today"
59+
, format('%.1f%% (%d/%d)', 100e0 * summary.ratio, summary.num_success, summary.num_runs) AS "Weekly"
60+
, summary.chart AS "Daily (desc)"
61+
FROM summary
62+
CROSS JOIN latest
4663
;

sql/flaky/jobs.sql

+1
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ WITH report_configuration AS (
3232
AND jobs.conclusion IS NOT NULL -- ignore partially ingested information
3333
AND jobs.conclusion != 'skipped'
3434
AND jobs.conclusion != 'cancelled'
35+
AND jobs.name NOT LIKE 'check-commit%' -- these include a commit SHA and are always unique
3536
)
3637
, analyzed_job_runs AS (
3738
-- When using "Re-run failed jobs", previously successful jobs appear as successful, which could lead to

0 commit comments

Comments
 (0)