-
Notifications
You must be signed in to change notification settings - Fork 3
Expand file tree
/
Copy pathmetrics.py
More file actions
279 lines (252 loc) · 11.2 KB
/
metrics.py
File metadata and controls
279 lines (252 loc) · 11.2 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
import fnmatch
from typing import Dict, List, Optional

from prometheus_client import Counter, Histogram

from gh_actions_exporter.config import Relabel, RelabelType, Settings
from gh_actions_exporter.cost import Cost
from gh_actions_exporter.types import WebHook, WorkflowJob, WorkflowRun
class Metrics:
    """Prometheus metrics updated from GitHub Actions webhook events.

    The constructor registers every counter and histogram; the ``handle_*``
    methods update them from a received ``WebHook``.
    """

    def __init__(self, settings: Settings):
        """Register the workflow and job metrics.

        Args:
            settings: Exporter configuration. Each entry of
                ``settings.job_relabelling`` contributes one extra label
                name (``relabel.label``) to every job metric.
        """
        self.settings = settings
        self.cost = Cost(settings)
        self.common_labelnames = [
            "repository",
            "workflow_name",
            "repository_visibility",
        ]
        # List concatenation already builds a new list, so the shared
        # common_labelnames list is never mutated.
        self.workflow_labelnames = self.common_labelnames + [
            "branch",
            "event",
        ]
        self.job_labelnames = self.common_labelnames + [
            "job_name",
            "runner_type",
            "runner_labels",
        ]
        self.job_labelnames.extend(
            relabel.label for relabel in self.settings.job_relabelling
        )

        self.workflow_rebuild = Counter(
            "github_actions_workflow_rebuild_count",
            "The number of workflow rebuild",
            labelnames=self.workflow_labelnames,
        )
        self.workflow_duration = Histogram(
            "github_actions_workflow_duration_seconds",
            "The duration of a workflow in seconds",
            labelnames=self.workflow_labelnames,
        )
        self.job_duration = Histogram(
            "github_actions_job_duration_seconds",
            "The duration of a job in seconds",
            labelnames=self.job_labelnames,
        )
        self.workflow_status_failure = Counter(
            "github_actions_workflow_failure_count",
            "Count the number of workflow failure",
            labelnames=self.workflow_labelnames,
        )
        self.workflow_status_success = Counter(
            "github_actions_workflow_success_count",
            "Count the number of workflow success",
            labelnames=self.workflow_labelnames,
        )
        self.workflow_status_cancelled = Counter(
            "github_actions_workflow_cancelled_count",
            "Count the number of workflow cancelled",
            labelnames=self.workflow_labelnames,
        )
        self.workflow_status_inprogress = Counter(
            "github_actions_workflow_inprogress_count",
            "Count the number of workflow in progress",
            labelnames=self.workflow_labelnames,
        )
        self.workflow_status_total = Counter(
            "github_actions_workflow_total_count",
            "Count the total number of workflows",
            labelnames=self.workflow_labelnames,
        )
        self.job_status_failure = Counter(
            "github_actions_job_failure_count",
            "Count the number of job failure",
            labelnames=self.job_labelnames,
        )
        self.job_status_success = Counter(
            "github_actions_job_success_count",
            "Count the number of job success",
            labelnames=self.job_labelnames,
        )
        self.job_status_cancelled = Counter(
            "github_actions_job_cancelled_count",
            "Count the number of job cancelled",
            labelnames=self.job_labelnames,
        )
        self.job_status_inprogress = Counter(
            "github_actions_job_inprogress_count",
            "Count the number of job in progress",
            labelnames=self.job_labelnames,
        )
        self.job_status_queued = Counter(
            "github_actions_job_queued_count",
            "Count the number of job queued",
            labelnames=self.job_labelnames,
        )
        self.job_status_total = Counter(
            "github_actions_job_total_count",
            "Count the total number of jobs",
            labelnames=self.job_labelnames,
        )
        self.job_start_duration = Histogram(
            "github_actions_job_start_duration_seconds",
            "Time between when a job is requested and started",
            labelnames=self.job_labelnames,
        )
        # Counter accumulating the cost of completed jobs.
        self.job_cost = Counter(
            "github_actions_job_cost_count",
            "Cost of a job",
            labelnames=self.job_labelnames,
        )

    def retrieve_branch(self, workflow_run: WorkflowRun) -> str:
        """Return the value of the ``branch`` label for a workflow run.

        The label is kept low-cardinality for Prometheus:

        - For workflows triggered by a ``pull_request`` event, the base
          (target) branch of the first pull request is used.
        - For every other event, the head branch is used.

        The branch is then matched against the glob patterns in
        ``self.settings.branches`` (e.g. ``development/*``, ``main``).

        Returns:
            The branch name when it matches one of the patterns, otherwise
            the catch-all ``"dev"``. ``"dev"`` is also returned when no
            branch can be determined (a ``pull_request`` run without any
            attached pull request, or a run without a head branch) instead
            of raising.
        """
        ref: Optional[str]
        if workflow_run.event == "pull_request":
            if not workflow_run.pull_requests:
                # The base branch is unknown without pull request details,
                # so fall back to the catch-all bucket rather than crash.
                return "dev"
            ref = workflow_run.pull_requests[0].base.ref
        else:
            ref = workflow_run.head_branch

        # fnmatch raises TypeError on None, hence the explicit guard.
        if ref is not None and any(
            fnmatch.fnmatch(ref, pattern) for pattern in self.settings.branches
        ):
            return ref
        return "dev"

    def workflow_labels(self, webhook: WebHook) -> dict:
        """Build the label values shared by all workflow metrics.

        Args:
            webhook: Event that must carry a ``workflow_run``.

        Returns:
            A dict keyed by the names in ``self.workflow_labelnames``.

        Raises:
            AssertionError: If ``webhook.workflow_run`` is missing.
        """
        # Internal invariant: callers only pass workflow_run events here.
        assert webhook.workflow_run
        branch = self.retrieve_branch(webhook.workflow_run)
        return dict(
            workflow_name=webhook.workflow_run.name,
            repository=webhook.repository.full_name,
            repository_visibility=webhook.repository.visibility,
            branch=branch,
            event=webhook.workflow_run.event,
        )

    def runner_type(self, webhook: WebHook) -> str:
        """Classify the job's runner as GitHub-hosted or self-hosted.

        Returns:
            ``"github-hosted"`` when every label of the job is listed in
            ``settings.github_hosted_runner_labels`` (an empty label list
            therefore also qualifies), ``"self-hosted"`` otherwise.
        """
        hosted_labels = set(self.settings.github_hosted_runner_labels)
        if set(webhook.workflow_job.labels) <= hosted_labels:
            return "github-hosted"
        return "self-hosted"

    def runner_labels(self, webhook: WebHook) -> str:
        """Return the job's labels joined into one comma-separated string."""
        return ",".join(webhook.workflow_job.labels)

    def relabel_job_labels(
        self, relabel: Relabel, labels: List[str]
    ) -> Dict[str, Optional[str]]:
        """Derive a relabel value from the labels of a job.

        Args:
            relabel: Relabel rule providing ``label``, ``values`` and
                ``default``.
            labels: Labels attached to the job.

        Returns:
            ``{relabel.label: value}`` where ``value`` is the last entry of
            ``relabel.values`` found in ``labels``, or ``relabel.default``
            when none is present.
        """
        result = {relabel.label: relabel.default}
        for label in relabel.values:
            if label in labels:
                result[relabel.label] = label
        return result

    def relabel_job_names(self, relabel: Relabel, job: WorkflowJob) -> dict:
        """Derive a relabel value from the name of the runner of a job.

        Args:
            relabel: Relabel rule providing ``label``, ``values`` and
                ``default``.
            job: Job whose ``runner_name`` is inspected.

        Returns:
            ``{relabel.label: value}``. The value is ``""`` for queued or
            skipped jobs; otherwise it is the last entry of
            ``relabel.values`` that is a substring of the runner name, or
            ``relabel.default`` when none matches or the job has no runner
            name.
        """
        if job.status == "queued" or job.conclusion == "skipped":
            return {relabel.label: ""}

        result = {relabel.label: relabel.default}
        # A finished job may have no runner name; treat it as "no match"
        # instead of letting the substring test raise TypeError on None.
        runner_name = job.runner_name or ""
        for label in relabel.values:
            if label in runner_name:
                result[relabel.label] = label
        return result

    def job_labels(self, webhook: WebHook, settings: Settings) -> dict:
        """Build the label values shared by all job metrics.

        Args:
            webhook: Event carrying the ``workflow_job``.
            settings: Configuration whose ``job_relabelling`` rules are
                applied on top of the fixed labels.

        Returns:
            A dict with the fixed job labels plus one entry per relabel
            rule of type ``RelabelType.label`` or ``RelabelType.name``.
        """
        labels = dict(
            runner_type=self.runner_type(webhook),
            job_name=webhook.workflow_job.name,
            repository_visibility=webhook.repository.visibility,
            repository=webhook.repository.full_name,
            workflow_name=webhook.workflow_job.workflow_name,
            runner_labels=self.runner_labels(webhook),
        )
        for relabel in settings.job_relabelling:
            if relabel.type == RelabelType.label:
                labels.update(
                    self.relabel_job_labels(relabel, webhook.workflow_job.labels)
                )
            elif relabel.type == RelabelType.name:
                labels.update(
                    self.relabel_job_names(relabel, webhook.workflow_job)
                )
        return labels

    def handle_workflow_rebuild(self, webhook: WebHook):
        """Count a rebuild when a re-run workflow (attempt > 1) concludes."""
        # Rebuilds are counted on completion to stay on the safe side:
        # "queued" has been observed several times for the same workflow
        # and "in_progress" is not always delivered, so neither is a
        # trustworthy start-of-workflow signal.
        labels = self.workflow_labels(webhook)
        run = webhook.workflow_run
        if run.conclusion and run.run_attempt > 1:
            self.workflow_rebuild.labels(**labels).inc()

    def handle_workflow_status(self, webhook: WebHook):
        """Update the workflow status counters from a workflow_run event.

        A concluded run increments the counter matching its conclusion
        (success, failure or cancelled; other conclusions have no dedicated
        counter) and always the total counter. A run without conclusion but
        with status ``in_progress`` increments the in-progress counter.
        """
        labels = self.workflow_labels(webhook)
        run = webhook.workflow_run
        if run.conclusion:
            if run.conclusion == "success":
                self.workflow_status_success.labels(**labels).inc()
            elif run.conclusion == "failure":
                self.workflow_status_failure.labels(**labels).inc()
            elif run.conclusion == "cancelled":
                self.workflow_status_cancelled.labels(**labels).inc()
            self.workflow_status_total.labels(**labels).inc()
        # This relies on the in_progress status being delivered, and only
        # once per workflow.
        elif run.status == "in_progress":
            self.workflow_status_inprogress.labels(**labels).inc()

    def handle_workflow_duration(self, webhook: WebHook):
        """Observe the duration of a concluded workflow run.

        The duration is ``updated_at - run_started_at`` in seconds; runs
        without a conclusion are ignored.
        """
        run = webhook.workflow_run
        if run.conclusion:
            labels = self.workflow_labels(webhook)
            duration = run.updated_at.timestamp() - run.run_started_at.timestamp()
            self.workflow_duration.labels(**labels).observe(duration)

    def handle_job_status(self, webhook: WebHook, settings: Settings):
        """Update the job status counters from a workflow_job event.

        A concluded job increments the counter matching its conclusion
        (success, failure or cancelled; other conclusions have no dedicated
        counter) and always the total counter. Otherwise the in-progress or
        queued counter is incremented according to the job status.
        """
        labels = self.job_labels(webhook, settings)
        job = webhook.workflow_job
        if job.conclusion:
            if job.conclusion == "success":
                self.job_status_success.labels(**labels).inc()
            elif job.conclusion == "failure":
                self.job_status_failure.labels(**labels).inc()
            elif job.conclusion == "cancelled":
                self.job_status_cancelled.labels(**labels).inc()
            self.job_status_total.labels(**labels).inc()
        elif job.status == "in_progress":
            self.job_status_inprogress.labels(**labels).inc()
        elif job.status == "queued":
            self.job_status_queued.labels(**labels).inc()

    def _get_job_duration(self, webhook: WebHook) -> float:
        """Return ``completed_at - started_at`` in seconds for a job.

        Returns ``0.0`` when the job has no conclusion yet.
        """
        job = webhook.workflow_job
        if job.conclusion:
            return job.completed_at.timestamp() - job.started_at.timestamp()
        return 0.0

    def handle_job_duration(self, webhook: WebHook, settings: Settings):
        """Observe job timing histograms from a workflow_job event.

        A concluded job feeds its run duration into ``job_duration``. A job
        that is ``in_progress`` feeds the delay between its creation and
        its start into ``job_start_duration``.
        """
        labels = self.job_labels(webhook, settings)
        job = webhook.workflow_job
        if job.conclusion:
            duration = self._get_job_duration(webhook)
            self.job_duration.labels(**labels).observe(duration)
        elif job.status == "in_progress":
            job_start_duration = (
                job.started_at.timestamp() - job.created_at.timestamp()
            )
            self.job_start_duration.labels(**labels).observe(job_start_duration)

    def flavor_type(self, webhook: WebHook) -> Optional[str]:
        """Return the first job label found in ``settings.job_costs``.

        Returns:
            The matching label, or ``None`` when no label of the job has a
            configured cost.
        """
        for label in webhook.workflow_job.labels:
            if label in self.settings.job_costs:
                return label
        return None

    def handle_job_cost(self, webhook: WebHook, settings: Settings):
        """Add the cost of a concluded job to the ``job_cost`` counter.

        Jobs without a conclusion are ignored. The cost is computed by
        ``self.cost.get_job_cost`` from the job and its flavor label.
        """
        if not webhook.workflow_job.conclusion:
            return
        # Labels and flavor are only needed once the job has concluded.
        labels = self.job_labels(webhook, settings)
        flavor = self.flavor_type(webhook)
        cost = self.cost.get_job_cost(webhook.workflow_job, flavor)
        self.job_cost.labels(**labels).inc(cost)