Skip to content

Commit 0bdc1eb

Browse files
committed
[CI] Add terraform resources for deploying a daily cronjob that processes LLVM commit
1 parent 1d9240c commit 0bdc1eb

File tree

3 files changed

+162
-0
lines changed

3 files changed

+162
-0
lines changed

premerge/gke_cluster/main.tf

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,10 @@ resource "google_container_cluster" "llvm_premerge" {
1212
# for adding windows nodes to the cluster.
1313
networking_mode = "VPC_NATIVE"
1414
ip_allocation_policy {}
15+
16+
workload_identity_config {
17+
workload_pool = "llvm-premerge-checks.svc.id.goog"
18+
}
1519
}
1620

1721
resource "google_container_node_pool" "llvm_premerge_linux_service" {
@@ -23,6 +27,10 @@ resource "google_container_node_pool" "llvm_premerge_linux_service" {
2327

2428
node_config {
2529
machine_type = "e2-highcpu-4"
30+
31+
workload_metadata_config {
32+
mode = "GKE_METADATA"
33+
}
2634
# Terraform wants to recreate the node pool every time when running
2735
# terraform apply unless we explicitly set this.
2836
# TODO(boomanaiden154): Look into why terraform is doing this so we do

premerge/main.tf

Lines changed: 104 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -190,3 +190,107 @@ resource "kubernetes_manifest" "metrics_deployment" {
190190

191191
depends_on = [kubernetes_namespace.metrics, kubernetes_secret.metrics_secrets]
192192
}
193+
194+
# Resources for collecting LLVM operational metrics data
195+
196+
# Service accounts and bindings to grant access to the
197+
# BigQuery API for our cronjob
198+
# Google service account (GSA) for the operational metrics cronjob.
# Its email, project and name are referenced by the IAM bindings and the
# Kubernetes service account annotation defined in this file.
resource "google_service_account" "operational_metrics_gsa" {
  display_name = "Operational Metrics GSA"
  account_id   = "operational-metrics-gsa"
}
202+
203+
# Grants roles/bigquery.jobUser, on the project that owns the GSA, to the
# operational metrics GSA.
# NOTE(review): google_project_iam_binding is documented as authoritative for
# the role it manages — confirm no other principal in this project needs
# roles/bigquery.jobUser, otherwise google_project_iam_member may be the
# safer resource.
resource "google_project_iam_binding" "bigquery_jobuser_binding" {
  role    = "roles/bigquery.jobUser"
  project = google_service_account.operational_metrics_gsa.project

  members = ["serviceAccount:${google_service_account.operational_metrics_gsa.email}"]

  depends_on = [google_service_account.operational_metrics_gsa]
}
213+
214+
# Namespace that holds every operational metrics Kubernetes object. It is
# created through the kubernetes.llvm-premerge-us-central provider alias.
resource "kubernetes_namespace" "operational_metrics" {
  provider = kubernetes.llvm-premerge-us-central

  metadata {
    name = "operational-metrics"
  }
}
220+
221+
# Kubernetes service account (KSA) that the operational metrics cronjob runs
# as (see serviceAccountName in operational_metrics_cronjob.yaml). The
# iam.gke.io/gcp-service-account annotation names the GSA this KSA is paired
# with.
resource "kubernetes_service_account" "operational_metrics_ksa" {
  metadata {
    name      = "operational-metrics-ksa"
    namespace = "operational-metrics"

    annotations = {
      "iam.gke.io/gcp-service-account" = google_service_account.operational_metrics_gsa.email
    }
  }

  # Pin the same provider alias as the namespace, secret and cronjob manifest.
  # Without it this resource is created through the default kubernetes
  # provider rather than the cluster that owns the operational-metrics
  # namespace.
  provider   = kubernetes.llvm-premerge-us-central
  depends_on = [kubernetes_namespace.operational_metrics]
}
233+
234+
# Grants roles/iam.workloadIdentityUser on the operational metrics GSA to the
# workload identity member for the operational-metrics/operational-metrics-ksa
# Kubernetes service account.
# The namespace and KSA name inside the member string are literals; keep them
# in sync with kubernetes_service_account.operational_metrics_ksa.
resource "google_service_account_iam_binding" "workload_identity_binding" {
  role               = "roles/iam.workloadIdentityUser"
  service_account_id = google_service_account.operational_metrics_gsa.name

  members = ["serviceAccount:${google_service_account.operational_metrics_gsa.project}.svc.id.goog[operational-metrics/operational-metrics-ksa]"]

  depends_on = [
    kubernetes_service_account.operational_metrics_ksa,
    google_service_account.operational_metrics_gsa,
  ]
}
247+
248+
# The container for scraping LLVM commits needs persistent storage
249+
# for a locally checked-out llvm/llvm-project
250+
# 20Gi ReadWriteOnce claim (storage class standard-rwo) in the
# operational-metrics namespace. The cronjob mounts it at /data (see
# operational_metrics_cronjob.yaml).
resource "kubernetes_persistent_volume_claim" "operational_metrics_pvc" {
  metadata {
    name      = "operational-metrics-pvc"
    namespace = "operational-metrics"
  }

  spec {
    access_modes       = ["ReadWriteOnce"]
    storage_class_name = "standard-rwo"

    resources {
      requests = {
        storage = "20Gi"
      }
    }
  }

  # The only consumer of this claim is the cronjob, which itself depends on
  # this resource. Do not block `terraform apply` waiting for the claim to
  # bind: a storage class that binds on first consumer would never bind
  # before the cronjob exists.
  # NOTE(review): assumes standard-rwo uses WaitForFirstConsumer — confirm
  # against the cluster's storage class.
  wait_until_bound = false

  # Pin the same provider alias as the namespace, secret and cronjob manifest
  # so the claim is created in the cluster that owns the namespace.
  provider   = kubernetes.llvm-premerge-us-central
  depends_on = [kubernetes_namespace.operational_metrics]
}
268+
269+
# Opaque secret consumed by the operational metrics cronjob. The three keys
# are populated from the metrics_* Secret Manager data sources, which are
# declared outside this section.
resource "kubernetes_secret" "operational_metrics_secrets" {
  provider = kubernetes.llvm-premerge-us-central
  type     = "Opaque"

  metadata {
    name      = "operational-metrics-secrets"
    namespace = "operational-metrics"
  }

  data = {
    "github-token"           = data.google_secret_manager_secret_version.metrics_github_pat.secret_data
    "grafana-api-key"        = data.google_secret_manager_secret_version.metrics_grafana_api_key.secret_data
    "grafana-metrics-userid" = data.google_secret_manager_secret_version.metrics_grafana_metrics_userid.secret_data
  }

  depends_on = [kubernetes_namespace.operational_metrics]
}
285+
286+
# Applies the CronJob described in operational_metrics_cronjob.yaml. The
# explicit depends_on lists every object the manifest refers to by name
# (namespace, secret, service account, PVC), since Terraform cannot infer
# those dependencies from the decoded YAML.
resource "kubernetes_manifest" "operational_metrics_cronjob" {
  provider = kubernetes.llvm-premerge-us-central
  manifest = yamldecode(file("operational_metrics_cronjob.yaml"))

  depends_on = [
    kubernetes_namespace.operational_metrics,
    kubernetes_secret.operational_metrics_secrets,
    kubernetes_service_account.operational_metrics_ksa,
    kubernetes_persistent_volume_claim.operational_metrics_pvc,
  ]
}
Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
# operational_metrics_cronjob.yaml
#
# Daily CronJob that runs the process-llvm-commits container in the
# operational-metrics namespace.
apiVersion: batch/v1
kind: CronJob
metadata:
  name: operational-metrics-cronjob
  namespace: operational-metrics
spec:
  # 07:00 UTC every day, which is midnight PDT (23:00 during PST).
  timeZone: "Etc/UTC"
  schedule: "0 7 * * *"
  concurrencyPolicy: Forbid
  jobTemplate:
    spec:
      template:
        spec:
          restartPolicy: OnFailure
          # Kubernetes service account defined in main.tf.
          serviceAccountName: operational-metrics-ksa
          containers:
            - name: process-llvm-commits
              image: ghcr.io/llvm/operations-metrics:latest
              resources:
                requests:
                  cpu: "250m"
                  memory: "256Mi"
                limits:
                  cpu: "1"
                  memory: "512Mi"
              # Credentials are read from the operational-metrics-secrets
              # secret.
              env:
                - name: GITHUB_TOKEN
                  valueFrom:
                    secretKeyRef:
                      name: operational-metrics-secrets
                      key: github-token
                - name: GRAFANA_API_KEY
                  valueFrom:
                    secretKeyRef:
                      name: operational-metrics-secrets
                      key: grafana-api-key
                - name: GRAFANA_METRICS_USERID
                  valueFrom:
                    secretKeyRef:
                      name: operational-metrics-secrets
                      key: grafana-metrics-userid
              # Persistent storage backed by operational-metrics-pvc.
              volumeMounts:
                - name: metrics-volume
                  mountPath: "/data"
          volumes:
            - name: metrics-volume
              persistentVolumeClaim:
                claimName: operational-metrics-pvc

0 commit comments

Comments
 (0)