Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions devtools/Tiltfile
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,9 @@ ARGO_WORKFLOWS_HELM_CHART_VERSION = os.getenv("ARGO_WORKFLOWS_HELM_CHART_VERSION
ARGO_WORKFLOWS_IMAGE_TAG = os.getenv("ARGO_WORKFLOWS_IMAGE_TAG", "v3.6.0")
AIRFLOW_HELM_CHART_VERSION = os.getenv("AIRFLOW_HELM_CHART_VERSION", "1.15.0")
AIRFLOW_IMAGE_TAG = os.getenv("AIRFLOW_IMAGE_TAG", "2.10.4")
ARGO_EVENTS_HELM_CHART_VERSION = os.getenv("ARGO_EVENTS_HELM_CHART_VERSION", "2.4.8")
ARGO_EVENTS_IMAGE_TAG = os.getenv("ARGO_EVENTS_IMAGE_TAG", "v1.9.2")
JOBSET_VERSION = os.getenv("JOBSET_VERSION", "v0.8.2")

# ---------------------------------------------------------------------------
# Component dependency graph
Expand All @@ -30,6 +33,8 @@ components = {
"ddb-local": [],
"sfn-local": ["ddb-local"],
"airflow": ["postgresql"],
"argo-events": ["argo-workflows"],
"jobset": [],
}

# ---------------------------------------------------------------------------
Expand Down Expand Up @@ -93,6 +98,8 @@ load('./tilt/localbatch.tiltfile', 'setup_localbatch')
load('./tilt/ddb_local.tiltfile', 'setup_ddb_local')
load('./tilt/sfn_local.tiltfile', 'setup_sfn_local')
load('./tilt/airflow.tiltfile', 'setup_airflow')
load('./tilt/argo_events.tiltfile', 'setup_argo_events')
load('./tilt/jobset.tiltfile', 'setup_jobset')

_SETUP = {
"minio": setup_minio,
Expand All @@ -104,6 +111,8 @@ _SETUP = {
"ddb-local": setup_ddb_local,
"sfn-local": setup_sfn_local,
"airflow": setup_airflow,
"argo-events": setup_argo_events,
"jobset": setup_jobset,
}

# ---------------------------------------------------------------------------
Expand All @@ -117,6 +126,9 @@ ctx = struct(
argo_workflows_image=ARGO_WORKFLOWS_IMAGE_TAG,
airflow_chart=AIRFLOW_HELM_CHART_VERSION,
airflow_image=AIRFLOW_IMAGE_TAG,
argo_events_chart=ARGO_EVENTS_HELM_CHART_VERSION,
argo_events_image=ARGO_EVENTS_IMAGE_TAG,
jobset=JOBSET_VERSION,
),
)

Expand Down
2 changes: 2 additions & 0 deletions devtools/pick_services.sh
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@ SERVICE_OPTIONS=(
"metadata-service"
"ui"
"argo-workflows"
"argo-events"
"jobset"
"localbatch"
"ddb-local"
"sfn-local"
Expand Down
64 changes: 64 additions & 0 deletions devtools/tilt/argo_events.tiltfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
load('ext://helm_remote', 'helm_remote')
load('./_result.tiltfile', 'new_result')

def setup_argo_events(ctx):
    """Install Argo Events via Helm and expose its webhook for Metaflow.

    In order: installs the `argo-events` chart, applies the four
    `argo-events-*.yaml` manifests under ./tilt/k8s/, starts a port-forward
    to the `argo-events-webhook-eventsource-svc-tilt` Service on port 12000,
    and labels the controller resource.

    Args:
        ctx: object exposing `versions.argo_events_chart` (Helm chart version)
            and `versions.argo_events_image` (used for `global.image.tag`).

    Returns:
        The value produced by `new_result` for the Metaflow Argo Events
        settings and the two resource names passed as `config_resources`.
    """
    helm_values = [
        'crds.install=true',
        'controller.metrics.enabled=true',
        # Probe delays and resource requests/limits for the controller.
        'controller.livenessProbe.initialDelaySeconds=1',
        'controller.readinessProbe.initialDelaySeconds=1',
        'controller.resources.requests.memory=64Mi',
        'controller.resources.requests.cpu=25m',
        'controller.resources.limits.memory=128Mi',
        'controller.resources.limits.cpu=50m',
        'configs.jetstream.streamConfig.maxAge=72h',
        'configs.jetstream.streamConfig.replicas=1',
        'controller.rbac.enabled=true',
        'controller.rbac.namespaced=false',
        'controller.serviceAccount.create=true',
        'controller.serviceAccount.name=argo-events-events-controller-sa',
        # JetStream version entry 0: "latest".
        'configs.jetstream.versions[0].configReloaderImage=natsio/nats-server-config-reloader:latest',
        'configs.jetstream.versions[0].metricsExporterImage=natsio/prometheus-nats-exporter:latest',
        'configs.jetstream.versions[0].natsImage=nats:latest',
        'configs.jetstream.versions[0].startCommand=/nats-server',
        'configs.jetstream.versions[0].version=latest',
        # JetStream version entry 1: "2.9.15".
        'configs.jetstream.versions[1].configReloaderImage=natsio/nats-server-config-reloader:latest',
        'configs.jetstream.versions[1].metricsExporterImage=natsio/prometheus-nats-exporter:latest',
        'configs.jetstream.versions[1].natsImage=nats:2.9.15',
        'configs.jetstream.versions[1].startCommand=/nats-server',
        'configs.jetstream.versions[1].version=2.9.15',
        'global.image.tag=%s' % ctx.versions.argo_events_image,
    ]
    helm_remote(
        'argo-events',
        version=ctx.versions.argo_events_chart,
        repo_name='argo',
        repo_url='https://argoproj.github.io/argo-helm',
        set=helm_values,
    )

    # Applied in this fixed order, one k8s_yaml call per file.
    for manifest_path in [
        './tilt/k8s/argo-events-rbac.yaml',
        './tilt/k8s/argo-events-eventbus.yaml',
        './tilt/k8s/argo-events-eventsource.yaml',
        './tilt/k8s/argo-events-webhook-svc.yaml',
    ]:
        k8s_yaml(read_file(manifest_path))

    # Poll every 5s until the Service exists and an eventsource pod reports
    # "Running"; only then start the port-forward.
    wait_until_ready = 'while ! kubectl get service/argo-events-webhook-eventsource-svc-tilt >/dev/null 2>&1 || ! kubectl get pods -l eventsource-name=argo-events-webhook -o jsonpath="{.items[*].status.phase}" | grep -q "Running"; do sleep 5; done'
    port_forward = 'kubectl port-forward service/argo-events-webhook-eventsource-svc-tilt 12000:12000'
    local_resource(
        name='argo-events-webhook-eventsource-svc',
        serve_cmd=wait_until_ready + ' && ' + port_forward,
        links=[link('http://localhost:12000/metaflow-event', 'Argo Events Webhook')],
        labels=['argo-events'],
    )

    k8s_resource('argo-events-controller-manager', labels=['argo-events'])

    metaflow_config = {
        "METAFLOW_ARGO_EVENTS_EVENT": "metaflow-event",
        "METAFLOW_ARGO_EVENTS_EVENT_BUS": "default",
        "METAFLOW_ARGO_EVENTS_EVENT_SOURCE": "argo-events-webhook",
        "METAFLOW_ARGO_EVENTS_SERVICE_ACCOUNT": "operate-workflow-sa",
        "METAFLOW_ARGO_EVENTS_WEBHOOK_AUTH": "service",
        "METAFLOW_ARGO_EVENTS_INTERNAL_WEBHOOK_URL": "http://argo-events-webhook-eventsource-svc:12000/metaflow-event",
        "METAFLOW_ARGO_EVENTS_WEBHOOK_URL": "http://localhost:12000/metaflow-event",
    }
    required_resources = [
        'argo-events-controller-manager',
        'argo-events-webhook-eventsource-svc',
    ]
    return new_result(
        config=metaflow_config,
        config_resources=required_resources,
    )
14 changes: 14 additions & 0 deletions devtools/tilt/jobset.tiltfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
load('./_result.tiltfile', 'new_result')

def setup_jobset(ctx):
    """Install JobSet from the upstream release manifest for a pinned version.

    Args:
        ctx: object exposing `versions.jobset`, the release tag interpolated
            into the GitHub release download URL.
    """
    jobset_manifest_url = "https://github.com/kubernetes-sigs/jobset/releases/download/%s/manifests.yaml" % ctx.versions.jobset
    # -f makes curl exit non-zero on an HTTP error (e.g. a 404 for a release
    # tag that does not exist) so the failure surfaces at the download step
    # instead of the error page being handed to k8s_yaml as if it were YAML.
    # NOTE(review): the download is not checksum-verified; consider vendoring
    # the manifest or pinning a known digest.
    k8s_yaml(local("curl -fsSL %s" % jobset_manifest_url))
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Remote manifest fetched without integrity check

curl -sSL downloads the jobset manifest from GitHub at every tilt up without verifying its checksum. While the URL is version-pinned, an upstream compromise or MITM between the developer and GitHub would apply arbitrary manifests to the cluster. Consider caching the manifest in-tree (as done with the RBAC file) or at least documenting the known SHA so developers can verify out-of-band.


k8s_resource('jobset-controller-manager', labels=['jobset'])

k8s_yaml(read_file('./tilt/k8s/jobset-rbac.yaml'))

return new_result(
config={"METAFLOW_KUBERNETES_JOBSET_ENABLED": "true"},
config_resources=['jobset-controller-manager'],
)
17 changes: 17 additions & 0 deletions devtools/tilt/k8s/argo-events-eventbus.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
apiVersion: argoproj.io/v1alpha1
kind: EventBus
metadata:
name: default
namespace: default
spec:
jetstream:
version: "2.9.15"
replicas: 3
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 EventBus replicas=3 is heavy for a dev environment

replicas: 3 launches three JetStream pods, each consuming 100m CPU and 128Mi memory, totalling 300m CPU and ~384Mi. In a typical single-node local cluster (kind/minikube/docker-desktop) this provides no HA benefit since all three pods land on the same node. replicas: 1 is the standard choice for devtools setups and keeps resource consumption in line with the rest of the stack.

Suggested change
replicas: 3
replicas: 1

Note: If this suggestion doesn't match your team's coding style, reply to this and let me know. I'll remember it for next time!

containerTemplate:
resources:
limits:
cpu: 100m
memory: 128Mi
requests:
cpu: 100m
memory: 128Mi
24 changes: 24 additions & 0 deletions devtools/tilt/k8s/argo-events-eventsource.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
apiVersion: argoproj.io/v1alpha1
kind: EventSource
metadata:
name: argo-events-webhook
namespace: default
spec:
template:
container:
resources:
requests:
cpu: 25m
memory: 50Mi
limits:
cpu: 25m
memory: 50Mi
service:
ports:
- port: 12000
targetPort: 12000
webhook:
metaflow-event:
port: "12000"
endpoint: /metaflow-event
method: POST
52 changes: 52 additions & 0 deletions devtools/tilt/k8s/argo-events-rbac.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
apiVersion: v1
kind: ServiceAccount
metadata:
name: operate-workflow-sa
namespace: default
---
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
name: operate-workflow-role
namespace: default
rules:
- apiGroups: [argoproj.io]
resources: [workflows, workflowtemplates, cronworkflows, clusterworkflowtemplates]
verbs: ["*"]
---
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
name: operate-workflow-role-binding
namespace: default
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: Role
name: operate-workflow-role
subjects:
- kind: ServiceAccount
name: operate-workflow-sa
---
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
name: view-events-role
namespace: default
rules:
- apiGroups: [argoproj.io]
resources: [eventsources, eventbuses, sensors]
verbs: [get, list, watch]
---
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
name: view-events-role-binding
namespace: default
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: Role
name: view-events-role
subjects:
- kind: ServiceAccount
name: argo-workflows
namespace: default
15 changes: 15 additions & 0 deletions devtools/tilt/k8s/argo-events-webhook-svc.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
apiVersion: v1
kind: Service
metadata:
name: argo-events-webhook-eventsource-svc-tilt
namespace: default
spec:
ports:
- port: 12000
protocol: TCP
targetPort: 12000
selector:
controller: eventsource-controller
eventsource-name: argo-events-webhook
owner-name: argo-events-webhook
type: ClusterIP
21 changes: 21 additions & 0 deletions devtools/tilt/k8s/jobset-rbac.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
name: jobset-full-access
rules:
- apiGroups: [jobset.x-k8s.io]
resources: [jobsets]
verbs: ["*"]
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
name: default-jobset-binding
subjects:
- kind: ServiceAccount
name: default
namespace: default
roleRef:
kind: ClusterRole
name: jobset-full-access
apiGroup: rbac.authorization.k8s.io
Loading