Skip to content

Commit 9a8b9dc

Browse files
add extra pipeline for kubeletstats monitoring in fargate pods
1 parent 9f5efbf commit 9a8b9dc

12 files changed

+645
-16
lines changed

charts/agent/Chart.lock

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,5 +20,8 @@ dependencies:
2020
- name: opentelemetry-collector
2121
repository: https://open-telemetry.github.io/opentelemetry-helm-charts
2222
version: 0.130.2
23-
digest: sha256:1463a6ca81d2cffd7c7cdf60a8bbc1f490ca721a50328f17a9b1f8d06a1dc6b1
24-
generated: "2025-08-26T11:41:10.527947-04:00"
23+
- name: opentelemetry-operator
24+
repository: https://open-telemetry.github.io/opentelemetry-helm-charts
25+
version: 0.93.1
26+
digest: sha256:b5548207946689a925841cca60cf59984043d00886dcb93b407c144630af909f
27+
generated: "2025-09-29T16:49:24.216039-07:00"

charts/agent/Chart.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,11 @@ dependencies:
4040
repository: https://open-telemetry.github.io/opentelemetry-helm-charts
4141
alias: gateway
4242
condition: gatewayDeployment.enabled
43+
# OpenTelemetry Operator subchart, exposed under the alias
# `fargate-sidecar-injector` and pulled in only when `node.fargateMode` is true.
- name: opentelemetry-operator
  alias: fargate-sidecar-injector
  version: 0.93.1
  repository: https://open-telemetry.github.io/opentelemetry-helm-charts
  condition: node.fargateMode
4348
maintainers:
4449
- name: Observe
4550

charts/agent/templates/_config-processors.tpl

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -112,6 +112,14 @@ attributes/debug_source_cadvisor_metrics:
112112
{{- end -}}
113113
{{- end -}}
114114

115+
{{- /*
Renders the `attributes/debug_source_sidecar_kubeletstats_metrics` processor,
whose single action inserts the attribute
`debug_source: sidecar_kubeletstats_metrics`.
*/ -}}
{{- define "config.processors.attributes.sidecar_kubeletstats_metrics" -}}
attributes/debug_source_sidecar_kubeletstats_metrics:
  actions:
    - key: debug_source
      action: insert
      value: sidecar_kubeletstats_metrics
{{- end -}}
122+
115123
{{- define "config.processors.attributes.drop_container_info" -}}
116124
resource/drop_container_info:
117125
attributes:
@@ -126,6 +134,21 @@ resource/drop_service_name:
126134
key: service.name
127135
{{- end -}}
128136

137+
{{- /*
Renders the `metricstransform/duplicate_k8s_cpu_metrics` processor. Each
transform uses `action: insert` to emit the container/pod/node
`*.cpu.usage` metric under the matching `*.cpu.utilization` name as well.
*/ -}}
{{- define "config.processors.metricstransform.duplicate_k8s_cpu_metrics" -}}
# Mirror the newer k8s CPU metric names onto the names the Kubernetes Explorer relies on.
metricstransform/duplicate_k8s_cpu_metrics:
  transforms:
    # container level
    - include: container.cpu.usage
      action: insert
      new_name: container.cpu.utilization
    # pod level
    - include: k8s.pod.cpu.usage
      action: insert
      new_name: k8s.pod.cpu.utilization
    # node level
    - include: k8s.node.cpu.usage
      action: insert
      new_name: k8s.node.cpu.utilization
{{- end -}}
151+
129152
{{- define "config.processors.filter.drop_long_spans" -}}
130153
{{- if eq .Values.node.forwarder.traces.maxSpanDuration "none" }}
131154
{{- else if (regexMatch "^[0-9]+(ns|us|ms|s|m|h)$" .Values.node.forwarder.traces.maxSpanDuration) }}

charts/agent/templates/_config-receivers.tpl

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -137,3 +137,26 @@ prometheus/cadvisor:
137137
replacement: /api/v1/nodes/$$1/proxy/metrics/cadvisor
138138
{{ end }}
139139
{{ end }}
140+
141+
{{- /*
Renders the `prometheus/kubeletstats` receiver: a single scrape job that
discovers nodes (`role: node`), points every target at
`kubernetes.default.svc:443`, and rewrites the metrics path to
`/api/v1/nodes/<node name>/proxy/stats/summary`. Requests use the pod's
service-account token and CA bundle.

NOTE(review): the kubelet `/stats/summary` endpoint is understood to return
JSON rather than Prometheus exposition text - confirm this job can parse it.
NOTE(review): `insecure_skip_verify: true` turns off certificate verification
even though `ca_file` is supplied - confirm that is intentional.
*/ -}}
{{- define "config.receivers.prometheus.kubeletstats" -}}
prometheus/kubeletstats:
  config:
    scrape_configs:
      - job_name: 'kubernetes-nodes-kubeletstats'
        scheme: https
        tls_config:
          ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
          insecure_skip_verify: true
        bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token

        kubernetes_sd_configs:
          - role: node

        relabel_configs:
          # Send every scrape to the API server instead of the node address.
          - target_label: __address__
            replacement: kubernetes.default.svc:443
          # Build the per-node proxy path from the discovered node name.
          - source_labels: [__meta_kubernetes_node_name]
            regex: (.+)
            target_label: __metrics_path__
            replacement: /api/v1/nodes/$$1/proxy/stats/summary
{{- end -}}

charts/agent/templates/_config.tpl

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,14 @@
1212
{{- toYaml $config | indent 2 }}
1313
{{- end }}
1414

15+
{{- /*
Builds the final collector config for the Fargate sidecar.

Works on deep copies of the root context and of `.Values`, renders
"observe.sidecar.fargateSidecarMetrics.config" against that copy, then merges
`agent.config.fargateSidecarMetrics` and `agent.config.global.overrides` into
the rendered base with `mustMergeOverwrite`. The result is emitted as YAML
indented by two spaces.
*/ -}}
{{- define "observe.sidecar.applyFargateSidecarMetricsConfig" -}}
{{- $values := deepCopy .Values }}
{{- $ctx := mustMergeOverwrite (deepCopy .) (dict "Values" $values) }}
{{- $base := include "observe.sidecar.fargateSidecarMetrics.config" $ctx | fromYaml }}
{{- $merged := mustMergeOverwrite $base ($values.agent.config.fargateSidecarMetrics) ($values.agent.config.global.overrides) -}}
{{- toYaml $merged | indent 2 }}
{{- end }}
21+
22+
1523
{{- define "observe.deployment.applyPrometheusScraperConfig" -}}
1624
{{- $values := deepCopy .Values }}
1725
{{- $data := dict "Values" $values | mustMergeOverwrite (deepCopy .) }}
Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
{{- /*
Base collector configuration for the Fargate sidecar: one kubeletstats
receiver, an `otlphttp` and a `debug` exporter, and a single
`metrics/kubeletstats` pipeline that is rendered only when
`node.containers.metrics.enabled` is set.
*/ -}}
{{- define "observe.sidecar.fargateSidecarMetrics.config" -}}

{{- $sidecarExporters := list "otlphttp" "debug" -}}

receivers:
  kubeletstats:
    collection_interval: {{.Values.node.containers.metrics.interval}}
    auth_type: 'serviceAccount'
    # The kubelet is reached through the API server's node proxy; the node name
    # comes from the K8S_NODE_NAME environment variable.
    endpoint: https://kubernetes.default.svc/api/v1/nodes/${env:K8S_NODE_NAME}/proxy
    node: '${env:K8S_NODE_NAME}'
    insecure_skip_verify: true
    k8s_api_config:
      auth_type: serviceAccount
    metric_groups:
      - node
      - pod
      - container
    metrics:
      # Optional metrics; each one has to be switched on explicitly, see
      # https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/main/receiver/kubeletstatsreceiver/documentation.md#optional-metrics
      container.cpu.usage:
        enabled: true
      container.uptime:
        enabled: true
      k8s.container.cpu.node.utilization:
        enabled: true
      k8s.container.cpu_limit_utilization:
        enabled: true
      k8s.container.cpu_request_utilization:
        enabled: true
      k8s.container.memory.node.utilization:
        enabled: true
      k8s.container.memory_limit_utilization:
        enabled: true
      k8s.container.memory_request_utilization:
        enabled: true
      k8s.node.cpu.usage:
        enabled: true
      k8s.node.uptime:
        enabled: true
      k8s.pod.cpu.node.utilization:
        enabled: true
      k8s.pod.cpu.usage:
        enabled: true
      k8s.pod.cpu_limit_utilization:
        enabled: true
      k8s.pod.cpu_request_utilization:
        enabled: true
      k8s.pod.memory.node.utilization:
        enabled: true
      k8s.pod.memory_limit_utilization:
        enabled: true
      k8s.pod.memory_request_utilization:
        enabled: true
      k8s.pod.uptime:
        enabled: true
    extra_metadata_labels:
      - container.id

exporters:
  # NOTE(review): service name and namespace are hard-coded here; presumably
  # they must match the forwarder's actual name/namespace - confirm.
  otlphttp:
    endpoint: http://observe-agent-forwarder.observe.svc:4318
  # NOTE(review): detailed debug output is always part of the pipeline below;
  # confirm this is wanted outside of development.
  debug:
    verbosity: detailed

service:
  # NOTE(review): when node.containers.metrics.enabled is false no pipeline is
  # rendered under `pipelines` - confirm the collector accepts that.
  pipelines:
    {{- if .Values.node.containers.metrics.enabled }}
    metrics/kubeletstats:
      receivers: [kubeletstats] # should add processors back eventually
      exporters: [{{ join ", " $sidecarExporters }}]
    {{- end -}}
{{- end }}

charts/agent/templates/_node-logs-metrics-config.tpl

Lines changed: 1 addition & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -157,6 +157,7 @@ processors:
157157
{{- include "config.processors.batch" . | nindent 2 }}
158158
{{- include "config.processors.attributes.k8sattributes" . | nindent 2 }}
159159
{{- include "config.processors.resource.observe_common" . | nindent 2 }}
160+
{{- include "config.processors.metricstransform.duplicate_k8s_cpu_metrics" . | nindent 2 }}
160161

161162
# attributes to append to objects
162163
attributes/debug_source_pod_logs:
@@ -175,18 +176,6 @@ processors:
175176
action: insert
176177
value: kubeletstats_metrics
177178

178-
# convert new k8s metric names to the names our Kubernetes Explorer relies on
179-
metricstransform/duplicate_k8s_cpu_metrics:
180-
transforms:
181-
- include: container.cpu.usage
182-
action: insert
183-
new_name: container.cpu.utilization
184-
- include: k8s.pod.cpu.usage
185-
action: insert
186-
new_name: k8s.pod.cpu.utilization
187-
- include: k8s.node.cpu.usage
188-
action: insert
189-
new_name: k8s.node.cpu.utilization
190179

191180
# Create intermediate lists for pipeline arrays to then modify based on values.yaml
192181
{{- $logsExporters := (list "otlphttp/observe/base") -}}
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
{{- /*
OpenTelemetryCollector custom resource (sidecar mode) named
`fargate-sidecar-metrics`.

Gated on `node.fargateMode`: that is the flag values.yaml defines and the
condition Chart.yaml uses to enable the `fargate-sidecar-injector`
(opentelemetry-operator) dependency. The previous gate,
`node.fargateSidecar.enabled`, pointed at a key that values.yaml does not
define, so evaluating `.enabled` on the missing parent failed at render time.

The collector config comes from
"observe.sidecar.applyFargateSidecarMetricsConfig"; K8S_NODE_NAME is taken from
the pod's `spec.nodeName` because that config references it.
*/ -}}
{{- if .Values.node.fargateMode }}
apiVersion: opentelemetry.io/v1beta1
kind: OpenTelemetryCollector
metadata:
  name: fargate-sidecar-metrics
spec:
  mode: sidecar
  env:
    - name: K8S_NODE_NAME
      valueFrom:
        fieldRef:
          fieldPath: spec.nodeName
  config:
    {{- include "observe.sidecar.applyFargateSidecarMetricsConfig" . | nindent 4 }}
  initContainers:
    - name: kube-cluster-info
      image: observeinc/kube-cluster-info:v0.11.5
      imagePullPolicy: Always
      env:
        - name: NAMESPACE
          valueFrom:
            fieldRef:
              fieldPath: metadata.namespace
{{- end }}

charts/agent/values.yaml

Lines changed: 32 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,8 @@ node:
4747
# -- Enables the node-logs-metrics agent daemonset for collection of node logs and metrics.
4848
# The nodes on which metrics and logs are collected can be configured via `affinity` in the `node-logs-metrics` section below.
4949
# This should be set to false to disable the node-logs-metrics daemonset when running in a serverless environment (ex: EKS Fargate).
50-
enabled: true
50+
enabled: false
51+
fargateMode: false
5152
# collects host level metrics from node
5253
metrics:
5354
enabled: true
@@ -100,6 +101,7 @@ node:
100101
# this resolves issues similar to https://github.com/open-telemetry/opentelemetry-collector-contrib/issues/26481#issuecomment-1720797914 for `no such host` or `connection refused`.
101102
useNodeIp: false
102103
forwarder:
104+
mode: deployment
103105
enabled: true
104106
traces:
105107
enabled: true
@@ -229,6 +231,10 @@ agent:
229231
# exporters:
230232
# - otlphttp/extra
231233
# - otlphttp/observe/forward/trace
234+
235+
# -- Additional OTel collector config for fargate-sidecar-metrics custom resource
236+
fargateSidecarMetrics:
237+
# Put any OTel config overrides here.
232238

233239
# -- Additional OTel collector config for gateway deployment
234240
gateway:
@@ -951,7 +957,7 @@ monitor:
951957
forwarder:
952958
# -- The forwarder can be run as a daemonset or as a deployment, selected by `mode` (the value set below is the default). Deployment mode
953959
# must be used when running in a serverless environment (ex: EKS Fargate) where daemonsets are not supported.
954-
mode: daemonset
960+
mode: deployment
955961

956962
# -- The `replicaCount` is only used when `mode` is set to "deployment". It is ignored when `mode` is set to "daemonset".
957963
# In deployment mode, this sets the number of replicas (ie the number of forwarder pods to run).
@@ -1245,3 +1251,27 @@ gateway:
12451251
- name: observe-agent-deployment-config
12461252
mountPath: /observe-agent-conf
12471253
# ----------------------------------------- #
1254+
image:
1255+
repository: "otel/opentelemetry-collector-k8s"
1256+
1257+
1258+
fargate-sidecar-injector:
  # -- Values for the otel operator subchart, which will inject a sidecar container into all pods in the cluster.
  # Only needed when running in a serverless environment (ex: EKS Fargate) where daemonsets are not supported.

  replicaCount: 1

  # ----------------------------------------- #
  # Different for each deployment/daemonset #
  nameOverride: "fargate-sidecar-injector"
  # !!! IMPORTANT !!! This needs to have same value as namespaceOverride in cluster above
  namespaceOverride: "observe"
  # for now, use defaults for the rest of the values
  # ----------------------------------------- #
  manager:
    # Collector image settings passed to the operator's manager.
    collectorImage:
      repository: observeinc/observe-agent
      tag: "2.8.1"
1276+
1277+

cluster_role.yaml

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
---
# ClusterRole granting get/list/watch on nodes, nodes/proxy, namespaces and
# pods (core API group) and on replicasets (apps API group).
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: otel-sidecar-role
rules:
  - apiGroups: [""]
    resources: [nodes, nodes/proxy, namespaces, pods]
    verbs: [get, list, watch]

  - apiGroups: [apps]
    resources: [replicasets]
    verbs: [get, list, watch]

---
# Binds otel-sidecar-role to a single ServiceAccount.
# NOTE(review): the subject is hard-coded to `my-otel-demo` in namespace
# `demo-fargate`, which looks like a demo value - confirm before reuse.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: otel-sidecar-role-binding
subjects:
  - kind: ServiceAccount
    name: my-otel-demo
    namespace: demo-fargate
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: otel-sidecar-role

0 commit comments

Comments
 (0)