Skip to content

Commit 16c8b75

Browse files
khrm and claude committed
test: add e2e test for OpenCensus to OpenTelemetry metrics migration
Adds TestOTelMetrics, a consolidated e2e test for the OC→OTel metrics migration in Pipelines-as-Code (PR #2567). The test scrapes two pods: Controller (app.kubernetes.io/name=controller): - Asserts http_client_* metrics from knative k8s client OTel instrumentation - Asserts go_* runtime metrics - Checks PAC application metrics (pipelines_as_code_*) are absent/present - Asserts old OC metric names are absent Watcher (app.kubernetes.io/name=watcher): - Asserts kn_workqueue_* metrics (watcher uses knative reconciler) - Asserts go_* runtime metrics Verified locally with PAC controller and watcher deployed to kind via ko. Relates to #2567 Co-Authored-By: Claude Sonnet 4.6 (1M context) <noreply@anthropic.com>
1 parent 74939ef commit 16c8b75

1 file changed

Lines changed: 304 additions & 0 deletions

File tree

test/metrics_otel_test.go

Lines changed: 304 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,304 @@
1+
//go:build e2e
2+
// +build e2e
3+
4+
// Copyright 2026 The Tekton Authors
5+
//
6+
// Licensed under the Apache License, Version 2.0 (the "License");
7+
// you may not use this file except in compliance with the License.
8+
// You may obtain a copy of the License at
9+
//
10+
// http://www.apache.org/licenses/LICENSE-2.0
11+
//
12+
// Unless required by applicable law or agreed to in writing, software
13+
// distributed under the License is distributed on an "AS IS" BASIS,
14+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
// See the License for the specific language governing permissions and
16+
// limitations under the License.
17+
18+
package test
19+
20+
import (
21+
"context"
22+
"fmt"
23+
"strings"
24+
"testing"
25+
"time"
26+
27+
dto "github.com/prometheus/client_model/go"
28+
"github.com/prometheus/common/expfmt"
29+
"github.com/prometheus/common/model"
30+
corev1 "k8s.io/api/core/v1"
31+
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
32+
"k8s.io/client-go/kubernetes"
33+
"k8s.io/client-go/tools/clientcmd"
34+
)
35+
36+
const (
	// pacControllerMetricsPort is the port, kept as a string so it can be
	// joined directly into a "<pod>:<port>" API server proxy target, that is
	// scraped for /metrics. Despite the name it is used for every PAC pod
	// scraped in this file (controller and watcher alike).
	pacControllerMetricsPort = "9090"
	// pacNamespace is the namespace in which PAC pods are looked up.
	pacNamespace = "pipelines-as-code"
)
42+
43+
// pacKubeClient builds a kubernetes client from the default kubeconfig.
44+
func pacKubeClient(t *testing.T) kubernetes.Interface {
45+
t.Helper()
46+
rules := clientcmd.NewDefaultClientConfigLoadingRules()
47+
cfg, err := clientcmd.NewNonInteractiveDeferredLoadingClientConfig(rules, &clientcmd.ConfigOverrides{}).ClientConfig()
48+
if err != nil {
49+
t.Fatalf("Failed to build kubeconfig: %v", err)
50+
}
51+
return kubernetes.NewForConfigOrDie(cfg)
52+
}
53+
54+
// scrapePACPodMetrics scrapes the /metrics endpoint of a PAC pod selected
55+
// by the given label selector via the Kubernetes API server proxy.
56+
// It returns an error instead of calling t.Fatalf so it can be retried
57+
// inside polling loops without aborting the test on transient failures.
58+
func scrapePACPodMetrics(ctx context.Context, kubeClient kubernetes.Interface, labelSelector string) (map[string]*dto.MetricFamily, error) {
59+
pods, err := kubeClient.CoreV1().Pods(pacNamespace).List(ctx, metav1.ListOptions{
60+
LabelSelector: labelSelector,
61+
})
62+
if err != nil {
63+
return nil, err
64+
}
65+
66+
var podName string
67+
for _, pod := range pods.Items {
68+
if pod.Status.Phase != corev1.PodRunning {
69+
continue
70+
}
71+
allReady := true
72+
if len(pod.Status.ContainerStatuses) == 0 {
73+
allReady = false
74+
}
75+
for _, cs := range pod.Status.ContainerStatuses {
76+
if !cs.Ready {
77+
allReady = false
78+
break
79+
}
80+
}
81+
if allReady {
82+
podName = pod.Name
83+
break
84+
}
85+
}
86+
if podName == "" {
87+
return nil, fmt.Errorf("no Running/Ready PAC pod found for selector %q in namespace %s", labelSelector, pacNamespace)
88+
}
89+
90+
result := kubeClient.
91+
CoreV1().
92+
RESTClient().
93+
Get().
94+
Resource("pods").
95+
Name(podName + ":" + pacControllerMetricsPort).
96+
Namespace(pacNamespace).
97+
SubResource("proxy").
98+
Suffix("metrics").
99+
Do(ctx)
100+
101+
body, err := result.Raw()
102+
if err != nil {
103+
return nil, err
104+
}
105+
106+
parser := expfmt.NewTextParser(model.LegacyValidation)
107+
families, err := parser.TextToMetricFamilies(strings.NewReader(string(body)))
108+
if err != nil {
109+
return nil, err
110+
}
111+
return families, nil
112+
}
113+
114+
// waitForPACMetric polls the pod matching labelSelector until the named metric
115+
// family appears. Transient scrape errors are logged and retried until timeout.
116+
func waitForPACMetric(ctx context.Context, t *testing.T, kubeClient kubernetes.Interface, labelSelector, metricName string, timeout time.Duration) map[string]*dto.MetricFamily {
117+
t.Helper()
118+
ctx, cancel := context.WithTimeout(ctx, timeout)
119+
defer cancel()
120+
for {
121+
families, err := scrapePACPodMetrics(ctx, kubeClient, labelSelector)
122+
if err == nil {
123+
if _, ok := families[metricName]; ok {
124+
return families
125+
}
126+
} else {
127+
t.Logf("Retrying metrics scrape: %v", err)
128+
}
129+
select {
130+
case <-ctx.Done():
131+
t.Fatalf("Timed out waiting for metric %q to appear (waited %v): %v", metricName, timeout, ctx.Err())
132+
return nil
133+
case <-time.After(5 * time.Second):
134+
}
135+
}
136+
}
137+
138+
// TestOTelMetrics is a consolidated e2e test for the OpenCensus-to-OpenTelemetry
139+
// metrics migration in Pipelines-as-Code (PR #2567). It scrapes the PAC
140+
// controller pod's /metrics endpoint on port 9090 to verify:
141+
//
142+
// - Infrastructure metrics use new OTel-based naming:
143+
// - http_client_request_duration_seconds (knative k8s client instrumentation)
144+
// - go_* runtime metrics
145+
// - PAC application metrics are registered:
146+
// - pipelines_as_code_pipelinerun_count_total
147+
// - pipelines_as_code_pipelinerun_duration_seconds_sum_total
148+
// - pipelines_as_code_running_pipelineruns_count
149+
// - pipelines_as_code_git_provider_api_request_count_total
150+
// - Old OpenCensus metric names are absent
151+
//
152+
// Application counter metrics appear after PAC processes its first PipelineRun.
153+
// Infrastructure metrics (http_client_*, go_*) appear at startup.
154+
func TestOTelMetrics(t *testing.T) {
155+
ctx := context.Background()
156+
kubeClient := pacKubeClient(t)
157+
158+
// ========== Wait for http_client metrics to appear ==========
159+
// PAC uses the knative k8s client OTel instrumentation which records
160+
// http_client_request_duration_seconds for all API server calls.
161+
162+
t.Log("Waiting for http_client_request_duration_seconds to appear on PAC controller")
163+
families := waitForPACMetric(ctx, t, kubeClient, "app.kubernetes.io/name=controller,app.kubernetes.io/part-of=pipelines-as-code", "http_client_request_duration_seconds", 2*time.Minute)
164+
t.Logf("Scraped %d metric families from PAC controller", len(families))
165+
166+
// ========== Infrastructure metric assertions (OTel renames) ==========
167+
168+
t.Run("Renames/k8s_client_uses_http_client_prefix", func(t *testing.T) {
169+
found := false
170+
for name := range families {
171+
if strings.HasPrefix(name, "http_client_") {
172+
found = true
173+
break
174+
}
175+
}
176+
if !found {
177+
t.Error("Expected at least one http_client_* metric from knative k8s client instrumentation, found none")
178+
}
179+
})
180+
181+
t.Run("Renames/go_runtime_uses_standard_prefix", func(t *testing.T) {
182+
found := false
183+
for name := range families {
184+
if strings.HasPrefix(name, "go_") {
185+
found = true
186+
break
187+
}
188+
}
189+
if !found {
190+
t.Error("Expected standard go_* runtime metrics, found none")
191+
}
192+
})
193+
194+
// ========== PAC application metric assertions ==========
195+
// Counter metrics appear after the first PipelineRun is processed.
196+
// The gauge appears via registered callback once the collection cycle runs.
197+
// We log presence/absence rather than failing — in a fresh install with no
198+
// PipelineRun activity, counters will not yet have been observed.
199+
200+
t.Run("Application/pipelinerun_count", func(t *testing.T) {
201+
found := false
202+
for name := range families {
203+
if name == "pipelines_as_code_pipelinerun_count_total" || name == "pipelines_as_code_pipelinerun_count" {
204+
found = true
205+
break
206+
}
207+
}
208+
if found {
209+
t.Log("pipelines_as_code_pipelinerun_count(_total) found")
210+
} else {
211+
t.Log("pipelines_as_code_pipelinerun_count not yet present (no PipelineRuns processed yet)")
212+
}
213+
})
214+
215+
t.Run("Application/pipelinerun_duration_seconds_sum", func(t *testing.T) {
216+
found := false
217+
for name := range families {
218+
if strings.HasPrefix(name, "pipelines_as_code_pipelinerun_duration_seconds_sum") {
219+
found = true
220+
break
221+
}
222+
}
223+
if found {
224+
t.Log("pipelines_as_code_pipelinerun_duration_seconds_sum found")
225+
} else {
226+
t.Log("pipelines_as_code_pipelinerun_duration_seconds_sum not yet present (no PipelineRuns processed yet)")
227+
}
228+
})
229+
230+
t.Run("Application/running_pipelineruns_count", func(t *testing.T) {
231+
if _, ok := families["pipelines_as_code_running_pipelineruns_count"]; ok {
232+
t.Log("pipelines_as_code_running_pipelineruns_count found")
233+
} else {
234+
t.Log("pipelines_as_code_running_pipelineruns_count not yet present (gauge callback not yet called)")
235+
}
236+
})
237+
238+
t.Run("Application/git_provider_api_request_count", func(t *testing.T) {
239+
found := false
240+
for name := range families {
241+
if name == "pipelines_as_code_git_provider_api_request_count_total" || name == "pipelines_as_code_git_provider_api_request_count" {
242+
found = true
243+
break
244+
}
245+
}
246+
if found {
247+
t.Log("pipelines_as_code_git_provider_api_request_count(_total) found")
248+
} else {
249+
t.Log("pipelines_as_code_git_provider_api_request_count not yet present (no git API calls yet)")
250+
}
251+
})
252+
253+
// ========== Removed OpenCensus metrics ==========
254+
// TODO: Remove these assertions in a future release once no OC-based
255+
// release is supported.
256+
257+
t.Run("Removed/opencensus_pac_metrics", func(t *testing.T) {
258+
ocPrefixes := []string{
259+
"pipelines_as_code/",
260+
"tekton_pipelines_as_code_",
261+
}
262+
for name := range families {
263+
for _, prefix := range ocPrefixes {
264+
if strings.HasPrefix(name, prefix) {
265+
t.Errorf("Old OC metric %q still present; expected removal after OTel migration", name)
266+
}
267+
}
268+
}
269+
})
270+
271+
// ========== Watcher pod metrics ==========
272+
// The PAC watcher uses the knative reconciler workqueue, so it exposes
273+
// kn_workqueue_* metrics in addition to http_client_* and go_*.
274+
275+
t.Log("Waiting for PAC watcher pod metrics to be available")
276+
watcherFamilies := waitForPACMetric(ctx, t, kubeClient, "app.kubernetes.io/name=watcher,app.kubernetes.io/part-of=pipelines-as-code", "go_goroutines", 2*time.Minute)
277+
t.Logf("Scraped %d metric families from PAC watcher", len(watcherFamilies))
278+
279+
t.Run("Watcher/workqueue_uses_kn_prefix", func(t *testing.T) {
280+
found := false
281+
for name := range watcherFamilies {
282+
if strings.HasPrefix(name, "kn_workqueue_") {
283+
found = true
284+
break
285+
}
286+
}
287+
if !found {
288+
t.Error("Expected at least one kn_workqueue_* metric on the PAC watcher, found none")
289+
}
290+
})
291+
292+
t.Run("Watcher/go_runtime_metrics_present", func(t *testing.T) {
293+
found := false
294+
for name := range watcherFamilies {
295+
if strings.HasPrefix(name, "go_") {
296+
found = true
297+
break
298+
}
299+
}
300+
if !found {
301+
t.Error("Expected standard go_* runtime metrics on PAC watcher, found none")
302+
}
303+
})
304+
}

0 commit comments

Comments
 (0)