|
| 1 | +//go:build e2e |
| 2 | +// +build e2e |
| 3 | + |
| 4 | +// Copyright 2026 The Tekton Authors |
| 5 | +// |
| 6 | +// Licensed under the Apache License, Version 2.0 (the "License"); |
| 7 | +// you may not use this file except in compliance with the License. |
| 8 | +// You may obtain a copy of the License at |
| 9 | +// |
| 10 | +// http://www.apache.org/licenses/LICENSE-2.0 |
| 11 | +// |
| 12 | +// Unless required by applicable law or agreed to in writing, software |
| 13 | +// distributed under the License is distributed on an "AS IS" BASIS, |
| 14 | +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 15 | +// See the License for the specific language governing permissions and |
| 16 | +// limitations under the License. |
| 17 | + |
| 18 | +package test |
| 19 | + |
| 20 | +import ( |
| 21 | + "context" |
| 22 | + "fmt" |
| 23 | + "strings" |
| 24 | + "testing" |
| 25 | + "time" |
| 26 | + |
| 27 | + dto "github.com/prometheus/client_model/go" |
| 28 | + "github.com/prometheus/common/expfmt" |
| 29 | + "github.com/prometheus/common/model" |
| 30 | + corev1 "k8s.io/api/core/v1" |
| 31 | + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" |
| 32 | + "k8s.io/client-go/kubernetes" |
| 33 | + "k8s.io/client-go/tools/clientcmd" |
| 34 | +) |
| 35 | + |
// Fixed coordinates of the Pipelines-as-Code deployment that these tests scrape.
const (
	// pacControllerMetricsPort is the pod port on which the PAC controller
	// serves its Prometheus metrics.
	pacControllerMetricsPort = "9090"
	// pacNamespace is the namespace that Pipelines-as-Code is installed into.
	pacNamespace = "pipelines-as-code"
)
| 42 | + |
| 43 | +// pacKubeClient builds a kubernetes client from the default kubeconfig. |
| 44 | +func pacKubeClient(t *testing.T) kubernetes.Interface { |
| 45 | + t.Helper() |
| 46 | + rules := clientcmd.NewDefaultClientConfigLoadingRules() |
| 47 | + cfg, err := clientcmd.NewNonInteractiveDeferredLoadingClientConfig(rules, &clientcmd.ConfigOverrides{}).ClientConfig() |
| 48 | + if err != nil { |
| 49 | + t.Fatalf("Failed to build kubeconfig: %v", err) |
| 50 | + } |
| 51 | + return kubernetes.NewForConfigOrDie(cfg) |
| 52 | +} |
| 53 | + |
| 54 | +// scrapePACPodMetrics scrapes the /metrics endpoint of a PAC pod selected |
| 55 | +// by the given label selector via the Kubernetes API server proxy. |
| 56 | +// It returns an error instead of calling t.Fatalf so it can be retried |
| 57 | +// inside polling loops without aborting the test on transient failures. |
| 58 | +func scrapePACPodMetrics(ctx context.Context, kubeClient kubernetes.Interface, labelSelector string) (map[string]*dto.MetricFamily, error) { |
| 59 | + pods, err := kubeClient.CoreV1().Pods(pacNamespace).List(ctx, metav1.ListOptions{ |
| 60 | + LabelSelector: labelSelector, |
| 61 | + }) |
| 62 | + if err != nil { |
| 63 | + return nil, err |
| 64 | + } |
| 65 | + |
| 66 | + var podName string |
| 67 | + for _, pod := range pods.Items { |
| 68 | + if pod.Status.Phase != corev1.PodRunning { |
| 69 | + continue |
| 70 | + } |
| 71 | + allReady := true |
| 72 | + if len(pod.Status.ContainerStatuses) == 0 { |
| 73 | + allReady = false |
| 74 | + } |
| 75 | + for _, cs := range pod.Status.ContainerStatuses { |
| 76 | + if !cs.Ready { |
| 77 | + allReady = false |
| 78 | + break |
| 79 | + } |
| 80 | + } |
| 81 | + if allReady { |
| 82 | + podName = pod.Name |
| 83 | + break |
| 84 | + } |
| 85 | + } |
| 86 | + if podName == "" { |
| 87 | + return nil, fmt.Errorf("no Running/Ready PAC pod found for selector %q in namespace %s", labelSelector, pacNamespace) |
| 88 | + } |
| 89 | + |
| 90 | + result := kubeClient. |
| 91 | + CoreV1(). |
| 92 | + RESTClient(). |
| 93 | + Get(). |
| 94 | + Resource("pods"). |
| 95 | + Name(podName + ":" + pacControllerMetricsPort). |
| 96 | + Namespace(pacNamespace). |
| 97 | + SubResource("proxy"). |
| 98 | + Suffix("metrics"). |
| 99 | + Do(ctx) |
| 100 | + |
| 101 | + body, err := result.Raw() |
| 102 | + if err != nil { |
| 103 | + return nil, err |
| 104 | + } |
| 105 | + |
| 106 | + parser := expfmt.NewTextParser(model.LegacyValidation) |
| 107 | + families, err := parser.TextToMetricFamilies(strings.NewReader(string(body))) |
| 108 | + if err != nil { |
| 109 | + return nil, err |
| 110 | + } |
| 111 | + return families, nil |
| 112 | +} |
| 113 | + |
| 114 | +// waitForPACMetric polls the pod matching labelSelector until the named metric |
| 115 | +// family appears. Transient scrape errors are logged and retried until timeout. |
| 116 | +func waitForPACMetric(ctx context.Context, t *testing.T, kubeClient kubernetes.Interface, labelSelector, metricName string, timeout time.Duration) map[string]*dto.MetricFamily { |
| 117 | + t.Helper() |
| 118 | + ctx, cancel := context.WithTimeout(ctx, timeout) |
| 119 | + defer cancel() |
| 120 | + for { |
| 121 | + families, err := scrapePACPodMetrics(ctx, kubeClient, labelSelector) |
| 122 | + if err == nil { |
| 123 | + if _, ok := families[metricName]; ok { |
| 124 | + return families |
| 125 | + } |
| 126 | + } else { |
| 127 | + t.Logf("Retrying metrics scrape: %v", err) |
| 128 | + } |
| 129 | + select { |
| 130 | + case <-ctx.Done(): |
| 131 | + t.Fatalf("Timed out waiting for metric %q to appear (waited %v): %v", metricName, timeout, ctx.Err()) |
| 132 | + return nil |
| 133 | + case <-time.After(5 * time.Second): |
| 134 | + } |
| 135 | + } |
| 136 | +} |
| 137 | + |
| 138 | +// TestOTelMetrics is a consolidated e2e test for the OpenCensus-to-OpenTelemetry |
| 139 | +// metrics migration in Pipelines-as-Code (PR #2567). It scrapes the PAC |
| 140 | +// controller pod's /metrics endpoint on port 9090 to verify: |
| 141 | +// |
| 142 | +// - Infrastructure metrics use new OTel-based naming: |
| 143 | +// - http_client_request_duration_seconds (knative k8s client instrumentation) |
| 144 | +// - go_* runtime metrics |
| 145 | +// - PAC application metrics are registered: |
| 146 | +// - pipelines_as_code_pipelinerun_count_total |
| 147 | +// - pipelines_as_code_pipelinerun_duration_seconds_sum_total |
| 148 | +// - pipelines_as_code_running_pipelineruns_count |
| 149 | +// - pipelines_as_code_git_provider_api_request_count_total |
| 150 | +// - Old OpenCensus metric names are absent |
| 151 | +// |
| 152 | +// Application counter metrics appear after PAC processes its first PipelineRun. |
| 153 | +// Infrastructure metrics (http_client_*, go_*) appear at startup. |
| 154 | +func TestOTelMetrics(t *testing.T) { |
| 155 | + ctx := context.Background() |
| 156 | + kubeClient := pacKubeClient(t) |
| 157 | + |
| 158 | + // ========== Wait for http_client metrics to appear ========== |
| 159 | + // PAC uses the knative k8s client OTel instrumentation which records |
| 160 | + // http_client_request_duration_seconds for all API server calls. |
| 161 | + |
| 162 | + t.Log("Waiting for http_client_request_duration_seconds to appear on PAC controller") |
| 163 | + families := waitForPACMetric(ctx, t, kubeClient, "app.kubernetes.io/name=controller,app.kubernetes.io/part-of=pipelines-as-code", "http_client_request_duration_seconds", 2*time.Minute) |
| 164 | + t.Logf("Scraped %d metric families from PAC controller", len(families)) |
| 165 | + |
| 166 | + // ========== Infrastructure metric assertions (OTel renames) ========== |
| 167 | + |
| 168 | + t.Run("Renames/k8s_client_uses_http_client_prefix", func(t *testing.T) { |
| 169 | + found := false |
| 170 | + for name := range families { |
| 171 | + if strings.HasPrefix(name, "http_client_") { |
| 172 | + found = true |
| 173 | + break |
| 174 | + } |
| 175 | + } |
| 176 | + if !found { |
| 177 | + t.Error("Expected at least one http_client_* metric from knative k8s client instrumentation, found none") |
| 178 | + } |
| 179 | + }) |
| 180 | + |
| 181 | + t.Run("Renames/go_runtime_uses_standard_prefix", func(t *testing.T) { |
| 182 | + found := false |
| 183 | + for name := range families { |
| 184 | + if strings.HasPrefix(name, "go_") { |
| 185 | + found = true |
| 186 | + break |
| 187 | + } |
| 188 | + } |
| 189 | + if !found { |
| 190 | + t.Error("Expected standard go_* runtime metrics, found none") |
| 191 | + } |
| 192 | + }) |
| 193 | + |
| 194 | + // ========== PAC application metric assertions ========== |
| 195 | + // Counter metrics appear after the first PipelineRun is processed. |
| 196 | + // The gauge appears via registered callback once the collection cycle runs. |
| 197 | + // We log presence/absence rather than failing — in a fresh install with no |
| 198 | + // PipelineRun activity, counters will not yet have been observed. |
| 199 | + |
| 200 | + t.Run("Application/pipelinerun_count", func(t *testing.T) { |
| 201 | + found := false |
| 202 | + for name := range families { |
| 203 | + if name == "pipelines_as_code_pipelinerun_count_total" || name == "pipelines_as_code_pipelinerun_count" { |
| 204 | + found = true |
| 205 | + break |
| 206 | + } |
| 207 | + } |
| 208 | + if found { |
| 209 | + t.Log("pipelines_as_code_pipelinerun_count(_total) found") |
| 210 | + } else { |
| 211 | + t.Log("pipelines_as_code_pipelinerun_count not yet present (no PipelineRuns processed yet)") |
| 212 | + } |
| 213 | + }) |
| 214 | + |
| 215 | + t.Run("Application/pipelinerun_duration_seconds_sum", func(t *testing.T) { |
| 216 | + found := false |
| 217 | + for name := range families { |
| 218 | + if strings.HasPrefix(name, "pipelines_as_code_pipelinerun_duration_seconds_sum") { |
| 219 | + found = true |
| 220 | + break |
| 221 | + } |
| 222 | + } |
| 223 | + if found { |
| 224 | + t.Log("pipelines_as_code_pipelinerun_duration_seconds_sum found") |
| 225 | + } else { |
| 226 | + t.Log("pipelines_as_code_pipelinerun_duration_seconds_sum not yet present (no PipelineRuns processed yet)") |
| 227 | + } |
| 228 | + }) |
| 229 | + |
| 230 | + t.Run("Application/running_pipelineruns_count", func(t *testing.T) { |
| 231 | + if _, ok := families["pipelines_as_code_running_pipelineruns_count"]; ok { |
| 232 | + t.Log("pipelines_as_code_running_pipelineruns_count found") |
| 233 | + } else { |
| 234 | + t.Log("pipelines_as_code_running_pipelineruns_count not yet present (gauge callback not yet called)") |
| 235 | + } |
| 236 | + }) |
| 237 | + |
| 238 | + t.Run("Application/git_provider_api_request_count", func(t *testing.T) { |
| 239 | + found := false |
| 240 | + for name := range families { |
| 241 | + if name == "pipelines_as_code_git_provider_api_request_count_total" || name == "pipelines_as_code_git_provider_api_request_count" { |
| 242 | + found = true |
| 243 | + break |
| 244 | + } |
| 245 | + } |
| 246 | + if found { |
| 247 | + t.Log("pipelines_as_code_git_provider_api_request_count(_total) found") |
| 248 | + } else { |
| 249 | + t.Log("pipelines_as_code_git_provider_api_request_count not yet present (no git API calls yet)") |
| 250 | + } |
| 251 | + }) |
| 252 | + |
| 253 | + // ========== Removed OpenCensus metrics ========== |
| 254 | + // TODO: Remove these assertions in a future release once no OC-based |
| 255 | + // release is supported. |
| 256 | + |
| 257 | + t.Run("Removed/opencensus_pac_metrics", func(t *testing.T) { |
| 258 | + ocPrefixes := []string{ |
| 259 | + "pipelines_as_code/", |
| 260 | + "tekton_pipelines_as_code_", |
| 261 | + } |
| 262 | + for name := range families { |
| 263 | + for _, prefix := range ocPrefixes { |
| 264 | + if strings.HasPrefix(name, prefix) { |
| 265 | + t.Errorf("Old OC metric %q still present; expected removal after OTel migration", name) |
| 266 | + } |
| 267 | + } |
| 268 | + } |
| 269 | + }) |
| 270 | + |
| 271 | + // ========== Watcher pod metrics ========== |
| 272 | + // The PAC watcher uses the knative reconciler workqueue, so it exposes |
| 273 | + // kn_workqueue_* metrics in addition to http_client_* and go_*. |
| 274 | + |
| 275 | + t.Log("Waiting for PAC watcher pod metrics to be available") |
| 276 | + watcherFamilies := waitForPACMetric(ctx, t, kubeClient, "app.kubernetes.io/name=watcher,app.kubernetes.io/part-of=pipelines-as-code", "go_goroutines", 2*time.Minute) |
| 277 | + t.Logf("Scraped %d metric families from PAC watcher", len(watcherFamilies)) |
| 278 | + |
| 279 | + t.Run("Watcher/workqueue_uses_kn_prefix", func(t *testing.T) { |
| 280 | + found := false |
| 281 | + for name := range watcherFamilies { |
| 282 | + if strings.HasPrefix(name, "kn_workqueue_") { |
| 283 | + found = true |
| 284 | + break |
| 285 | + } |
| 286 | + } |
| 287 | + if !found { |
| 288 | + t.Error("Expected at least one kn_workqueue_* metric on the PAC watcher, found none") |
| 289 | + } |
| 290 | + }) |
| 291 | + |
| 292 | + t.Run("Watcher/go_runtime_metrics_present", func(t *testing.T) { |
| 293 | + found := false |
| 294 | + for name := range watcherFamilies { |
| 295 | + if strings.HasPrefix(name, "go_") { |
| 296 | + found = true |
| 297 | + break |
| 298 | + } |
| 299 | + } |
| 300 | + if !found { |
| 301 | + t.Error("Expected standard go_* runtime metrics on PAC watcher, found none") |
| 302 | + } |
| 303 | + }) |
| 304 | +} |
0 commit comments