# k8s-shredder Metrics

This document describes all the metrics exposed by k8s-shredder. These metrics are available at the `/metrics` endpoint and can be scraped by Prometheus or other monitoring systems.

## Overview

k8s-shredder exposes metrics in Prometheus format to help operators monitor the health and performance of the node parking and eviction processes. The metrics are organized into several categories:

- **Core Operation Metrics**: General operation counters and timing
- **API Server Metrics**: Kubernetes API interaction metrics
- **Node Processing Metrics**: Node parking and processing statistics
- **Pod Processing Metrics**: Pod eviction and processing statistics
- **Karpenter Integration Metrics**: Karpenter drift detection metrics
- **Node Label Detection Metrics**: Node label-based detection metrics
- **Shared Metrics**: Aggregated metrics across all detection methods

## Core Operation Metrics

### `shredder_loops_total`
- **Type**: Counter
- **Description**: Total number of eviction loops completed
- **Use Case**: Monitor the frequency of eviction loop execution and overall system activity

### `shredder_loops_duration_seconds`
- **Type**: Summary
- **Description**: Duration of eviction loops in seconds
- **Objectives**: 0.5: 1200, 0.9: 900, 0.99: 600
- **Use Case**: Monitor the performance of eviction loops and identify slow operations
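
Because this metric is a Summary, its quantiles are exported directly and do not require `histogram_quantile`. For example, to read the p90 loop duration and a mean duration over the last 15 minutes:

```promql
# 90th percentile loop duration, as exported by the summary
shredder_loops_duration_seconds{quantile="0.9"}

# mean loop duration over the last 15 minutes
rate(shredder_loops_duration_seconds_sum[15m]) / rate(shredder_loops_duration_seconds_count[15m])
```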

### `shredder_errors_total`
- **Type**: Counter
- **Description**: Total number of errors encountered during operation
- **Use Case**: Monitor system health and identify operational issues
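
A useful derived signal (not a metric exposed by k8s-shredder itself) is the number of errors per completed eviction loop, which normalizes errors against overall activity:

```promql
# errors per completed eviction loop over the last hour
increase(shredder_errors_total[1h]) / increase(shredder_loops_total[1h])
```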

## API Server Metrics

### `shredder_apiserver_requests_total`
- **Type**: Counter Vector
- **Labels**: `verb`, `resource`, `status`
- **Description**: Total requests made to the Kubernetes API
- **Use Case**: Monitor API usage patterns and identify potential rate limiting issues
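
For example, request throughput broken down by the labels above, which also surfaces non-2xx responses:

```promql
# API request rate by verb, resource, and status over the last 5 minutes
sum by (verb, resource, status) (rate(shredder_apiserver_requests_total[5m]))
```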

### `shredder_apiserver_requests_duration_seconds`
- **Type**: Summary Vector
- **Labels**: `verb`, `resource`, `status`
- **Description**: Duration of Kubernetes API requests in seconds
- **Objectives**: 0.5: 0.05, 0.9: 0.01, 0.99: 0.001
- **Use Case**: Monitor API performance and identify slow API calls
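
Since this is a Summary Vector, per-quantile series are exported directly; for example, the slowest p99 latency per verb and resource:

```promql
# worst-case p99 API call latency per verb/resource
max by (verb, resource) (shredder_apiserver_requests_duration_seconds{quantile="0.99"})
```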

## Node Processing Metrics

### `shredder_processed_nodes_total`
- **Type**: Counter
- **Description**: Total number of nodes processed during eviction loops
- **Use Case**: Monitor the volume of node processing activity

### `shredder_node_force_to_evict_time`
- **Type**: Gauge Vector
- **Labels**: `node_name`
- **Description**: Unix timestamp when a node will be forcibly evicted
- **Use Case**: Monitor when nodes are scheduled for forced eviction
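
Because the value is a Unix timestamp, subtracting `time()` yields the time remaining before forced eviction; for example, nodes that will be force-evicted within the next hour:

```promql
# seconds left until forced eviction, filtered to nodes within 1 hour
(shredder_node_force_to_evict_time - time()) < 3600
```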

## Pod Processing Metrics

### `shredder_processed_pods_total`
- **Type**: Counter
- **Description**: Total number of pods processed during eviction loops
- **Use Case**: Monitor the volume of pod processing activity

### `shredder_pod_errors_total`
- **Type**: Gauge Vector
- **Labels**: `pod_name`, `namespace`, `reason`, `action`
- **Description**: Total pod errors per eviction loop
- **Use Case**: Monitor pod eviction failures and their reasons
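
For example, to see which failure reasons and actions dominate across the cluster:

```promql
# pod eviction errors grouped by reason and attempted action
sum by (reason, action) (shredder_pod_errors_total)
```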

### `shredder_pod_force_to_evict_time`
- **Type**: Gauge Vector
- **Labels**: `pod_name`, `namespace`
- **Description**: Unix timestamp when a pod will be forcibly evicted
- **Use Case**: Monitor when pods are scheduled for forced eviction
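
Analogous to the node-level metric, subtracting `time()` shows how long each pod has before it is forcibly evicted:

```promql
# pods that will be forcibly evicted within the next 30 minutes
(shredder_pod_force_to_evict_time - time()) < 1800
```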

## Karpenter Integration Metrics

### `shredder_karpenter_drifted_nodes_total`
- **Type**: Counter
- **Description**: Total number of drifted Karpenter nodes detected
- **Use Case**: Monitor the volume of Karpenter drift detection activity

### `shredder_karpenter_nodes_parked_total`
- **Type**: Counter
- **Description**: Total number of Karpenter nodes successfully parked
- **Use Case**: Monitor successful Karpenter node parking operations

### `shredder_karpenter_nodes_parking_failed_total`
- **Type**: Counter
- **Description**: Total number of Karpenter nodes that failed to be parked
- **Use Case**: Monitor Karpenter node parking failures

### `shredder_karpenter_processing_duration_seconds`
- **Type**: Summary
- **Description**: Duration of Karpenter node processing in seconds
- **Objectives**: 0.5: 0.05, 0.9: 0.01, 0.99: 0.001
- **Use Case**: Monitor the performance of Karpenter drift detection and parking operations
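
Taken together, the counters above can express a Karpenter parking failure ratio; this is a sketch, so pick a window that matches your eviction interval:

```promql
# fraction of Karpenter parking attempts that failed over the last hour
rate(shredder_karpenter_nodes_parking_failed_total[1h])
  / (rate(shredder_karpenter_nodes_parked_total[1h]) + rate(shredder_karpenter_nodes_parking_failed_total[1h]))
```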

## Node Label Detection Metrics

### `shredder_node_label_nodes_parked_total`
- **Type**: Counter
- **Description**: Total number of nodes successfully parked via node label detection
- **Use Case**: Monitor successful node label-based parking operations

### `shredder_node_label_nodes_parking_failed_total`
- **Type**: Counter
- **Description**: Total number of nodes that failed to be parked via node label detection
- **Use Case**: Monitor node label-based parking failures

### `shredder_node_label_processing_duration_seconds`
- **Type**: Summary
- **Description**: Duration of the node label detection and parking process in seconds
- **Objectives**: 0.5: 0.05, 0.9: 0.01, 0.99: 0.001
- **Use Case**: Monitor the performance of node label detection and parking operations

### `shredder_node_label_matching_nodes_total`
- **Type**: Gauge
- **Description**: Current number of nodes matching the label criteria
- **Use Case**: Monitor the current number of nodes that match the configured label selectors
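
The same failure-ratio pattern applies to label-based parking:

```promql
# fraction of label-based parking attempts that failed over the last hour
rate(shredder_node_label_nodes_parking_failed_total[1h])
  / (rate(shredder_node_label_nodes_parked_total[1h]) + rate(shredder_node_label_nodes_parking_failed_total[1h]))
```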

## Shared Metrics

These metrics aggregate data across all detection methods (Karpenter and node label detection) to provide a unified view of node parking activity.

### `shredder_nodes_parked_total`
- **Type**: Counter
- **Description**: Total number of nodes successfully parked (shared across all detection methods)
- **Use Case**: Monitor total node parking activity regardless of detection method

### `shredder_nodes_parking_failed_total`
- **Type**: Counter
- **Description**: Total number of nodes that failed to be parked (shared across all detection methods)
- **Use Case**: Monitor total node parking failures regardless of detection method

### `shredder_processing_duration_seconds`
- **Type**: Summary
- **Description**: Duration of node processing in seconds (shared across all detection methods)
- **Objectives**: 0.5: 0.05, 0.9: 0.01, 0.99: 0.001
- **Use Case**: Monitor total node processing performance regardless of detection method
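
When both detection methods are enabled, the shared counters should roughly track the sum of the per-method counters; the difference below is a consistency check (not an official alert) and should stay near zero:

```promql
# shared parking counter minus the sum of per-method parking counters
rate(shredder_nodes_parked_total[1h])
  - (rate(shredder_karpenter_nodes_parked_total[1h]) + rate(shredder_node_label_nodes_parked_total[1h]))
```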

## Metric Relationships

### Detection Method Metrics
- **Karpenter metrics** are incremented when `EnableKarpenterDriftDetection=true`
- **Node label metrics** are incremented when `EnableNodeLabelDetection=true`
- **Shared metrics** are incremented whenever either detection method processes nodes

### Processing Flow
1. **Detection**: Nodes are identified via Karpenter drift or label matching
2. **Parking**: Nodes are labeled, cordoned, and tainted
3. **Eviction**: Pods are evicted from parked nodes over time
4. **Cleanup**: Nodes are eventually removed when all pods are evicted

## Alerting Recommendations

### High Error Rates
```promql
rate(shredder_errors_total[5m]) > 0.1
```

### Slow Processing
```promql
# shredder_processing_duration_seconds is a Summary, so read its exported quantile directly
shredder_processing_duration_seconds{quantile="0.99"} > 30
```

### Failed Node Parking
```promql
rate(shredder_nodes_parking_failed_total[5m]) > 0
```

### High API Latency
```promql
# shredder_apiserver_requests_duration_seconds is a Summary, so read its exported quantile directly
shredder_apiserver_requests_duration_seconds{quantile="0.99"} > 5
```

### Parked Pods Alert
```promql
# Alert when pods are running on parked nodes
kube_ethos_upgrade:parked_pod > 0
```

## Example Queries

### Node Parking Success Rate
```promql
rate(shredder_nodes_parked_total[5m]) / (rate(shredder_nodes_parked_total[5m]) + rate(shredder_nodes_parking_failed_total[5m]))
```

### Average Processing Duration
```promql
# mean processing duration derived from the summary's _sum and _count series
rate(shredder_processing_duration_seconds_sum[5m]) / rate(shredder_processing_duration_seconds_count[5m])
```

### Nodes Parked by Detection Method
```promql
# Karpenter nodes
rate(shredder_karpenter_nodes_parked_total[5m])

# Label-based nodes
rate(shredder_node_label_nodes_parked_total[5m])
```

### Current Matching Nodes
```promql
shredder_node_label_matching_nodes_total
```

## Configuration

Metrics are exposed on the configured port (default: 8080) at the `/metrics` endpoint. The metrics server can be configured using the following options:

- **Metrics Port**: Configure the port for metrics exposure
- **Health Endpoint**: Available at `/healthz` for health checks
- **OpenMetrics Format**: Enabled by default for better compatibility
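
To confirm the endpoint is actually being scraped, a simple target-health query can help; the `job` label below is an assumption and depends on your scrape configuration:

```promql
# 1 = scrape target up, 0 = down; adjust the job label to match your setup
up{job="k8s-shredder"}
```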

For more information about configuring k8s-shredder, see the [main README](../README.md).