Skip to content

Commit a58f363

Browse files
authored
Add fixture/snapshot tests (#101)
* Add fixture/snapshot tests Add tests that transform vendored API responses (fixtures) into vendored /metrics output (snapshots), so that we can see how changes to the exporter implementation affect the actual output of the exporter, in the form of changes to the snapshots. I've sourced the fixtures from debug JSON logging of real API responses from tasks running in AWS. Each task consisted of an ecs_exporter container and a prometheus container. My expectation is that fixtures should not be updated unless we become informed that API responses have materially changed, e.g. because new features we are using were added, or breaking changes have been made. I've got two sets of fixtures and snapshots, one for EC2 and one for Fargate. The API responses differ between EC2 and Fargate enough (because, among other things, they use two completely different implementations of the task metadata API, and two different container runtimes entirely!) that I think it's worth covering them both independently. Closes #99. Signed-off-by: Ian Kerins <[email protected]> * Fix typo'd metric name Compare to the sibling metric whose name is spelled correctly: ecs_network_receive_packets_dropped_total. This commit shows the snapshot tests in action. Signed-off-by: Ian Kerins <[email protected]> * Fix nil pointer dereference with stopped container in EC2 As the updated task stats fixture for EC2 shows, stopped containers in EC2 have the empty JSON object for their stats, instead of the `null` that Fargate has, which triggered a nil pointer dereference that is now fixed. Signed-off-by: Ian Kerins <[email protected]> * Load test fixtures at test execution time Signed-off-by: Ian Kerins <[email protected]> * Generate snapshot diff using testutil.CollectAndCompare Signed-off-by: Ian Kerins <[email protected]> --------- Signed-off-by: Ian Kerins <[email protected]>
1 parent a72e49c commit a58f363

File tree

10 files changed

+1417
-191
lines changed

10 files changed

+1417
-191
lines changed

README.md

Lines changed: 7 additions & 187 deletions
Original file line numberDiff line numberDiff line change
@@ -48,190 +48,10 @@ None.
4848

4949
## Example output
5050

51-
(With `--web.disable-exporter-metrics` passed, such that standard Go metrics are not included here.)
52-
53-
```
54-
# HELP ecs_container_cpu_usage_seconds_total Cumulative total container CPU usage in seconds.
55-
# TYPE ecs_container_cpu_usage_seconds_total counter
56-
ecs_container_cpu_usage_seconds_total{container_name="ecs-exporter"} 0.028057878
57-
# HELP ecs_container_memory_limit_bytes Configured container memory limit in bytes, set from the container-level limit in the task definition if any, otherwise the task-level limit.
58-
# TYPE ecs_container_memory_limit_bytes gauge
59-
ecs_container_memory_limit_bytes{container_name="ecs-exporter"} 5.36870912e+08
60-
# HELP ecs_container_memory_page_cache_size_bytes Current container memory page cache size in bytes. This is not a subset of used bytes.
61-
# TYPE ecs_container_memory_page_cache_size_bytes gauge
62-
ecs_container_memory_page_cache_size_bytes{container_name="ecs-exporter"} 0
63-
# HELP ecs_container_memory_usage_bytes Current container memory usage in bytes.
64-
# TYPE ecs_container_memory_usage_bytes gauge
65-
ecs_container_memory_usage_bytes{container_name="ecs-exporter"} 4.243456e+06
66-
# HELP ecs_exporter_build_info A metric with a constant '1' value labeled by version, revision, branch, goversion from which ecs_exporter was built, and the goos and goarch for the build.
67-
# TYPE ecs_exporter_build_info gauge
68-
ecs_exporter_build_info{branch="",goarch="arm64",goos="linux",goversion="go1.23.2",revision="unknown",tags="unknown",version=""} 1
69-
# HELP ecs_network_receive_bytes_total Cumulative total size of network packets received in bytes.
70-
# TYPE ecs_network_receive_bytes_total counter
71-
ecs_network_receive_bytes_total{interface="eth1"} 1.1172419e+07
72-
# HELP ecs_network_receive_errors_total Cumulative total count of network errors in receiving.
73-
# TYPE ecs_network_receive_errors_total counter
74-
ecs_network_receive_errors_total{interface="eth1"} 0
75-
# HELP ecs_network_receive_packets_dropped_total Cumulative total count of network packets dropped in receiving.
76-
# TYPE ecs_network_receive_packets_dropped_total counter
77-
ecs_network_receive_packets_dropped_total{interface="eth1"} 0
78-
# HELP ecs_network_receive_packets_total Cumulative total count of network packets received.
79-
# TYPE ecs_network_receive_packets_total counter
80-
ecs_network_receive_packets_total{interface="eth1"} 8084
81-
# HELP ecs_network_transmit_bytes_total Cumulative total size of network packets transmitted in bytes.
82-
# TYPE ecs_network_transmit_bytes_total counter
83-
ecs_network_transmit_bytes_total{interface="eth1"} 178817
84-
# HELP ecs_network_transmit_dropped_total Cumulative total count of network packets dropped in transmit.
85-
# TYPE ecs_network_transmit_dropped_total counter
86-
ecs_network_transmit_dropped_total{interface="eth1"} 0
87-
# HELP ecs_network_transmit_errors_total Cumulative total count of network errors in transmit.
88-
# TYPE ecs_network_transmit_errors_total counter
89-
ecs_network_transmit_errors_total{interface="eth1"} 0
90-
# HELP ecs_network_transmit_packets_total Cumulative total count of network packets transmitted.
91-
# TYPE ecs_network_transmit_packets_total counter
92-
ecs_network_transmit_packets_total{interface="eth1"} 897
93-
# HELP ecs_task_cpu_limit_vcpus Configured task CPU limit in vCPUs (1 vCPU = 1024 CPU units). This is optional when running on EC2; if no limit is set, this metric has no value.
94-
# TYPE ecs_task_cpu_limit_vcpus gauge
95-
ecs_task_cpu_limit_vcpus 0.25
96-
# HELP ecs_task_ephemeral_storage_allocated_bytes Configured Fargate task ephemeral storage allocated size in bytes.
97-
# TYPE ecs_task_ephemeral_storage_allocated_bytes gauge
98-
ecs_task_ephemeral_storage_allocated_bytes 2.1491613696e+10
99-
# HELP ecs_task_ephemeral_storage_used_bytes Current Fargate task ephemeral storage usage in bytes.
100-
# TYPE ecs_task_ephemeral_storage_used_bytes gauge
101-
ecs_task_ephemeral_storage_used_bytes 3.7748736e+07
102-
# HELP ecs_task_image_pull_start_timestamp_seconds The time at which the task started pulling docker images for its containers.
103-
# TYPE ecs_task_image_pull_start_timestamp_seconds gauge
104-
ecs_task_image_pull_start_timestamp_seconds 1.737156015124145e+09
105-
# HELP ecs_task_image_pull_stop_timestamp_seconds The time at which the task stopped (i.e. completed) pulling docker images for its containers.
106-
# TYPE ecs_task_image_pull_stop_timestamp_seconds gauge
107-
ecs_task_image_pull_stop_timestamp_seconds 1.7371560172684324e+09
108-
# HELP ecs_task_memory_limit_bytes Configured task memory limit in bytes. This is optional when running on EC2; if no limit is set, this metric has no value.
109-
# TYPE ecs_task_memory_limit_bytes gauge
110-
ecs_task_memory_limit_bytes 5.36870912e+08
111-
# HELP ecs_task_metadata_info ECS task metadata, sourced from the task metadata endpoint version 4.
112-
# TYPE ecs_task_metadata_info gauge
113-
ecs_task_metadata_info{availability_zone="us-east-1a",cluster="arn:aws:ecs:us-east-1:829490980523:cluster/prom-ecs-exporter-sandbox",desired_status="RUNNING",family="prom-ecs-exporter-sandbox-isker-fix-network-metrics-fargate",known_status="RUNNING",launch_type="FARGATE",revision="1",task_arn="arn:aws:ecs:us-east-1:829490980523:task/prom-ecs-exporter-sandbox/c8387acdc4884a0fa13dae78e68a989f"} 1
114-
```
115-
116-
## Example task definition
117-
118-
```
119-
{
120-
"ipcMode": null,
121-
"executionRoleArn": "arn:aws:iam::ACCOUNT_ID:role/ecsTaskExecutionRole",
122-
"containerDefinitions": [
123-
{
124-
"dnsSearchDomains": null,
125-
"environmentFiles": null,
126-
"logConfiguration": {
127-
"logDriver": "awslogs",
128-
"secretOptions": null,
129-
"options": {
130-
"awslogs-group": "/ecs/ecs-exporter",
131-
"awslogs-region": "us-west-2",
132-
"awslogs-stream-prefix": "ecs"
133-
}
134-
},
135-
"entryPoint": null,
136-
"portMappings": [
137-
{
138-
"hostPort": 9779,
139-
"protocol": "tcp",
140-
"containerPort": 9779
141-
}
142-
],
143-
"command": null,
144-
"linuxParameters": null,
145-
"cpu": 0,
146-
"environment": [],
147-
"resourceRequirements": null,
148-
"ulimits": null,
149-
"dnsServers": null,
150-
"mountPoints": [],
151-
"workingDirectory": null,
152-
"secrets": null,
153-
"dockerSecurityOptions": null,
154-
"memory": null,
155-
"memoryReservation": null,
156-
"volumesFrom": [],
157-
"stopTimeout": null,
158-
"image": "quay.io/prometheuscommunity/ecs-exporter:v0.1.0",
159-
"startTimeout": null,
160-
"firelensConfiguration": null,
161-
"dependsOn": null,
162-
"disableNetworking": null,
163-
"interactive": null,
164-
"healthCheck": null,
165-
"essential": true,
166-
"links": null,
167-
"hostname": null,
168-
"extraHosts": null,
169-
"pseudoTerminal": null,
170-
"user": null,
171-
"readonlyRootFilesystem": null,
172-
"dockerLabels": null,
173-
"systemControls": null,
174-
"privileged": null,
175-
"name": "ecs-exporter"
176-
}
177-
],
178-
"placementConstraints": [],
179-
"memory": "512",
180-
"taskRoleArn": "arn:aws:iam::ACCOUNT_ID:role/ecsTaskExecutionRole",
181-
"compatibilities": [
182-
"EC2",
183-
"FARGATE"
184-
],
185-
"taskDefinitionArn": "arn:aws:ecs:us-west-2:ACCOUNT_ID:task-definition/ecs-exporter:1",
186-
"family": "ecs-exporter",
187-
"requiresAttributes": [
188-
{
189-
"targetId": null,
190-
"targetType": null,
191-
"value": null,
192-
"name": "com.amazonaws.ecs.capability.logging-driver.awslogs"
193-
},
194-
{
195-
"targetId": null,
196-
"targetType": null,
197-
"value": null,
198-
"name": "ecs.capability.execution-role-awslogs"
199-
},
200-
{
201-
"targetId": null,
202-
"targetType": null,
203-
"value": null,
204-
"name": "com.amazonaws.ecs.capability.docker-remote-api.1.19"
205-
},
206-
{
207-
"targetId": null,
208-
"targetType": null,
209-
"value": null,
210-
"name": "com.amazonaws.ecs.capability.task-iam-role"
211-
},
212-
{
213-
"targetId": null,
214-
"targetType": null,
215-
"value": null,
216-
"name": "com.amazonaws.ecs.capability.docker-remote-api.1.18"
217-
},
218-
{
219-
"targetId": null,
220-
"targetType": null,
221-
"value": null,
222-
"name": "ecs.capability.task-eni"
223-
}
224-
],
225-
"pidMode": null,
226-
"requiresCompatibilities": [
227-
"FARGATE"
228-
],
229-
"networkMode": "awsvpc",
230-
"cpu": "256",
231-
"revision": 1,
232-
"status": "ACTIVE",
233-
"inferenceAccelerators": null,
234-
"proxyConfiguration": null,
235-
"volumes": []
236-
}
237-
```
51+
Check out the [metrics snapshots](./ecscollector/testdata/snapshots) which
52+
contain sample metrics emitted by ecs_exporter in the [Prometheus text
53+
format](https://prometheus.io/docs/instrumenting/exposition_formats/#text-based-format)
54+
you should expect to see on /metrics. Note that these snapshots behave as if
55+
`--web.disable-exporter-metrics` were passed when running ecs_exporter, such
56+
that standard [client_golang](https://github.com/prometheus/client_golang)
57+
metrics are not included.

ecscollector/collector.go

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -124,7 +124,7 @@ var (
124124
networkLabels, nil)
125125

126126
networkTxDroppedDesc = prometheus.NewDesc(
127-
"ecs_network_transmit_dropped_total",
127+
"ecs_network_transmit_packets_dropped_total",
128128
"Cumulative total count of network packets dropped in transmit.",
129129
networkLabels, nil)
130130

@@ -196,7 +196,7 @@ func (c *collector) Collect(ch chan<- prometheus.Metric) {
196196
c.logger.Debug("Failed to retrieve metadata", "error", err)
197197
return
198198
}
199-
c.logger.Debug("Got ECS task metadata response", "stats", metadata)
199+
c.logger.Debug("Got ECS task metadata response", "metadata", metadata)
200200

201201
ch <- prometheus.MustNewConstMetric(
202202
taskMetadataDesc,
@@ -269,10 +269,10 @@ func (c *collector) Collect(ch chan<- prometheus.Metric) {
269269
networks := make(map[string]*container.NetworkStats)
270270
for _, container := range metadata.Containers {
271271
s := stats[container.ID]
272-
if s == nil {
272+
if s == nil || s.StatsJSON == nil {
273273
// This can happen if the container is stopped; if it's
274274
// nonessential, the task goes on.
275-
c.logger.Debug("Couldn't find container with ID in stats", "id", container.ID)
275+
c.logger.Debug("Couldn't find stats for container", "id", container.ID)
276276
continue
277277
}
278278

ecscollector/collector_test.go

Lines changed: 140 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,140 @@
1+
// Copyright 2025 The Prometheus Authors
2+
// Licensed under the Apache License, Version 2.0 (the "License");
3+
// you may not use this file except in compliance with the License.
4+
// You may obtain a copy of the License at
5+
//
6+
// http://www.apache.org/licenses/LICENSE-2.0
7+
//
8+
// Unless required by applicable law or agreed to in writing, software
9+
// distributed under the License is distributed on an "AS IS" BASIS,
10+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11+
// See the License for the specific language governing permissions and
12+
// limitations under the License.
13+
14+
package ecscollector
15+
16+
import (
17+
"errors"
18+
"flag"
19+
"fmt"
20+
"io"
21+
"log/slog"
22+
"net/http"
23+
"net/http/httptest"
24+
"os"
25+
"path/filepath"
26+
"testing"
27+
28+
"github.com/prometheus-community/ecs_exporter/ecsmetadata"
29+
"github.com/prometheus/client_golang/prometheus"
30+
"github.com/prometheus/client_golang/prometheus/promhttp"
31+
"github.com/prometheus/client_golang/prometheus/testutil"
32+
)
33+
34+
// Create a metadata client that will always receive the given fixture API
35+
// responses.
36+
func fixtureClient(taskMetadataPath, taskStatsPath string) (*ecsmetadata.Client, *httptest.Server, error) {
37+
taskMetadata, err := os.ReadFile(taskMetadataPath)
38+
if err != nil {
39+
return nil, nil, fmt.Errorf("failed to read task metadata fixture: %w", err)
40+
}
41+
taskStats, err := os.ReadFile(taskStatsPath)
42+
if err != nil {
43+
return nil, nil, fmt.Errorf("failed to read task stats fixture: %w", err)
44+
}
45+
46+
mux := http.NewServeMux()
47+
mux.HandleFunc("GET /task", func(w http.ResponseWriter, r *http.Request) {
48+
w.Header().Add("content-type", "application/json")
49+
w.Write(taskMetadata)
50+
})
51+
mux.HandleFunc("GET /task/stats", func(w http.ResponseWriter, r *http.Request) {
52+
w.Header().Add("content-type", "application/json")
53+
w.Write(taskStats)
54+
})
55+
56+
server := httptest.NewServer(mux)
57+
return ecsmetadata.NewClient(server.URL), server, nil
58+
}
59+
60+
// Renders metrics from the given collector to the prometheus text exposition
61+
// format.
62+
func renderMetrics(collector prometheus.Collector) ([]byte, error) {
63+
registry := prometheus.NewRegistry()
64+
registry.MustRegister(collector)
65+
66+
// It seems that the only way to really get full /metrics output is with
67+
// promhttp. There is testutil.CollectAndFormat but it requires you to
68+
// specify every metric name you want in the output, which seems to be not
69+
// worth it compared to this.
70+
promServer := httptest.NewServer(promhttp.HandlerFor(registry, promhttp.HandlerOpts{}))
71+
defer promServer.Close()
72+
resp, err := http.Get(promServer.URL)
73+
if err != nil {
74+
return nil, fmt.Errorf("metrics request failed: %w", err)
75+
}
76+
77+
defer resp.Body.Close()
78+
if resp.StatusCode != 200 {
79+
return nil, fmt.Errorf("non-200 metrics response: %v", resp.StatusCode)
80+
}
81+
metrics, err := io.ReadAll(resp.Body)
82+
if err != nil {
83+
return nil, fmt.Errorf("failed to read metrics response body: %w", err)
84+
}
85+
return metrics, nil
86+
}
87+
88+
// updateSnapshots is the -update-snapshots test flag (default false). When it
// is set, assertSnapshot rewrites each snapshot file from the collector's
// current output before comparing against it.
var updateSnapshots = flag.Bool("update-snapshots", false, "update snapshot files")
89+
90+
func assertSnapshot(t *testing.T, collector prometheus.Collector, path string) {
91+
if *updateSnapshots {
92+
metrics, err := renderMetrics(collector)
93+
if err != nil {
94+
t.Fatalf("failed to render new snapshot %s: %v", path, err)
95+
}
96+
dir := filepath.Dir(path)
97+
if err := os.MkdirAll(dir, 0750); err != nil {
98+
t.Fatalf("failed to create snapshot output directory %s: %v", dir, err)
99+
} else if err := os.WriteFile(path, metrics, 0666); err != nil {
100+
t.Fatalf("failed to write snapshot file %s: %v", path, err)
101+
} else {
102+
t.Logf("updated snapshot: %s", path)
103+
}
104+
}
105+
106+
file, err := os.Open(path)
107+
if errors.Is(err, os.ErrNotExist) {
108+
t.Fatalf("snapshot file does not exist, set the -update-snapshots flag to update: %v", err)
109+
} else if err != nil {
110+
t.Fatalf("failed to open snapshot file: %v", err)
111+
} else if err := testutil.CollectAndCompare(collector, file); err != nil {
112+
t.Fatalf("snapshot outdated, set the -update-snapshots flag to update\n%v", err)
113+
}
114+
}
115+
116+
func TestFargateMetrics(t *testing.T) {
117+
metadataClient, metadataServer, err := fixtureClient(
118+
"testdata/fixtures/fargate_task_metadata.json",
119+
"testdata/fixtures/fargate_task_stats.json",
120+
)
121+
if err != nil {
122+
t.Fatalf("failed to load test fixtures: %v", err)
123+
}
124+
defer metadataServer.Close()
125+
collector := NewCollector(metadataClient, slog.Default())
126+
assertSnapshot(t, collector, "testdata/snapshots/fargate_metrics.txt")
127+
}
128+
129+
func TestEc2Metrics(t *testing.T) {
130+
metadataClient, metadataServer, err := fixtureClient(
131+
"testdata/fixtures/ec2_task_metadata.json",
132+
"testdata/fixtures/ec2_task_stats.json",
133+
)
134+
if err != nil {
135+
t.Fatalf("failed to load test fixtures: %v", err)
136+
}
137+
defer metadataServer.Close()
138+
collector := NewCollector(metadataClient, slog.Default())
139+
assertSnapshot(t, collector, "testdata/snapshots/ec2_metrics.txt")
140+
}

0 commit comments

Comments
 (0)