diff --git a/.github/workflows/docker-publish.yml b/.github/workflows/docker-publish.yml index 48f3b27..085f75d 100644 --- a/.github/workflows/docker-publish.yml +++ b/.github/workflows/docker-publish.yml @@ -53,6 +53,16 @@ jobs: curl --fail http://127.0.0.1:9091/api/v1/query?query=dns_query_result_code | grep dns_query_result_code curl --fail http://127.0.0.1:9091/api/v1/query?query=ethtool_duplex | grep ethtool_duplex curl --fail http://127.0.0.1:9091/api/v1/query?query=kernel_boot_time_total | grep kernel_boot_time_total + curl --fail http://127.0.0.1:9091/api/v1/query?query=system_cpu_time_seconds_total | grep system_cpu_time_seconds_total + curl --fail http://127.0.0.1:9091/api/v1/query?query=system_disk_io_time_seconds_total | grep system_disk_io_time_seconds_total + curl --fail http://127.0.0.1:9091/api/v1/query?query=system_filesystem_inodes_usage | grep system_filesystem_inodes_usage + curl --fail http://127.0.0.1:9091/api/v1/query?query=system_memory_usage_bytes | grep system_memory_usage_bytes + curl --fail http://127.0.0.1:9091/api/v1/query?query=system_network_connections | grep system_network_connections + curl --fail http://127.0.0.1:9091/api/v1/query?query=system_paging_faults_total | grep system_paging_faults_total + curl --fail http://127.0.0.1:9091/api/v1/query?query=system_processes_count | grep system_processes_count + echo "Wait 3 minutes for go app to start..." 
&& sleep 3m + curl --fail http://127.0.0.1:9091/api/v1/query?query=spdk_bdev_bytes_read_total | grep spdk_bdev_bytes_read_total + curl "http://127.0.0.1:16686/api/traces?service=spdk-client&limit=1" - name: Logs if: always() diff --git a/config/demo-trace/go.mod b/config/demo-trace/go.mod new file mode 100644 index 0000000..c6d2d2f --- /dev/null +++ b/config/demo-trace/go.mod @@ -0,0 +1,35 @@ +module example.com/demo + +go 1.23 + +require ( + go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.57.0 + go.opentelemetry.io/otel v1.32.0 + go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc v1.32.0 + go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.32.0 + go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.32.0 + go.opentelemetry.io/otel/metric v1.32.0 + go.opentelemetry.io/otel/sdk/metric v1.32.0 +) + +require ( + github.com/cenkalti/backoff/v4 v4.3.0 // indirect + github.com/felixge/httpsnoop v1.0.4 // indirect + github.com/grpc-ecosystem/grpc-gateway/v2 v2.23.0 // indirect + go.opentelemetry.io/proto/otlp v1.3.1 // indirect + golang.org/x/net v0.30.0 // indirect + golang.org/x/text v0.20.0 // indirect + google.golang.org/genproto/googleapis/api v0.0.0-20241104194629-dd2ea8efbc28 // indirect + google.golang.org/genproto/googleapis/rpc v0.0.0-20241104194629-dd2ea8efbc28 // indirect + google.golang.org/grpc v1.67.1 // indirect +) + +require ( + github.com/go-logr/logr v1.4.2 // indirect + github.com/go-logr/stdr v1.2.2 // indirect + github.com/google/uuid v1.6.0 // indirect + go.opentelemetry.io/otel/sdk v1.32.0 + go.opentelemetry.io/otel/trace v1.32.0 // indirect + golang.org/x/sys v0.27.0 // indirect + google.golang.org/protobuf v1.35.1 // indirect +) diff --git a/config/demo-trace/go.sum b/config/demo-trace/go.sum new file mode 100644 index 0000000..685dbb9 --- /dev/null +++ b/config/demo-trace/go.sum @@ -0,0 +1,59 @@ +github.com/cenkalti/backoff/v4 v4.3.0 h1:MyRJ/UdXutAwSAT+s3wNd7MfTIcy71VQueUuFK343L8= 
+github.com/cenkalti/backoff/v4 v4.3.0/go.mod h1:Y3VNntkOUPxTVeUxJ/G5vcM//AlwfmyYozVcomhLiZE= +github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= +github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/felixge/httpsnoop v1.0.4 h1:NFTV2Zj1bL4mc9sqWACXbQFVBBg2W3GPvqp8/ESS2Wg= +github.com/felixge/httpsnoop v1.0.4/go.mod h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U= +github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= +github.com/go-logr/logr v1.4.2 h1:6pFjapn8bFcIbiKo3XT4j/BhANplGihG6tvd+8rYgrY= +github.com/go-logr/logr v1.4.2/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= +github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag= +github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE= +github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI= +github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= +github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= +github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= +github.com/grpc-ecosystem/grpc-gateway/v2 v2.23.0 h1:ad0vkEBuk23VJzZR9nkLVG0YAoN9coASF1GusYX6AlU= +github.com/grpc-ecosystem/grpc-gateway/v2 v2.23.0/go.mod h1:igFoXX2ELCW06bol23DWPB5BEWfZISOzSP5K2sbLea0= +github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg= +github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= +go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.57.0 h1:DheMAlT6POBP+gh8RUH19EOTnQIor5QE0uSRPtzCpSw= +go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.57.0/go.mod 
h1:wZcGmeVO9nzP67aYSLDqXNWK87EZWhi7JWj1v7ZXf94= +go.opentelemetry.io/otel v1.32.0 h1:WnBN+Xjcteh0zdk01SVqV55d/m62NJLJdIyb4y/WO5U= +go.opentelemetry.io/otel v1.32.0/go.mod h1:00DCVSB0RQcnzlwyTfqtxSm+DRr9hpYrHjNGiBHVQIg= +go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc v1.32.0 h1:j7ZSD+5yn+lo3sGV69nW04rRR0jhYnBwjuX3r0HvnK0= +go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc v1.32.0/go.mod h1:WXbYJTUaZXAbYd8lbgGuvih0yuCfOFC5RJoYnoLcGz8= +go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.32.0 h1:IJFEoHiytixx8cMiVAO+GmHR6Frwu+u5Ur8njpFO6Ac= +go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.32.0/go.mod h1:3rHrKNtLIoS0oZwkY2vxi+oJcwFRWdtUyRII+so45p8= +go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.32.0 h1:9kV11HXBHZAvuPUZxmMWrH8hZn/6UnHX4K0mu36vNsU= +go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.32.0/go.mod h1:JyA0FHXe22E1NeNiHmVp7kFHglnexDQ7uRWDiiJ1hKQ= +go.opentelemetry.io/otel/metric v1.32.0 h1:xV2umtmNcThh2/a/aCP+h64Xx5wsj8qqnkYZktzNa0M= +go.opentelemetry.io/otel/metric v1.32.0/go.mod h1:jH7CIbbK6SH2V2wE16W05BHCtIDzauciCRLoc/SyMv8= +go.opentelemetry.io/otel/sdk v1.32.0 h1:RNxepc9vK59A8XsgZQouW8ue8Gkb4jpWtJm9ge5lEG4= +go.opentelemetry.io/otel/sdk v1.32.0/go.mod h1:LqgegDBjKMmb2GC6/PrTnteJG39I8/vJCAP9LlJXEjU= +go.opentelemetry.io/otel/sdk/metric v1.32.0 h1:rZvFnvmvawYb0alrYkjraqJq0Z4ZUJAiyYCU9snn1CU= +go.opentelemetry.io/otel/sdk/metric v1.32.0/go.mod h1:PWeZlq0zt9YkYAp3gjKZ0eicRYvOh1Gd+X99x6GHpCQ= +go.opentelemetry.io/otel/trace v1.32.0 h1:WIC9mYrXf8TmY/EXuULKc8hR17vE+Hjv2cssQDe03fM= +go.opentelemetry.io/otel/trace v1.32.0/go.mod h1:+i4rkvCraA+tG6AzwloGaCtkx53Fa+L+V8e9a7YvhT8= +go.opentelemetry.io/proto/otlp v1.3.1 h1:TrMUixzpM0yuc/znrFTP9MMRh8trP93mkCiDVeXrui0= +go.opentelemetry.io/proto/otlp v1.3.1/go.mod h1:0X1WI4de4ZsLrrJNLAQbFeLCm3T7yBkR0XqQ7niQU+8= +go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto= +go.uber.org/goleak v1.3.0/go.mod 
h1:CoHD4mav9JJNrW/WLlf7HGZPjdw8EucARQHekz1X6bE= +golang.org/x/net v0.30.0 h1:AcW1SDZMkb8IpzCdQUaIq2sP4sZ4zw+55h6ynffypl4= +golang.org/x/net v0.30.0/go.mod h1:2wGyMJ5iFasEhkwi13ChkO/t1ECNC4X4eBKkVFyYFlU= +golang.org/x/sys v0.27.0 h1:wBqf8DvsY9Y/2P8gAfPDEYNuS30J4lPHJxXSb/nJZ+s= +golang.org/x/sys v0.27.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/text v0.20.0 h1:gK/Kv2otX8gz+wn7Rmb3vT96ZwuoxnQlY+HlJVj7Qug= +golang.org/x/text v0.20.0/go.mod h1:D4IsuqiFMhST5bX19pQ9ikHC2GsaKyk/oF+pn3ducp4= +google.golang.org/genproto/googleapis/api v0.0.0-20241104194629-dd2ea8efbc28 h1:M0KvPgPmDZHPlbRbaNU1APr28TvwvvdUPlSv7PUvy8g= +google.golang.org/genproto/googleapis/api v0.0.0-20241104194629-dd2ea8efbc28/go.mod h1:dguCy7UOdZhTvLzDyt15+rOrawrpM4q7DD9dQ1P11P4= +google.golang.org/genproto/googleapis/rpc v0.0.0-20241104194629-dd2ea8efbc28 h1:XVhgTWWV3kGQlwJHR3upFWZeTsei6Oks1apkZSeonIE= +google.golang.org/genproto/googleapis/rpc v0.0.0-20241104194629-dd2ea8efbc28/go.mod h1:GX3210XPVPUjJbTUbvwI8f2IpZDMZuPJWDzDuebbviI= +google.golang.org/grpc v1.67.1 h1:zWnc1Vrcno+lHZCOofnIMvycFcc0QRGIzm9dhnDX68E= +google.golang.org/grpc v1.67.1/go.mod h1:1gLDyUQU7CTLJI90u3nXZ9ekeghjeM7pTDZlqFNg2AA= +google.golang.org/protobuf v1.35.1 h1:m3LfL6/Ca+fqnjnlqQXNpFPABW1UD7mjh8KO2mKFytA= +google.golang.org/protobuf v1.35.1/go.mod h1:9fA7Ob0pmnwhb644+1+CVWFRbNajQ6iRojtC/QF5bRE= +gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= +gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
diff --git a/config/demo-trace/main.go b/config/demo-trace/main.go
new file mode 100644
index 0000000..c76a9ad
--- /dev/null
+++ b/config/demo-trace/main.go
@@ -0,0 +1,264 @@
+package main
+
+import (
+	"bytes"
+	"context"
+	"encoding/json"
+	"errors"
+	"fmt"
+	"io"
+	"log"
+	"net/http"
+	"os"
+	"os/signal"
+	"syscall"
+	"time"
+
+	"go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp"
+	"go.opentelemetry.io/otel"
+	"go.opentelemetry.io/otel/attribute"
+	"go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc"
+	"go.opentelemetry.io/otel/exporters/otlp/otlptrace"
+	"go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc"
+	"go.opentelemetry.io/otel/metric"
+	"go.opentelemetry.io/otel/propagation"
+	sdkmetric "go.opentelemetry.io/otel/sdk/metric"
+	"go.opentelemetry.io/otel/sdk/resource"
+	sdktrace "go.opentelemetry.io/otel/sdk/trace"
+	semconv "go.opentelemetry.io/otel/semconv/v1.4.0"
+)
+
+// pollInterval is the pause between two consecutive SPDK scrapes.
+const pollInterval = 5 * time.Second
+
+// Bdev holds the I/O statistics of one block device, decoded from an entry of
+// the "bdevs" array in the SPDK bdev_get_iostat reply.
+type Bdev struct {
+	Name              string `json:"name"`
+	BytesRead         int64  `json:"bytes_read"`
+	NumReadOps        int64  `json:"num_read_ops"`
+	BytesWritten      int64  `json:"bytes_written"`
+	NumWriteOps       int64  `json:"num_write_ops"`
+	ReadLatencyTicks  int64  `json:"read_latency_ticks"`
+	WriteLatencyTicks int64  `json:"write_latency_ticks"`
+}
+
+// SPDKResponse is the JSON envelope of the bdev_get_iostat reply.
+type SPDKResponse struct {
+	Result struct {
+		Bdevs []Bdev `json:"bdevs"`
+	} `json:"result"`
+}
+
+// bdevTotals remembers the totals last reported for one bdev so that only the
+// growth since the previous scrape is added to the counters.
+type bdevTotals struct {
+	bytesRead  int64
+	numReadOps int64
+}
+
+// initProvider installs the global OpenTelemetry meter and tracer providers,
+// both exporting over insecure OTLP/gRPC to OTEL_EXPORTER_OTLP_ENDPOINT
+// (default "otel-gw-collector:4317"). Any setup error terminates the process.
+// The returned function flushes and shuts both providers down.
+func initProvider() func() {
+	ctx := context.Background()
+
+	res, err := resource.New(ctx,
+		resource.WithAttributes(
+			semconv.ServiceNameKey.String("spdk-client"),
+		),
+	)
+	handleErr(err, "failed to create resource")
+
+	otelAgentAddr := os.Getenv("OTEL_EXPORTER_OTLP_ENDPOINT")
+	if otelAgentAddr == "" {
+		otelAgentAddr = "otel-gw-collector:4317"
+	}
+
+	// Metric Exporter
+	metricExp, err := otlpmetricgrpc.New(
+		ctx,
+		otlpmetricgrpc.WithInsecure(),
+		otlpmetricgrpc.WithEndpoint(otelAgentAddr),
+	)
+	handleErr(err, "Failed to create the collector metric exporter")
+
+	meterProvider := sdkmetric.NewMeterProvider(
+		sdkmetric.WithReader(sdkmetric.NewPeriodicReader(metricExp)),
+		sdkmetric.WithResource(res),
+	)
+	otel.SetMeterProvider(meterProvider)
+
+	// Trace Exporter
+	traceClient := otlptracegrpc.NewClient(
+		otlptracegrpc.WithInsecure(),
+		otlptracegrpc.WithEndpoint(otelAgentAddr),
+	)
+	traceExp, err := otlptrace.New(ctx, traceClient)
+	handleErr(err, "Failed to create trace exporter")
+
+	tracerProvider := sdktrace.NewTracerProvider(
+		sdktrace.WithSpanProcessor(sdktrace.NewBatchSpanProcessor(traceExp)),
+		sdktrace.WithResource(res),
+	)
+	otel.SetTracerProvider(tracerProvider)
+	otel.SetTextMapPropagator(propagation.TraceContext{})
+
+	return func() {
+		cxt, cancel := context.WithTimeout(ctx, 5*time.Second)
+		defer cancel()
+		// Shut down the provider rather than the bare exporter so the batch
+		// span processor exports its queued spans first.
+		if err := tracerProvider.Shutdown(cxt); err != nil {
+			log.Printf("Failed to shutdown tracer provider: %v", err)
+		}
+		if err := meterProvider.Shutdown(cxt); err != nil {
+			log.Printf("Failed to shutdown meter provider: %v", err)
+		}
+	}
+}
+
+// handleErr logs message together with err and exits the process when err is
+// non-nil; it is a no-op otherwise.
+func handleErr(err error, message string) {
+	if err != nil {
+		log.Fatalf("%s: %v", message, err)
+	}
+}
+
+// fetchSPDKMetrics POSTs a bdev_get_iostat request to the SPDK endpoint and
+// returns the decoded per-bdev statistics. Failures are logged and reported
+// as a nil slice so that one bad scrape does not stop the polling loop.
+func fetchSPDKMetrics(ctx context.Context) []Bdev {
+	const url = "http://spdk:9009"
+	reqBody := []byte(`{"id":1, "method": "bdev_get_iostat"}`)
+
+	// The timeout bounds every scrape so a hung endpoint cannot stall the loop.
+	client := http.Client{
+		Transport: otelhttp.NewTransport(http.DefaultTransport),
+		Timeout:   10 * time.Second,
+	}
+
+	req, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(reqBody))
+	if err != nil {
+		log.Printf("Failed to create request: %v", err)
+		return nil
+	}
+	req.SetBasicAuth("spdkuser", "spdkpass")
+	req.Header.Set("Content-Type", "application/json")
+
+	resp, err := client.Do(req)
+	if err != nil {
+		log.Printf("Failed to send request: %v", err)
+		return nil
+	}
+	defer resp.Body.Close()
+
+	body, err := io.ReadAll(resp.Body)
+	if err != nil {
+		log.Printf("Failed to read response body: %v", err)
+		return nil
+	}
+	if resp.StatusCode != http.StatusOK {
+		log.Printf("Unexpected SPDK response status %s: %s", resp.Status, body)
+		return nil
+	}
+
+	var response SPDKResponse
+	if err := json.Unmarshal(body, &response); err != nil {
+		log.Printf("Failed to parse SPDK response: %v", err)
+		return nil
+	}
+	return response.Result.Bdevs
+}
+
+// counterDelta converts a cumulative total into the increment to record. A
+// total below the previous one is treated as a counter reset on the SPDK
+// side, in which case the whole current total is the increment.
+func counterDelta(prev, cur int64) int64 {
+	if cur < prev {
+		return cur
+	}
+	return cur - prev
+}
+
+// collect runs one traced scrape and adds the growth of every bdev's read
+// totals to the counters. The scraped values are treated as running totals,
+// so adding them as-is on each scrape would count the same bytes repeatedly.
+func collect(ctx context.Context, bytesRead, numReadOps metric.Int64Counter, last map[string]bdevTotals) {
+	ctx, span := otel.Tracer("spdk-client").Start(ctx, "FetchSPDKMetrics")
+	defer span.End()
+
+	for _, bdev := range fetchSPDKMetrics(ctx) {
+		attrs := metric.WithAttributes(attribute.String("bdev.name", bdev.Name))
+		prev := last[bdev.Name]
+		bytesRead.Add(ctx, counterDelta(prev.bytesRead, bdev.BytesRead), attrs)
+		numReadOps.Add(ctx, counterDelta(prev.numReadOps, bdev.NumReadOps), attrs)
+		last[bdev.Name] = bdevTotals{bytesRead: bdev.BytesRead, numReadOps: bdev.NumReadOps}
+
+		fmt.Printf("Bdev: %s, BytesRead: %d, NumReadOps: %d\n",
+			bdev.Name, bdev.BytesRead, bdev.NumReadOps)
+	}
+}
+
+// main wires up telemetry, serves the health endpoint on :57400 and scrapes
+// SPDK every pollInterval until SIGINT or SIGTERM arrives.
+func main() {
+	shutdown := initProvider()
+	defer shutdown()
+
+	// Cancel ctx on SIGINT/SIGTERM so the deferred shutdown gets to run.
+	ctx, stop := signal.NotifyContext(context.Background(), os.Interrupt, syscall.SIGTERM)
+	defer stop()
+
+	meter := otel.Meter("spdk-client-meter")
+
+	// Metrics
+	bytesRead, err := meter.Int64Counter("spdk/bdev/bytes_read")
+	handleErr(err, "failed to create bytes_read counter")
+	numReadOps, err := meter.Int64Counter("spdk/bdev/read_ops")
+	handleErr(err, "failed to create read_ops counter")
+
+	mux := http.NewServeMux()
+
+	// Health check endpoint
+	mux.HandleFunc("/health", func(w http.ResponseWriter, r *http.Request) {
+		w.WriteHeader(http.StatusOK)
+		fmt.Fprintln(w, "OK")
+	})
+
+	// Application logic
+	mux.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) {
+		fmt.Fprintln(w, "Hello, World!")
+	})
+
+	// Start the server before the polling loop: the loop never returns on its
+	// own, so anything placed after it would be unreachable.
+	srv := &http.Server{Addr: ":57400", Handler: mux, ReadHeaderTimeout: 5 * time.Second}
+	go func() {
+		if err := srv.ListenAndServe(); err != nil && !errors.Is(err, http.ErrServerClosed) {
+			log.Printf("HTTP server stopped: %v", err)
+			stop() // end the polling loop as well
+		}
+	}()
+
+	ticker := time.NewTicker(pollInterval)
+	defer ticker.Stop()
+
+	last := make(map[string]bdevTotals)
+	for {
+		collect(ctx, bytesRead, numReadOps, last)
+
+		select {
+		case <-ctx.Done():
+			closeCtx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
+			defer cancel()
+			if err := srv.Shutdown(closeCtx); err != nil {
+				log.Printf("Failed to shutdown HTTP server: %v", err)
+			}
+			return
+		case <-ticker.C:
+		}
+	}
+}
diff --git a/config/otel-collector-config.yaml b/config/otel-collector-config.yaml index 512203f..5745f35 100644 --- a/config/otel-collector-config.yaml +++ b/config/otel-collector-config.yaml @@ -3,13 +3,30 @@ receivers: otlp: protocols: grpc: + + hostmetrics: + collection_interval: 1m + scrapers: + cpu: + load: + memory: + disk: + filesystem: + network: + paging: + processes: exporters: prometheus: endpoint: "0.0.0.0:8889" const_labels: label1: value1 - + + otlp: + endpoint: jaeger-all-in-one:4317 + tls: + insecure: true + logging: processors: @@ -28,8 +45,8 @@ service: traces: receivers: [otlp] processors: [batch] - exporters: [logging] + exporters: [logging,otlp] metrics: - receivers: [otlp] + receivers: [otlp,hostmetrics] processors: [batch] exporters: [logging, prometheus] diff --git a/docker-compose.yml b/docker-compose.yml index 87140f9..30468fc 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -146,6 +146,40 @@ services: timeout: 10s retries: 5 + go-app: + image: golang:1.23-alpine + working_dir: /app + volumes: + - ./config/demo-trace:/app + ports: + - "57400:57400" + command: sh -c "go run main.go" + depends_on: + - otel-gw-collector + - spdk + networks: + - opi + healthcheck: + test: wget --no-verbose --tries=1 --spider 
http://localhost:57400/health + interval: 10s + timeout: 10s + retries: 5 + + jaeger-all-in-one: + image: jaegertracing/all-in-one:latest + restart: always + ports: + - "16686:16686" + - "14268" + - "14250" + networks: + - opi + healthcheck: + test: wget --no-verbose --tries=1 --spider http://localhost:16686 || exit 1 + interval: 6s + timeout: 10s + retries: 5 + volumes: influxdb-storage: diff --git a/hostmetricsreceiver.md b/hostmetricsreceiver.md index b8511b7..12f815c 100644 --- a/hostmetricsreceiver.md +++ b/hostmetricsreceiver.md @@ -30,6 +30,18 @@ hostmetrics: ... ``` +## Query examples + +```text +curl --fail http://127.0.0.1:9091/api/v1/query?query=system_cpu_time_seconds_total | grep system_cpu_time_seconds_total +curl --fail http://127.0.0.1:9091/api/v1/query?query=system_disk_io_time_seconds_total | grep system_disk_io_time_seconds_total +curl --fail http://127.0.0.1:9091/api/v1/query?query=system_filesystem_inodes_usage | grep system_filesystem_inodes_usage +curl --fail http://127.0.0.1:9091/api/v1/query?query=system_memory_usage_bytes | grep system_memory_usage_bytes +curl --fail http://127.0.0.1:9091/api/v1/query?query=system_network_connections | grep system_network_connections +curl --fail http://127.0.0.1:9091/api/v1/query?query=system_paging_faults_total | grep system_paging_faults_total +curl --fail http://127.0.0.1:9091/api/v1/query?query=system_processes_count | grep system_processes_count +``` + The available scrapers are: | Scraper | Supported OSs | Description |