Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 4 additions & 3 deletions confgenerator/confgenerator.go
Original file line number Diff line number Diff line change
Expand Up @@ -224,8 +224,8 @@ func (uc *UnifiedConfig) GenerateOtelConfig(ctx context.Context, outDir, stateDi
agentSelfMetrics := AgentSelfMetrics{
MetricsVersionLabel: metricVersionLabel,
LoggingVersionLabel: loggingVersionLabel,
FluentBitPort: fluentbit.MetricsPort,
OtelPort: otel.MetricsPort,
FluentBitPort: int(uc.GetFluentBitMetricsPort()),
OtelPort: int(uc.GetOtelMetricsPort()),
OtelRuntimeDir: outDir,
}
agentSelfMetrics.AddSelfMetricsPipelines(receiverPipelines, pipelines, ctx)
Expand All @@ -238,6 +238,7 @@ func (uc *UnifiedConfig) GenerateOtelConfig(ctx context.Context, outDir, stateDi
LogLevel: uc.getOTelLogLevel(),
ReceiverPipelines: receiverPipelines,
Pipelines: pipelines,
MetricsPort: uc.GetOtelMetricsPort(),
Exporters: map[otel.ExporterType]otel.ExporterComponents{
otel.System: {
Exporter: googleCloudExporter(userAgent, false, false),
Expand Down Expand Up @@ -608,7 +609,7 @@ func (uc *UnifiedConfig) generateFluentbitComponents(ctx context.Context, userAg
out = append(out, addGceMetadataAttributesProcessor(ctx).Components(ctx, "*", "*.default-data-proc.gce_metadata")...)
}
out = append(out, uc.generateSelfLogsComponents(ctx, userAgent)...)
out = append(out, fluentbit.MetricsOutputComponent())
out = append(out, fluentbit.MetricsOutputComponent(int(uc.GetFluentBitMetricsPort())))

return out, nil
}
Expand Down
20 changes: 20 additions & 0 deletions confgenerator/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,11 +18,13 @@ import (
"context"
"fmt"
"log"
"os"
"path/filepath"
"reflect"
"runtime"
"slices"
"sort"
"strconv"
"strings"
"time"

Expand Down Expand Up @@ -66,6 +68,24 @@ func (uc *UnifiedConfig) HasCombined() bool {
return uc.Combined != nil
}

func (uc *UnifiedConfig) GetFluentBitMetricsPort() uint16 {
if portStr := os.Getenv("OPS_AGENT_FLUENT_BIT_METRICS_PORT"); portStr != "" {
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I like Quentin's suggestion to make these env vars prefixed with EXPERIMENTAL (EXPERIMENT? whatever our convention is).

Might also be good to make the environment variable names a constant, that way the tests can use the constant too.

if port, err := strconv.ParseUint(portStr, 10, 16); err == nil {
return uint16(port)
}
}
return fluentbit.MetricsPort
}

func (uc *UnifiedConfig) GetOtelMetricsPort() uint16 {
if portStr := os.Getenv("OPS_AGENT_OTEL_METRICS_PORT"); portStr != "" {
if port, err := strconv.ParseUint(portStr, 10, 16); err == nil {
return uint16(port)
}
}
return otel.MetricsPort
}

func (uc *UnifiedConfig) DeepCopy(ctx context.Context) (*UnifiedConfig, error) {
toYaml, err := yaml.Marshal(uc)
if err != nil {
Expand Down
4 changes: 2 additions & 2 deletions confgenerator/fluentbit/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,15 +29,15 @@ func MetricsInputComponent() Component {
}
}

func MetricsOutputComponent() Component {
func MetricsOutputComponent(port int) Component {
return Component{
Kind: "OUTPUT",
Config: map[string]string{
// https://docs.fluentbit.io/manual/pipeline/outputs/prometheus-exporter
"Name": "prometheus_exporter",
"Match": "*",
"host": "0.0.0.0",
"port": strconv.Itoa(MetricsPort),
"port": strconv.Itoa(port),
},
}
}
3 changes: 2 additions & 1 deletion confgenerator/otel/modular.go
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,7 @@ type ModularConfig struct {
Pipelines map[string]Pipeline
Exporters map[ExporterType]ExporterComponents
Extensions map[string]Component
MetricsPort uint16

// Test-only options:
// Don't generate any self-metrics
Expand Down Expand Up @@ -171,7 +172,7 @@ func (c ModularConfig) Generate(ctx context.Context) (string, error) {
"exporter": map[string]interface{}{
"prometheus": map[string]interface{}{
"host": "0.0.0.0",
"port": MetricsPort,
"port": c.MetricsPort,

// See https://docs.datadoghq.com/opentelemetry/migrate/collector_0_120_0/#changes-to-prometheus-server-reader-defaults for why these fields are needed.
// See https://github.com/open-telemetry/opentelemetry-collector/pull/11611/files#diff-150d72bc611b4b0de17f646768979b15936f820a029cafa91c4037d50ae47e5a for the actual upstream otel code changes.
Expand Down
61 changes: 61 additions & 0 deletions confgenerator/port_env_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
package confgenerator

import (
"os"
"testing"

"github.com/GoogleCloudPlatform/ops-agent/confgenerator/fluentbit"
"github.com/GoogleCloudPlatform/ops-agent/confgenerator/otel"
)

func TestPortOverriddenByEnv(t *testing.T) {
// Set env vars
os.Setenv("OPS_AGENT_FLUENT_BIT_METRICS_PORT", "40002")
os.Setenv("OPS_AGENT_OTEL_METRICS_PORT", "40001")
defer os.Unsetenv("OPS_AGENT_FLUENT_BIT_METRICS_PORT")
defer os.Unsetenv("OPS_AGENT_OTEL_METRICS_PORT")

uc := &UnifiedConfig{}

if port := uc.GetFluentBitMetricsPort(); port != 40002 {
t.Errorf("Expected Fluent Bit port 40002, got %d", port)
}

if port := uc.GetOtelMetricsPort(); port != 40001 {
t.Errorf("Expected OTel port 40001, got %d", port)
}
}

func TestPortDefaultWhenEnvEmpty(t *testing.T) {
// Ensure env vars are not set
os.Unsetenv("OPS_AGENT_FLUENT_BIT_METRICS_PORT")
os.Unsetenv("OPS_AGENT_OTEL_METRICS_PORT")

uc := &UnifiedConfig{}

if port := uc.GetFluentBitMetricsPort(); port != fluentbit.MetricsPort {
t.Errorf("Expected Fluent Bit default port %d, got %d", fluentbit.MetricsPort, port)
}

if port := uc.GetOtelMetricsPort(); port != otel.MetricsPort {
t.Errorf("Expected OTel default port %d, got %d", otel.MetricsPort, port)
}
}

func TestPortInvalidEnvFallbacksToDefault(t *testing.T) {
// Set invalid env vars
os.Setenv("OPS_AGENT_FLUENT_BIT_METRICS_PORT", "invalid")
os.Setenv("OPS_AGENT_OTEL_METRICS_PORT", "65536") // Out of range for uint16
defer os.Unsetenv("OPS_AGENT_FLUENT_BIT_METRICS_PORT")
defer os.Unsetenv("OPS_AGENT_OTEL_METRICS_PORT")

uc := &UnifiedConfig{}

if port := uc.GetFluentBitMetricsPort(); port != fluentbit.MetricsPort {
t.Errorf("Expected Fluent Bit default port %d for invalid env, got %d", fluentbit.MetricsPort, port)
}

if port := uc.GetOtelMetricsPort(); port != otel.MetricsPort {
t.Errorf("Expected OTel default port %d for invalid env, got %d", otel.MetricsPort, port)
}
}
81 changes: 81 additions & 0 deletions integration_test/ops_agent_test/main_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -6131,6 +6131,87 @@ func TestUninstallRemovesService(t *testing.T) {
})
}

func TestMetricsPortOverrideEnv(t *testing.T) {
t.Parallel()
RunForEachImageAndFeatureFlag(t, []string{agents.OtlpHttpExporterFeatureFlag}, func(t *testing.T, imageSpec string, feature string) {
t.Parallel()
if gce.IsWindows(imageSpec) {
t.Skip("Skipping on Windows for now as it requires different environment variable setup")
}
if gce.IsOpsAgentUAPPlugin() {
t.Skip("Skipping on UAP plugin as it is not supported")
}
ctx, logger, vm := setupMainLogAndVM(t, imageSpec)

// Setup agent with default config first
if err := agents.SetupOpsAgentWithFeatureFlag(ctx, logger, vm, "", feature); err != nil {
t.Fatal(err)
}

// Stop the agent to avoid race conditions while setting up overrides
if _, err := gce.RunRemotely(ctx, logger, vm, "sudo systemctl stop google-cloud-ops-agent"); err != nil {
t.Fatal(err)
}

// Set up systemd overrides for Fluent Bit
fbOverrideDir := "/etc/systemd/system/google-cloud-ops-agent-fluent-bit.service.d"
fbOverrideFile := fbOverrideDir + "/override.conf"
if _, err := gce.RunRemotely(ctx, logger, vm, fmt.Sprintf("sudo mkdir -p %s", fbOverrideDir)); err != nil {
t.Fatal(err)
}
fbOverrideContent := `[Service]
Environment="OPS_AGENT_FLUENT_BIT_METRICS_PORT=40002"
`
if _, err := gce.RunRemotely(ctx, logger, vm, fmt.Sprintf("echo '%s' | sudo tee %s", fbOverrideContent, fbOverrideFile)); err != nil {
t.Fatal(err)
}

// Set up systemd overrides for OTel Collector
otelOverrideDir := "/etc/systemd/system/google-cloud-ops-agent-opentelemetry-collector.service.d"
otelOverrideFile := otelOverrideDir + "/override.conf"
if _, err := gce.RunRemotely(ctx, logger, vm, fmt.Sprintf("sudo mkdir -p %s", otelOverrideDir)); err != nil {
t.Fatal(err)
}
otelOverrideContent := `[Service]
Environment="OPS_AGENT_OTEL_METRICS_PORT=40001"
Environment="OPS_AGENT_FLUENT_BIT_METRICS_PORT=40002"
`
if _, err := gce.RunRemotely(ctx, logger, vm, fmt.Sprintf("echo '%s' | sudo tee %s", otelOverrideContent, otelOverrideFile)); err != nil {
t.Fatal(err)
}

// Reload systemd and restart agent
if _, err := gce.RunRemotely(ctx, logger, vm, "sudo systemctl daemon-reload"); err != nil {
t.Fatal(err)
}
if _, err := gce.RunRemotely(ctx, logger, vm, "sudo systemctl start google-cloud-ops-agent"); err != nil {
t.Fatal(err)
}

// Wait for agent to start up
time.Sleep(20 * time.Second)

// Verify that we can scrape metrics from the new ports
// Fluent Bit metrics on 40002
fbMetricsOut, err := gce.RunRemotely(ctx, logger, vm, "curl -s localhost:40002/metrics")
if err != nil {
t.Fatalf("Failed to scrape Fluent Bit metrics on port 40002: %v", err)
}
if !strings.Contains(fbMetricsOut.Stdout, "fluentbit_uptime") {
t.Fatalf("Fluent Bit metrics on port 40002 do not contain expected content. Output: %s", fbMetricsOut.Stdout)
}

// OTel Collector metrics on 40001
otelMetricsOut, err := gce.RunRemotely(ctx, logger, vm, "curl -s localhost:40001/metrics")
if err != nil {
t.Fatalf("Failed to scrape OTel Collector metrics on port 40001: %v", err)
}
if !strings.Contains(otelMetricsOut.Stdout, "otelcol_") {
t.Fatalf("OTel Collector metrics on port 40001 do not contain expected content. Output: %s", otelMetricsOut.Stdout)
}
})
}

func TestMain(m *testing.M) {
code := m.Run()
gce.CleanupKeysOrDie()
Expand Down
Loading