Skip to content

Commit f3c92f3

Browse files
blewis12schmikeipjanotti
authored
Fix: Skip vSAN collection and logging when all vSAN metrics are disabled (open-telemetry#43704)
#### Description This PR adds a new method in the `vcenter` receiver component called `hasEnabledVSANMetrics()`. Whether or not vSAN metrics are enabled is calculated by checking if all metrics are disabled, if any are enabled then the method will return `true`, else it will return `false`. The method is then used to guard vSAN-related initialization and logging logic - when all vSAN metrics are disabled, the receiver will now skip vSAN API setup and suppress related warning or error logs. #### Link to tracking issue open-telemetry#38489 #### Testing I've added `metadata_test.go`, which uses the `metadata` file itself to detect which vSAN metrics are available, and will fail the test if any are added or removed. The test will pass if what is returned from `getCheckedVSANMetrics()` matches what is in `hasEnabledVSANMetrics()`. This does add an extra weird step for developers editing the metadata yaml, since the test basically instructs them to add or remove a metric from two places: the test and the source file. It's a bit redundant, but feels clear enough for now Maybe there's a better way to achieve this? I also played around with a reflection based approach in `hasEnabledVSANMetrics` but this felt a bit icky. Open to feedback on that one :) #### Documentation --------- Co-authored-by: Keith Schmitt <[email protected]> Co-authored-by: Paulo Janotti <[email protected]>
1 parent f1f50ba commit f3c92f3

File tree

5 files changed

+243
-39
lines changed

5 files changed

+243
-39
lines changed
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
# Use this changelog template to create an entry for release notes.
2+
3+
# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix'
4+
change_type: 'bug_fix'
5+
6+
# The name of the component, or a single word describing the area of concern, (e.g. receiver/filelog)
7+
component: 'receiver/vcenter'
8+
9+
# A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`).
10+
note: 'Skip vSAN collection and logging when all vSAN metrics are disabled'
11+
12+
# Mandatory: One or more tracking issues related to the change. You can use the PR number here if no issue exists.
13+
issues: [38489]
14+
15+
# (Optional) One or more lines of additional information to render under the primary note.
16+
# These lines will be padded with 2 spaces and then inserted directly into the document.
17+
# Use pipe (|) for multiline entries.
18+
subtext:
19+
20+
# If your change doesn't affect end users or the exported elements of any package,
21+
# you should instead start your pull request title with [chore] or use the "Skip Changelog" label.
22+
# Optional: The change log or logs in which this entry should be included.
23+
# e.g. '[user]' or '[user, api]'
24+
# Include 'user' if the change is relevant to end users.
25+
# Include 'api' if there is a change to a library API.
26+
# Default: '[user]'
27+
change_logs: [api]

receiver/vcenterreceiver/go.mod

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ require (
2727
go.uber.org/goleak v1.3.0
2828
go.uber.org/multierr v1.11.0
2929
go.uber.org/zap v1.27.0
30+
gopkg.in/yaml.v3 v3.0.1
3031
)
3132

3233
require (
@@ -114,7 +115,6 @@ require (
114115
google.golang.org/genproto/googleapis/rpc v0.0.0-20250804133106-a7a43d27e69b // indirect
115116
google.golang.org/grpc v1.76.0 // indirect
116117
google.golang.org/protobuf v1.36.10 // indirect
117-
gopkg.in/yaml.v3 v3.0.1 // indirect
118118
)
119119

120120
replace github.com/open-telemetry/opentelemetry-collector-contrib/pkg/pdatatest => ../../pkg/pdatatest
Lines changed: 134 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,134 @@
1+
// Copyright The OpenTelemetry Authors
2+
// SPDX-License-Identifier: Apache-2.0
3+
4+
package vcenterreceiver
5+
6+
import (
7+
"os"
8+
"path/filepath"
9+
"strings"
10+
"testing"
11+
12+
"gopkg.in/yaml.v3"
13+
)
14+
15+
// TestVSANMetricsFromMetadata ensures hasEnabledVSANMetrics() covers all vSAN metrics
16+
// defined in metadata.yaml. This test will fail if a new vSAN metric is added to
17+
// metadata.yaml but not included in hasEnabledVSANMetrics().
18+
func TestVSANMetricsFromMetadata(t *testing.T) {
19+
vSANMetricsFromYAML := getVSANMetricsFromMetadataYAML(t)
20+
checkedMetrics := getCheckedVSANMetrics()
21+
22+
// Convert to sets for comparison
23+
yamlSet := make(map[string]bool)
24+
for _, metric := range vSANMetricsFromYAML {
25+
yamlSet[metric] = true
26+
}
27+
28+
checkedSet := make(map[string]bool)
29+
for _, metric := range checkedMetrics {
30+
checkedSet[metric] = true
31+
}
32+
33+
var missingInFunction []string
34+
for metric := range yamlSet {
35+
if !checkedSet[metric] {
36+
missingInFunction = append(missingInFunction, metric)
37+
}
38+
}
39+
40+
var extraInFunction []string
41+
for metric := range checkedSet {
42+
if !yamlSet[metric] {
43+
extraInFunction = append(extraInFunction, metric)
44+
}
45+
}
46+
47+
if len(missingInFunction) > 0 {
48+
t.Errorf("hasEnabledVSANMetrics() is missing these vSAN metrics from metadata.yaml:")
49+
for _, metric := range missingInFunction {
50+
t.Errorf(" - %s", metric)
51+
}
52+
t.Error("Please add the missing metrics to hasEnabledVSANMetrics() function")
53+
}
54+
55+
if len(extraInFunction) > 0 {
56+
t.Errorf("hasEnabledVSANMetrics() checks these metrics not found in metadata.yaml:")
57+
for _, metric := range extraInFunction {
58+
t.Errorf(" - %s", metric)
59+
}
60+
t.Error("Please remove non-vSAN metrics from hasEnabledVSANMetrics() function")
61+
}
62+
63+
if len(missingInFunction) == 0 && len(extraInFunction) == 0 {
64+
t.Logf("All %d vSAN metrics from metadata.yaml are properly checked", len(vSANMetricsFromYAML))
65+
}
66+
}
67+
68+
type metadataYAML struct {
69+
Metrics map[string]any `yaml:"metrics"`
70+
}
71+
72+
func getVSANMetricsFromMetadataYAML(t *testing.T) []string {
73+
metadataPath := findMetadataYAML(t)
74+
75+
data, err := os.ReadFile(metadataPath)
76+
if err != nil {
77+
t.Fatalf("Failed to read metadata.yaml: %v", err)
78+
}
79+
80+
var metadata metadataYAML
81+
if err := yaml.Unmarshal(data, &metadata); err != nil {
82+
t.Fatalf("Failed to parse metadata.yaml: %v", err)
83+
}
84+
85+
// Extract vSAN metrics
86+
var vSANMetrics []string
87+
for metricName := range metadata.Metrics {
88+
if strings.Contains(metricName, "vsan") {
89+
vSANMetrics = append(vSANMetrics, metricName)
90+
}
91+
}
92+
93+
return vSANMetrics
94+
}
95+
96+
func findMetadataYAML(t *testing.T) string {
97+
dir, err := os.Getwd()
98+
if err != nil {
99+
t.Fatalf("Failed to get current directory: %v", err)
100+
}
101+
102+
// Look for metadata.yaml in current directory
103+
metadataPath := filepath.Join(dir, "metadata.yaml")
104+
if _, err := os.Stat(metadataPath); err == nil {
105+
return metadataPath
106+
}
107+
108+
t.Fatalf("Could not find metadata.yaml")
109+
return ""
110+
}
111+
112+
// getCheckedVSANMetrics returns the metrics checked in hasEnabledVSANMetrics()
113+
func getCheckedVSANMetrics() []string {
114+
// This should match exactly what's in hasEnabledVSANMetrics()
115+
return []string{
116+
// Cluster vSAN metrics
117+
"vcenter.cluster.vsan.congestions",
118+
"vcenter.cluster.vsan.latency.avg",
119+
"vcenter.cluster.vsan.operations",
120+
"vcenter.cluster.vsan.throughput",
121+
122+
// Host vSAN metrics
123+
"vcenter.host.vsan.cache.hit_rate",
124+
"vcenter.host.vsan.congestions",
125+
"vcenter.host.vsan.latency.avg",
126+
"vcenter.host.vsan.operations",
127+
"vcenter.host.vsan.throughput",
128+
129+
// VM vSAN metrics
130+
"vcenter.vm.vsan.latency.avg",
131+
"vcenter.vm.vsan.operations",
132+
"vcenter.vm.vsan.throughput",
133+
}
134+
}

receiver/vcenterreceiver/processors.go

Lines changed: 27 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -198,14 +198,16 @@ func (v *vcenterMetricScraper) buildHostMetrics(
198198
v.recordHostPerformanceMetrics(hostPerfMetrics)
199199
}
200200

201-
if hs.Config == nil || hs.Config.VsanHostConfig == nil || hs.Config.VsanHostConfig.ClusterInfo == nil {
202-
v.logger.Info("couldn't determine UUID necessary for vSAN metrics for host " + hs.Name)
203-
v.mb.EmitForResource(metadata.WithResource(rb.Emit()))
204-
return vmRefToComputeRef, nil
205-
}
206-
vSANMetrics := v.scrapeData.hostVSANMetricsByUUID[hs.Config.VsanHostConfig.ClusterInfo.NodeUuid]
207-
if vSANMetrics != nil {
208-
v.recordHostVSANMetrics(vSANMetrics)
201+
if v.hasEnabledVSANMetrics() {
202+
if hs.Config == nil || hs.Config.VsanHostConfig == nil || hs.Config.VsanHostConfig.ClusterInfo == nil {
203+
v.logger.Info("couldn't determine UUID necessary for vSAN metrics for host " + hs.Name)
204+
v.mb.EmitForResource(metadata.WithResource(rb.Emit()))
205+
return vmRefToComputeRef, nil
206+
}
207+
vSANMetrics := v.scrapeData.hostVSANMetricsByUUID[hs.Config.VsanHostConfig.ClusterInfo.NodeUuid]
208+
if vSANMetrics != nil {
209+
v.recordHostVSANMetrics(vSANMetrics)
210+
}
209211
}
210212
v.mb.EmitForResource(metadata.WithResource(rb.Emit()))
211213

@@ -332,9 +334,11 @@ func (v *vcenterMetricScraper) buildVMMetrics(
332334
v.recordVMPerformanceMetrics(perfMetrics)
333335
}
334336

335-
vSANMetrics := v.scrapeData.vmVSANMetricsByUUID[vm.Config.InstanceUuid]
336-
if vSANMetrics != nil {
337-
v.recordVMVSANMetrics(vSANMetrics)
337+
if v.hasEnabledVSANMetrics() {
338+
vSANMetrics := v.scrapeData.vmVSANMetricsByUUID[vm.Config.InstanceUuid]
339+
if vSANMetrics != nil {
340+
v.recordVMVSANMetrics(vSANMetrics)
341+
}
338342
}
339343
v.mb.EmitForResource(metadata.WithResource(rb.Emit()))
340344

@@ -380,16 +384,19 @@ func (v *vcenterMetricScraper) buildClusterMetrics(
380384
}
381385
// Record and emit Cluster metric data points
382386
v.recordClusterStats(ts, cr, vmGroupInfo)
383-
vSANConfig := cr.ConfigurationEx.(*types.ClusterConfigInfoEx).VsanConfigInfo
384-
if vSANConfig == nil || vSANConfig.Enabled == nil || !*vSANConfig.Enabled || vSANConfig.DefaultConfig == nil {
385-
v.logger.Info("couldn't determine UUID necessary for vSAN metrics for cluster " + cr.Name)
386-
v.mb.EmitForResource(metadata.WithResource(rb.Emit()))
387-
return err
388-
}
389387

390-
vSANMetrics := v.scrapeData.clusterVSANMetricsByUUID[vSANConfig.DefaultConfig.Uuid]
391-
if vSANMetrics != nil {
392-
v.recordClusterVSANMetrics(vSANMetrics)
388+
if v.hasEnabledVSANMetrics() {
389+
vSANConfig := cr.ConfigurationEx.(*types.ClusterConfigInfoEx).VsanConfigInfo
390+
if vSANConfig == nil || vSANConfig.Enabled == nil || !*vSANConfig.Enabled || vSANConfig.DefaultConfig == nil {
391+
v.logger.Info("couldn't determine UUID necessary for vSAN metrics for cluster " + cr.Name)
392+
v.mb.EmitForResource(metadata.WithResource(rb.Emit()))
393+
return err
394+
}
395+
396+
vSANMetrics := v.scrapeData.clusterVSANMetricsByUUID[vSANConfig.DefaultConfig.Uuid]
397+
if vSANMetrics != nil {
398+
v.recordClusterVSANMetrics(vSANMetrics)
399+
}
393400
}
394401

395402
v.mb.EmitForResource(metadata.WithResource(rb.Emit()))

receiver/vcenterreceiver/scraper.go

Lines changed: 54 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,36 @@ func newVcenterScrapeData() *vcenterScrapeData {
8888
}
8989
}
9090

91+
func (v *vcenterMetricScraper) hasEnabledVSANMetrics() bool {
92+
metrics := v.config.Metrics
93+
94+
// Check cluster vSAN metrics
95+
if metrics.VcenterClusterVsanCongestions.Enabled ||
96+
metrics.VcenterClusterVsanLatencyAvg.Enabled ||
97+
metrics.VcenterClusterVsanOperations.Enabled ||
98+
metrics.VcenterClusterVsanThroughput.Enabled {
99+
return true
100+
}
101+
102+
// Check host vSAN metrics
103+
if metrics.VcenterHostVsanCacheHitRate.Enabled ||
104+
metrics.VcenterHostVsanCongestions.Enabled ||
105+
metrics.VcenterHostVsanLatencyAvg.Enabled ||
106+
metrics.VcenterHostVsanOperations.Enabled ||
107+
metrics.VcenterHostVsanThroughput.Enabled {
108+
return true
109+
}
110+
111+
// Check VM vSAN metrics
112+
if metrics.VcenterVMVsanLatencyAvg.Enabled ||
113+
metrics.VcenterVMVsanOperations.Enabled ||
114+
metrics.VcenterVMVsanThroughput.Enabled {
115+
return true
116+
}
117+
118+
return false
119+
}
120+
91121
func (v *vcenterMetricScraper) Start(ctx context.Context, _ component.Host) error {
92122
connectErr := v.client.EnsureConnection(ctx)
93123
// don't fail to start if we cannot establish connection, just log an error
@@ -246,13 +276,15 @@ func (v *vcenterMetricScraper) scrapeComputes(ctx context.Context, dc *mo.Datace
246276
}
247277
}
248278

249-
// Get all Cluster vSAN metrics and store for later retrieval
250-
vSANMetrics, err := v.client.VSANClusters(ctx, v.scrapeData.clusterRefs)
251-
if err != nil {
252-
errs.AddPartial(1, fmt.Errorf("failed to retrieve vSAN metrics for Clusters: %w", err))
253-
return
279+
if v.hasEnabledVSANMetrics() {
280+
// Get all Cluster vSAN metrics and store for later retrieval (only if vSAN metrics are enabled)
281+
vSANMetrics, err := v.client.VSANClusters(ctx, v.scrapeData.clusterRefs)
282+
if err != nil {
283+
errs.AddPartial(1, fmt.Errorf("failed to retrieve vSAN metrics for Clusters: %w", err))
284+
return
285+
}
286+
v.scrapeData.clusterVSANMetricsByUUID = vSANMetrics.MetricResultsByUUID
254287
}
255-
v.scrapeData.clusterVSANMetricsByUUID = vSANMetrics.MetricResultsByUUID
256288
}
257289

258290
// scrapeHosts scrapes and stores all relevant metric/property data for a Datacenter's HostSystems
@@ -287,12 +319,15 @@ func (v *vcenterMetricScraper) scrapeHosts(ctx context.Context, dc *mo.Datacente
287319
v.scrapeData.hostPerfMetricsByRef = results.resultsByRef
288320
}
289321

290-
vSANMetrics, err := v.client.VSANHosts(ctx, v.scrapeData.clusterRefs)
291-
if err != nil {
292-
errs.AddPartial(1, fmt.Errorf("failed to retrieve vSAN metrics for Hosts: %w", err))
293-
return
322+
if v.hasEnabledVSANMetrics() {
323+
// Get all Host vSAN metrics and store for later retrieval
324+
vSANMetrics, err := v.client.VSANHosts(ctx, v.scrapeData.clusterRefs)
325+
if err != nil {
326+
errs.AddPartial(1, fmt.Errorf("failed to retrieve vSAN metrics for Hosts: %w", err))
327+
return
328+
}
329+
v.scrapeData.hostVSANMetricsByUUID = vSANMetrics.MetricResultsByUUID
294330
}
295-
v.scrapeData.hostVSANMetricsByUUID = vSANMetrics.MetricResultsByUUID
296331
}
297332

298333
// scrapeResourcePools scrapes and stores all relevant property data for a Datacenter's ResourcePools/vApps
@@ -343,12 +378,13 @@ func (v *vcenterMetricScraper) scrapeVirtualMachines(ctx context.Context, dc *mo
343378
v.scrapeData.vmPerfMetricsByRef = results.resultsByRef
344379
}
345380

346-
// Get all VirtualMachine vSAN metrics and store for later retrieval
347-
vSANMetrics, err := v.client.VSANVirtualMachines(ctx, v.scrapeData.clusterRefs)
348-
if err != nil {
349-
errs.AddPartial(1, fmt.Errorf("failed to retrieve vSAN metrics for VirtualMachines: %w", err))
350-
return
381+
if v.hasEnabledVSANMetrics() {
382+
// Get all VirtualMachine vSAN metrics and store for later retrieval
383+
vSANMetrics, err := v.client.VSANVirtualMachines(ctx, v.scrapeData.clusterRefs)
384+
if err != nil {
385+
errs.AddPartial(1, fmt.Errorf("failed to retrieve vSAN metrics for VirtualMachines: %w", err))
386+
return
387+
}
388+
v.scrapeData.vmVSANMetricsByUUID = vSANMetrics.MetricResultsByUUID
351389
}
352-
353-
v.scrapeData.vmVSANMetricsByUUID = vSANMetrics.MetricResultsByUUID
354390
}

0 commit comments

Comments
 (0)