Skip to content

Commit 19792c0

Browse files
fix (resilience): Added more checks for pointers and improved error management (#26)
* fix (resilience): Added more checks for pointers and improved error management
1 parent d40d895 commit 19792c0

14 files changed

+144
-76
lines changed

internal/client/client.go

+3-3
Original file line numberDiff line numberDiff line change
@@ -17,16 +17,16 @@ func New(vmURL string, vmUsername string, vmPassword string, ValidateSSL bool) (
1717
defer cancel()
1818

1919
// // Parse URL from string
20-
url, err := soap.ParseURL(vmURL)
20+
urlParsed, err := soap.ParseURL(vmURL)
2121
if err != nil {
2222
return nil, err
2323
}
2424

2525
// Override username and/or password as required
26-
setCredentials(url, vmUsername, vmPassword)
26+
setCredentials(urlParsed, vmUsername, vmPassword)
2727

2828
// Connect and log in to ESX/i or vCenter
29-
return govmomi.NewClient(ctx, url, !ValidateSSL)
29+
return govmomi.NewClient(ctx, urlParsed, !ValidateSSL)
3030
}
3131

3232
func setCredentials(u *url.URL, un string, pw string) {

internal/collect/clusters.go

+4-2
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,8 @@ func Clusters(config *load.Config) {
1717
for i, dc := range config.Datacenters {
1818
cv, err := m.CreateContainerView(ctx, dc.Datacenter.Reference(), []string{"ComputeResource"}, true)
1919
if err != nil {
20-
config.Logrus.WithError(err).Fatal("failed to create ComputeResource container view")
20+
config.Logrus.WithError(err).Error("failed to create ComputeResource container view")
21+
continue
2122
}
2223
defer cv.Destroy(ctx)
2324
var clusters []mo.ClusterComputeResource
@@ -28,7 +29,8 @@ func Clusters(config *load.Config) {
2829
[]string{"summary", "host", "datastore", "name", "network", "configuration"},
2930
&clusters)
3031
if err != nil {
31-
config.Logrus.WithError(err).Fatal("failed to retrieve ClusterComputeResource")
32+
config.Logrus.WithError(err).Error("failed to retrieve ClusterComputeResource")
33+
continue
3234
}
3335
for j := 0; j < len(clusters); j++ {
3436
config.Datacenters[i].Clusters[clusters[j].Self] = &clusters[j]

internal/collect/datastores.go

+4-2
Original file line numberDiff line numberDiff line change
@@ -18,15 +18,17 @@ func Datastores(config *load.Config) {
1818
for i, dc := range config.Datacenters {
1919
cv, err := m.CreateContainerView(ctx, dc.Datacenter.Reference(), []string{"Datastore"}, true)
2020
if err != nil {
21-
config.Logrus.WithError(err).Fatal("failed to create Datastore container view")
21+
config.Logrus.WithError(err).Error("failed to create Datastore container view")
22+
continue
2223
}
2324
defer cv.Destroy(ctx)
2425

2526
var datastores []mo.Datastore
2627
// Reference: https://code.vmware.com/apis/42/vsphere/doc/vim.Datastore.html
2728
err = cv.Retrieve(ctx, []string{"Datastore"}, nil, &datastores)
2829
if err != nil {
29-
config.Logrus.WithError(err).Fatal("failed to retrieve Datastore")
30+
config.Logrus.WithError(err).Error("failed to retrieve Datastore")
31+
continue
3032
}
3133
for j := 0; j < len(datastores); j++ {
3234
config.Datacenters[i].Datastores[datastores[j].Self] = &datastores[j]

internal/collect/hosts.go

+4-2
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,8 @@ func Hosts(config *load.Config) {
1919

2020
cv, err := m.CreateContainerView(ctx, dc.Datacenter.Reference(), []string{"HostSystem"}, true)
2121
if err != nil {
22-
config.Logrus.WithError(err).Fatal("failed to create HostSystem container view")
22+
config.Logrus.WithError(err).Error("failed to create HostSystem container view")
23+
continue
2324
}
2425

2526
defer cv.Destroy(ctx)
@@ -32,7 +33,8 @@ func Hosts(config *load.Config) {
3233
[]string{"summary", "overallStatus", "config", "network", "vm", "runtime", "parent", "datastore"},
3334
&hosts)
3435
if err != nil {
35-
config.Logrus.WithError(err).Fatal("failed to retrieve HostSystems")
36+
config.Logrus.WithError(err).Error("failed to retrieve HostSystems")
37+
continue
3638
}
3739
for j := 0; j < len(hosts); j++ {
3840
config.Datacenters[i].Hosts[hosts[j].Self] = &hosts[j]

internal/collect/networks.go

+4-2
Original file line numberDiff line numberDiff line change
@@ -18,15 +18,17 @@ func Networks(config *load.Config) {
1818
for i, dc := range config.Datacenters {
1919
cv, err := m.CreateContainerView(ctx, dc.Datacenter.Reference(), []string{"Network"}, true)
2020
if err != nil {
21-
config.Logrus.WithError(err).Fatal("failed to create Network container view")
21+
config.Logrus.WithError(err).Error("failed to create Network container view")
22+
continue
2223
}
2324
defer cv.Destroy(ctx)
2425

2526
var networks []mo.Network
2627
// Reference: http://pubs.vmware.com/vsphere-60/topic/com.vmware.wssdk.apiref.doc/vim.Network.html
2728
err = cv.Retrieve(ctx, []string{"Network"}, nil, &networks)
2829
if err != nil {
29-
config.Logrus.WithError(err).Fatal("failed to retrieve Networks")
30+
config.Logrus.WithError(err).Error("failed to retrieve Networks")
31+
continue
3032
}
3133
for j := 0; j < len(networks); j++ {
3234
config.Datacenters[i].Networks[networks[j].Self] = &networks[j]

internal/collect/resourcepools.go

+4-2
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,8 @@ func ResourcePools(config *load.Config) {
1818
for i, dc := range config.Datacenters {
1919
cv, err := m.CreateContainerView(ctx, dc.Datacenter.Reference(), []string{"ResourcePool"}, true)
2020
if err != nil {
21-
config.Logrus.WithError(err).Fatal("failed to create ResourcePool container view")
21+
config.Logrus.WithError(err).Error("failed to create ResourcePool container view")
22+
continue
2223
}
2324
defer cv.Destroy(ctx)
2425
var resourcePools []mo.ResourcePool
@@ -28,7 +29,8 @@ func ResourcePools(config *load.Config) {
2829
[]string{"summary", "owner", "parent", "runtime", "name", "overallStatus", "vm", "resourcePool"},
2930
&resourcePools)
3031
if err != nil {
31-
config.Logrus.WithError(err).Fatal("failed to retrieve ResourcePools")
32+
config.Logrus.WithError(err).Error("failed to retrieve ResourcePools")
33+
continue
3234
}
3335
for j := 0; j < len(resourcePools); j++ {
3436
config.Datacenters[i].ResourcePools[resourcePools[j].Self] = &resourcePools[j]

internal/collect/vms.go

+4-2
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,8 @@ func VirtualMachines(config *load.Config) {
1818
for i, dc := range config.Datacenters {
1919
cv, err := m.CreateContainerView(ctx, dc.Datacenter.Reference(), []string{"VirtualMachine"}, true)
2020
if err != nil {
21-
config.Logrus.WithError(err).Fatal("failed to create VirtualMachine container view")
21+
config.Logrus.WithError(err).Error("failed to create VirtualMachine container view")
22+
continue
2223
}
2324
defer cv.Destroy(ctx)
2425

@@ -30,7 +31,8 @@ func VirtualMachines(config *load.Config) {
3031
[]string{"summary", "network", "config", "guest", "runtime", "resourcePool", "datastore", "overallStatus"},
3132
&vms)
3233
if err != nil {
33-
config.Logrus.WithError(err).Fatal("failed to retrieve VM Summaries")
34+
config.Logrus.WithError(err).Error("failed to retrieve VM Summaries")
35+
continue
3436
}
3537
for j := 0; j < len(vms); j++ {
3638
config.Datacenters[i].VirtualMachines[vms[j].Self] = &vms[j]

internal/process/cluster.go

+17-12
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,6 @@ func createClusterSamples(config *load.Config) {
1313
for _, dc := range config.Datacenters {
1414
for _, cluster := range dc.Clusters {
1515
// // resolve hypervisor host
16-
summary := cluster.Summary.GetComputeResourceSummary()
1716
datacenterName := dc.Datacenter.Name
1817

1918
//Retrieving the list of host belonging to the cluster
@@ -42,7 +41,11 @@ func createClusterSamples(config *load.Config) {
4241

4342
entityName := sanitizeEntityName(config, cluster.Name, datacenterName)
4443

45-
ms := createNewEntityWithMetricSet(config, entityTypeCluster, entityName, entityName)
44+
ms, err := createNewEntityWithMetricSet(config, entityTypeCluster, entityName, entityName)
45+
if err != nil {
46+
config.Logrus.WithError(err).WithField("clusterName", entityName).Error("failed to create metricSet")
47+
continue
48+
}
4649

4750
if config.Args.DatacenterLocation != "" {
4851
checkError(config, ms.SetMetric("datacenterLocation", config.Args.DatacenterLocation, metric.ATTRIBUTE))
@@ -56,16 +59,18 @@ func createClusterSamples(config *load.Config) {
5659
checkError(config, ms.SetMetric("hostList", hostList, metric.ATTRIBUTE))
5760
checkError(config, ms.SetMetric("datastoreList", datastoreList, metric.ATTRIBUTE))
5861

59-
checkError(config, ms.SetMetric("overallStatus", string(summary.OverallStatus), metric.ATTRIBUTE))
60-
61-
checkError(config, ms.SetMetric("cpu.cores", summary.NumCpuCores, metric.GAUGE))
62-
checkError(config, ms.SetMetric("cpu.threads", summary.NumCpuThreads, metric.GAUGE))
63-
checkError(config, ms.SetMetric("cpu.totalEffectiveMHz", summary.EffectiveCpu, metric.GAUGE))
64-
checkError(config, ms.SetMetric("cpu.totalMHz", summary.TotalCpu, metric.GAUGE))
65-
checkError(config, ms.SetMetric("mem.size", summary.TotalMemory/(1<<20), metric.GAUGE))
66-
checkError(config, ms.SetMetric("mem.effectiveSize", summary.EffectiveMemory, metric.GAUGE))
67-
checkError(config, ms.SetMetric("effectiveHosts", summary.NumEffectiveHosts, metric.GAUGE))
68-
checkError(config, ms.SetMetric("hosts", summary.NumHosts, metric.GAUGE))
62+
summary := cluster.Summary.GetComputeResourceSummary()
63+
if summary != nil {
64+
checkError(config, ms.SetMetric("overallStatus", string(summary.OverallStatus), metric.ATTRIBUTE))
65+
checkError(config, ms.SetMetric("cpu.cores", summary.NumCpuCores, metric.GAUGE))
66+
checkError(config, ms.SetMetric("cpu.threads", summary.NumCpuThreads, metric.GAUGE))
67+
checkError(config, ms.SetMetric("cpu.totalEffectiveMHz", summary.EffectiveCpu, metric.GAUGE))
68+
checkError(config, ms.SetMetric("cpu.totalMHz", summary.TotalCpu, metric.GAUGE))
69+
checkError(config, ms.SetMetric("mem.size", summary.TotalMemory/(1<<20), metric.GAUGE))
70+
checkError(config, ms.SetMetric("mem.effectiveSize", summary.EffectiveMemory, metric.GAUGE))
71+
checkError(config, ms.SetMetric("effectiveHosts", summary.NumEffectiveHosts, metric.GAUGE))
72+
checkError(config, ms.SetMetric("hosts", summary.NumHosts, metric.GAUGE))
73+
}
6974

7075
//DRS metrics
7176
if cluster.Configuration.DrsConfig.Enabled != nil {

internal/process/datacenter.go

+22-11
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,11 @@ func createDatacenterSamples(config *load.Config) {
3333
datacenterName := dc.Datacenter.Name
3434
entityName := sanitizeEntityName(config, datacenterName, "")
3535
uniqueIdentifier := entityName
36-
ms := createNewEntityWithMetricSet(config, entityTypeDatacenter, entityName, uniqueIdentifier)
36+
ms, err := createNewEntityWithMetricSet(config, entityTypeDatacenter, entityName, uniqueIdentifier)
37+
if err != nil {
38+
config.Logrus.WithError(err).WithField("datacenterName", entityName).WithField("uniqueIdentifier", uniqueIdentifier).Error("failed to create metricSet")
39+
continue
40+
}
3741

3842
for _, datastore := range dc.Datastores {
3943
totalDatastoreCapacity = totalDatastoreCapacity + datastore.Summary.Capacity
@@ -48,21 +52,28 @@ func createDatacenterSamples(config *load.Config) {
4852
}
4953

5054
for _, host := range dc.Hosts {
51-
totalMHz = totalMHz + (float64(host.Summary.Hardware.CpuMhz) * float64(host.Summary.Hardware.NumCpuCores))
52-
cpuOverallUsage = cpuOverallUsage + float64(host.Summary.QuickStats.OverallCpuUsage)
53-
totalCpuHost = totalCpuHost + host.Summary.Hardware.NumCpuCores
54-
totalMemoryHost = totalMemoryHost + host.Summary.Hardware.MemorySize/(1<<20)
55-
totalMemoryUsedHost = totalMemoryUsedHost + host.Summary.QuickStats.OverallMemoryUsage
55+
if host.Summary.Hardware != nil {
56+
totalMHz = totalMHz + (float64(host.Summary.Hardware.CpuMhz) * float64(host.Summary.Hardware.NumCpuCores))
57+
cpuOverallUsage = cpuOverallUsage + float64(host.Summary.QuickStats.OverallCpuUsage)
58+
totalCpuHost = totalCpuHost + host.Summary.Hardware.NumCpuCores
59+
totalMemoryHost = totalMemoryHost + host.Summary.Hardware.MemorySize/(1<<20)
60+
totalMemoryUsedHost = totalMemoryUsedHost + host.Summary.QuickStats.OverallMemoryUsage
61+
}
5662
}
5763

58-
cpuPercentHost := cpuOverallUsage / totalMHz * 100
59-
memoryPercentHost := float64(totalMemoryUsedHost) / float64(totalMemoryHost) * 100
64+
if totalMHz != 0 {
65+
cpuPercentHost := cpuOverallUsage / totalMHz * 100
66+
checkError(config, ms.SetMetric("cpu.overallUsagePercentage", cpuPercentHost, metric.GAUGE))
67+
}
68+
69+
if totalMemoryHost != 0 {
70+
memoryPercentHost := float64(totalMemoryUsedHost) / float64(totalMemoryHost) * 100
71+
checkError(config, ms.SetMetric("mem.usagePercentage", memoryPercentHost, metric.GAUGE))
72+
}
6073

61-
checkError(config, ms.SetMetric("mem.usage", totalMemoryUsedHost, metric.GAUGE))
6274
checkError(config, ms.SetMetric("mem.size", totalMemoryHost, metric.GAUGE))
63-
checkError(config, ms.SetMetric("mem.usagePercentage", memoryPercentHost, metric.GAUGE))
75+
checkError(config, ms.SetMetric("mem.usage", totalMemoryUsedHost, metric.GAUGE))
6476
checkError(config, ms.SetMetric("cpu.cores", totalCpuHost, metric.GAUGE))
65-
checkError(config, ms.SetMetric("cpu.overallUsagePercentage", cpuPercentHost, metric.GAUGE))
6677
checkError(config, ms.SetMetric("cpu.overallUsage", cpuOverallUsage, metric.GAUGE))
6778
checkError(config, ms.SetMetric("cpu.totalMHz", totalMHz, metric.GAUGE))
6879

internal/process/datastores.go

+10-3
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,11 @@ func createDatastoreSamples(config *load.Config) {
2020

2121
dataStoreID := ds.Summary.Url
2222

23-
ms := createNewEntityWithMetricSet(config, entityTypeDatastore, entityName, dataStoreID)
23+
ms, err := createNewEntityWithMetricSet(config, entityTypeDatastore, entityName, dataStoreID)
24+
if err != nil {
25+
config.Logrus.WithError(err).WithField("datastoreName", entityName).WithField("dataStoreID", dataStoreID).Error("failed to create metricSet")
26+
continue
27+
}
2428

2529
if config.Args.DatacenterLocation != "" {
2630
checkError(config, ms.SetMetric("datacenterLocation", config.Args.DatacenterLocation, metric.ATTRIBUTE))
@@ -42,8 +46,11 @@ func createDatastoreSamples(config *load.Config) {
4246

4347
switch info := ds.Info.(type) {
4448
case *types.NasDatastoreInfo:
45-
checkError(config, ms.SetMetric("nas.remoteHost", info.Nas.RemoteHost, metric.ATTRIBUTE))
46-
checkError(config, ms.SetMetric("nas.remotePath", info.Nas.RemotePath, metric.ATTRIBUTE))
49+
if info.Nas != nil {
50+
checkError(config, ms.SetMetric("nas.remoteHost", info.Nas.RemoteHost, metric.ATTRIBUTE))
51+
checkError(config, ms.SetMetric("nas.remotePath", info.Nas.RemotePath, metric.ATTRIBUTE))
52+
53+
}
4754
}
4855
}
4956
}

internal/process/hosts.go

+23-9
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,13 @@ import (
1313
func createHostSamples(config *load.Config) {
1414
for _, dc := range config.Datacenters {
1515
for _, host := range dc.Hosts {
16+
17+
if host.Summary.Hardware == nil {
18+
continue
19+
}
20+
// bios uuid identifies the host unequivocally and is available from vcenter/host api
21+
uuid := host.Summary.Hardware.Uuid
22+
1623
hostConfigName := host.Summary.Config.Name
1724
entityName := hostConfigName
1825
datacenterName := dc.Datacenter.Name
@@ -23,10 +30,11 @@ func createHostSamples(config *load.Config) {
2330

2431
entityName = sanitizeEntityName(config, entityName, datacenterName)
2532

26-
// bios uuid identifies the host unequivocally and is available from vcenter/host api
27-
uuid := host.Summary.Hardware.Uuid
28-
29-
ms := createNewEntityWithMetricSet(config, entityTypeHost, entityName, uuid)
33+
ms, err := createNewEntityWithMetricSet(config, entityTypeHost, entityName, uuid)
34+
if err != nil {
35+
config.Logrus.WithError(err).WithField("hostName", entityName).WithField("uuid", uuid).Error("failed to create metricSet")
36+
continue
37+
}
3038

3139
if cluster, ok := dc.Clusters[host.Parent.Reference()]; ok {
3240
checkError(config, ms.SetMetric("clusterName", cluster.Name, metric.ATTRIBUTE))
@@ -52,7 +60,6 @@ func createHostSamples(config *load.Config) {
5260
checkError(config, ms.SetMetric("datacenterLocation", config.Args.DatacenterLocation, metric.ATTRIBUTE))
5361
}
5462
checkError(config, ms.SetMetric("hypervisorHostname", hostConfigName, metric.ATTRIBUTE))
55-
checkError(config, ms.SetMetric("uuid", host.Summary.Hardware.Uuid, metric.ATTRIBUTE))
5663

5764
checkError(config, ms.SetMetric("vmCount", len(host.Vm), metric.GAUGE))
5865

@@ -74,6 +81,8 @@ func createHostSamples(config *load.Config) {
7481
}
7582
checkError(config, ms.SetMetric("networkNameList", networkList, metric.ATTRIBUTE))
7683

84+
checkError(config, ms.SetMetric("uuid", host.Summary.Hardware.Uuid, metric.ATTRIBUTE))
85+
7786
// memory
7887
memoryTotal := host.Summary.Hardware.MemorySize / (1 << 20)
7988
checkError(config, ms.SetMetric("mem.size", memoryTotal, metric.GAUGE))
@@ -97,8 +106,11 @@ func createHostSamples(config *load.Config) {
97106
TotalMHz := float64(CPUMhz) * float64(CPUCores)
98107
checkError(config, ms.SetMetric("cpu.totalMHz", TotalMHz, metric.GAUGE))
99108

100-
cpuPercent := (float64(host.Summary.QuickStats.OverallCpuUsage) / TotalMHz) * 100
101-
checkError(config, ms.SetMetric("cpu.percent", cpuPercent, metric.GAUGE))
109+
if TotalMHz != 0 {
110+
cpuPercent := (float64(host.Summary.QuickStats.OverallCpuUsage) / TotalMHz) * 100
111+
checkError(config, ms.SetMetric("cpu.percent", cpuPercent, metric.GAUGE))
112+
}
113+
102114
checkError(config, ms.SetMetric("cpu.overallUsage", host.Summary.QuickStats.OverallCpuUsage, metric.GAUGE))
103115

104116
CPUAvailable := TotalMHz - float64(host.Summary.QuickStats.OverallCpuUsage)
@@ -109,8 +121,10 @@ func createHostSamples(config *load.Config) {
109121
if host.Config != nil {
110122
if host.Config.FileSystemVolume != nil {
111123
for _, mount := range host.Config.FileSystemVolume.MountInfo {
112-
capacity := mount.Volume.GetHostFileSystemVolume().Capacity
113-
diskTotalMiB += capacity / (1 << 20)
124+
hostFileSystemVolume := mount.Volume.GetHostFileSystemVolume()
125+
if hostFileSystemVolume != nil {
126+
diskTotalMiB += hostFileSystemVolume.Capacity / (1 << 20)
127+
}
114128
}
115129
}
116130
}

internal/process/process.go

+4-4
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,6 @@ const (
2121

2222
// Run process samples
2323
func Run(config *load.Config) {
24-
2524
// create samples async
2625
var wg sync.WaitGroup
2726
wg.Add(6)
@@ -111,14 +110,15 @@ func sanitizeEntityName(config *load.Config, entityName string, datacenterName s
111110
return entityName
112111
}
113112

114-
func createNewEntityWithMetricSet(config *load.Config, typeEntity string, entityName string, uniqueIdentifier string) *metric.Set {
113+
func createNewEntityWithMetricSet(config *load.Config, typeEntity string, entityName string, uniqueIdentifier string) (*metric.Set, error) {
115114
workingEntity, err := config.Integration.Entity(uniqueIdentifier, "vsphere-"+strings.ToLower(typeEntity))
116115
if err != nil {
117116
config.Logrus.WithError(err).Error("failed to create entity")
117+
return nil, err
118118
}
119119

120120
// entity displayName
121-
workingEntity.SetInventoryItem("vsphere"+typeEntity, "name", entityName)
121+
checkError(config, workingEntity.SetInventoryItem("vsphere"+typeEntity, "name", entityName))
122122
ms := workingEntity.NewMetricSet("VSphere" + typeEntity + "Sample")
123-
return ms
123+
return ms, nil
124124
}

internal/process/resourcepool.go

+6-2
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,11 @@ func createResourcePoolSamples(config *load.Config) {
2929

3030
entityName = sanitizeEntityName(config, entityName, datacenterName)
3131

32-
ms := createNewEntityWithMetricSet(config, entityTypeResourcePool, entityName, entityName)
32+
ms, err := createNewEntityWithMetricSet(config, entityTypeResourcePool, entityName, entityName)
33+
if err != nil {
34+
config.Logrus.WithError(err).WithField("resourcePoolName", entityName).Error("failed to create metricSet")
35+
continue
36+
}
3337

3438
checkError(config, ms.SetMetric("resourcePoolName", resourcePoolName, metric.ATTRIBUTE))
3539
if config.Args.DatacenterLocation != "" {
@@ -42,7 +46,7 @@ func createResourcePoolSamples(config *load.Config) {
4246
}
4347
}
4448

45-
memTotal := (rp.Runtime.Memory.ReservationUsed + rp.Runtime.Memory.UnreservedForPool) / (1e6)
49+
memTotal := (rp.Runtime.Memory.ReservationUsed + rp.Runtime.Memory.UnreservedForPool) / (1 << 20)
4650
checkError(config, ms.SetMetric("mem.size", memTotal, metric.GAUGE))
4751

4852
summary := rp.Summary.GetResourcePoolSummary()

0 commit comments

Comments
 (0)