Skip to content

Commit 270f8d3

Browse files
Merge pull request #42 from alibaba/bugfix-metrics
bugfix: fix heart beat metric data
2 parents 810a725 + 21f0a15 commit 270f8d3

1 file changed

Lines changed: 31 additions & 18 deletions

File tree

internal/remoting/heartbeat.go

Lines changed: 31 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -21,14 +21,16 @@ import (
2121
"encoding/json"
2222
"errors"
2323
"fmt"
24-
"math"
2524
"os"
2625
"runtime"
26+
"strconv"
2727
"syscall"
2828
"time"
2929

3030
"github.com/asynkron/protoactor-go/actor"
31+
"github.com/shirou/gopsutil/v4/disk"
3132
"github.com/shirou/gopsutil/v4/load"
33+
"github.com/shirou/gopsutil/v4/mem"
3234
"google.golang.org/protobuf/proto"
3335

3436
"github.com/alibaba/schedulerx-worker-go/config"
@@ -118,32 +120,43 @@ func sendHeartbeat(ctx context.Context, req *schedulerx.WorkerHeartBeatRequest)
118120
return trans.WriteAkkaMsg(akkaMsg, conn)
119121
}
120122

121-
func getLoadAvg() ([]float64, error) {
122-
avg, err := load.Avg()
123+
func metricsJsonStr() string {
124+
loadAvg, err := load.Avg()
123125
if err != nil {
124-
return nil, err
126+
logger.Warnf("Failed to get system load average:" + err.Error())
127+
return "{}"
125128
}
126-
return []float64{avg.Load1, avg.Load5, avg.Load15}, nil
127-
}
128129

129-
func metricsJsonStr() string {
130-
loadAvg, err := getLoadAvg()
130+
cpus, _ := strconv.Atoi(os.Getenv("SIGMA_MAX_PROCESSORS_LIMIT"))
131+
if cpus <= 0 {
132+
cpus = runtime.NumCPU()
133+
}
134+
135+
ms, err := mem.VirtualMemory()
131136
if err != nil {
132-
logger.Warnf("Failed to get system load average:" + err.Error())
137+
logger.Warnf("Failed to get system mem info:" + err.Error())
133138
return "{}"
134139
}
135-
cpus := runtime.NumCPU()
136-
memstats := new(runtime.MemStats)
137-
runtime.ReadMemStats(memstats)
138140

139141
metricsJson := map[string]float64{
140-
"cpuLoad1": loadAvg[0],
141-
"cpuLoad5": loadAvg[1],
142+
"cpuLoad1": loadAvg.Load1,
143+
"cpuLoad5": loadAvg.Load5,
142144
"cpuProcessors": float64(cpus),
143-
"heap1Usage": float64(memstats.HeapInuse) / float64(memstats.HeapSys),
144-
"heap1Used": float64(memstats.HeapInuse) / 1024 / 1024,
145-
"heap5Usage": float64(memstats.HeapInuse) / math.Max(float64(memstats.HeapSys), 1),
146-
"heapMax": float64(memstats.HeapSys) / 1024 / 1024,
145+
"heap1Usage": ms.UsedPercent / 100,
146+
"heap1Used": float64(ms.Used) / 1024 / 1024,
147+
"heap5Usage": ms.UsedPercent / 100,
148+
"heapMax": float64(ms.Available+ms.Used) / 1024 / 1024,
149+
}
150+
diskStat, err := disk.Usage("/")
151+
if err != nil {
152+
fmt.Println("Failed to get system disk usage info:" + err.Error())
153+
} else {
154+
diskUsed := diskStat.Used / 1024 / 1024
155+
diskFree := diskStat.Free / 1024 / 1024
156+
diskMax := float64(diskUsed + diskFree)
157+
metricsJson["diskUsed"] = float64(diskUsed)
158+
metricsJson["diskMax"] = diskMax
159+
metricsJson["diskUsage"] = float64(diskUsed) / diskMax
147160
}
148161
ret, err := json.Marshal(metricsJson)
149162
if err != nil {

0 commit comments

Comments
 (0)