Skip to content

Commit 061acd6

Browse files
authored
feat(metrics): remove unused EMA sql track/query (#533)
Fault-prone...

```
unexpected fault address 0x6304e55848a6b
fatal error: fault
[signal SIGSEGV: segmentation violation code=0x1 addr=0x6304e55848a6b pc=0x4247c1]

goroutine 307 gp=0xc0004fb6c0 m=42 mp=0xc001b1b108 [running]:
runtime.throw({0x1b12994?, 0x7c47e5b223c8?})
	/opt/hostedtoolcache/go/1.23.6/x64/src/runtime/panic.go:1067 +0x48 fp=0xc001540e30 sp=0xc001540e00 pc=0x48bac8
runtime.sigpanic()
	/opt/hostedtoolcache/go/1.23.6/x64/src/runtime/signal_unix.go:931 +0x26c fp=0xc001540e90 sp=0xc001540e30 pc=0x48dd6c
runtime.buildTypeAssertCache.func1(...)
	/opt/hostedtoolcache/go/1.23.6/x64/src/runtime/iface.go:532
runtime.buildTypeAssertCache(0x18d96c0?, 0x18d9940, 0x7c479d9bae28)
	/opt/hostedtoolcache/go/1.23.6/x64/src/runtime/iface.go:544 +0x141 fp=0xc001540ed8 sp=0xc001540e90 pc=0x4247c1
runtime.typeAssert(0x2ab06a0, 0x18d9940?)
	/opt/hostedtoolcache/go/1.23.6/x64/src/runtime/iface.go:497 +0xfd fp=0xc001540f18 sp=0xc001540ed8 pc=0x4245bd
database/sql.convertAssignRows({0x18d9940, 0xc002aba6e0}, {0x180ae80, 0xc002aba710}, 0xc002c6d720)
	/opt/hostedtoolcache/go/1.23.6/x64/src/database/sql/convert.go:395 +0x1f0e fp=0xc0015411d0 sp=0xc001540f18 pc=0xacebae
database/sql.(*Rows).Scan(0xc002c6d720, {0xc001541478, 0x1, 0xc0015412a8?})
	/opt/hostedtoolcache/go/1.23.6/x64/src/database/sql/sql.go:3392 +0x39a fp=0xc0015412b0 sp=0xc0015411d0 pc=0xadf27a
database/sql.(*Row).Scan(0xc001541410, {0xc001541478?, 0xc00038bc70?, 0x1})
	/opt/hostedtoolcache/go/1.23.6/x64/src/database/sql/sql.go:3509 +0x132 fp=0xc001541320 sp=0xc0015412b0 pc=0xadf952
github.com/leptonai/gpud/pkg/gpud-metrics/state.EMASince({0x1e187f0, 0xc00038bc70}, 0xc00042cdd0, {0x1b3366e, 0x12}, {0x1b7db54, 0x32}, {0xc000922510, 0x28}, 0x45d964b800, ...)
	/home/runner/work/gpud/gpud/pkg/gpud-metrics/state/state.go:335 +0x579 fp=0xc0015414b0 sp=0xc001541320 pc=0xbfe279
github.com/leptonai/gpud/pkg/gpud-metrics.(*continuousAverager).EMA(0xc001d2a000, {0x1e187f0, 0xc00038bc70}, {0xc001d67bf0, 0x2, 0x2})
```

Signed-off-by: Gyuho Lee <[email protected]>
1 parent f6c8442 commit 061acd6

File tree

12 files changed

+8
-623
lines changed

12 files changed

+8
-623
lines changed

components/cpu/metrics/metrics.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ import (
1414

1515
const SubSystem = "cpu"
1616

17-
// Used for tracking the past x-minute averages + EMAs.
17+
// Used for tracking the past x-minute averages.
1818
var defaultPeriods = []time.Duration{5 * time.Minute}
1919

2020
var (

components/disk/metrics/metrics.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ import (
1414

1515
const SubSystem = "disk"
1616

17-
// Used for tracking the past x-minute averages + EMAs.
17+
// Used for tracking the past x-minute averages.
1818
var defaultPeriods = []time.Duration{5 * time.Minute}
1919

2020
var (

components/memory/metrics/metrics.go

+1-40
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ import (
1414

1515
const SubSystem = "memory"
1616

17-
// Used for tracking the past x-minute averages + EMAs.
17+
// Used for tracking the past x-minute averages.
1818
var defaultPeriods = []time.Duration{5 * time.Minute}
1919

2020
var (
@@ -64,15 +64,6 @@ var (
6464
},
6565
[]string{"last_period"},
6666
)
67-
usedBytesEMA = prometheus.NewGaugeVec(
68-
prometheus.GaugeOpts{
69-
Namespace: "",
70-
Subsystem: SubSystem,
71-
Name: "used_bytes_ema",
72-
Help: "tracks the used memory in bytes with exponential moving average",
73-
},
74-
[]string{"ema_period"},
75-
)
7667

7768
usedPercent = prometheus.NewGauge(
7869
prometheus.GaugeOpts{
@@ -92,15 +83,6 @@ var (
9283
},
9384
[]string{"last_period"},
9485
)
95-
usedPercentEMA = prometheus.NewGaugeVec(
96-
prometheus.GaugeOpts{
97-
Namespace: "",
98-
Subsystem: SubSystem,
99-
Name: "used_percent_ema",
100-
Help: "tracks the percentage of memory used with exponential moving average",
101-
},
102-
[]string{"ema_period"},
103-
)
10486

10587
freeBytes = prometheus.NewGauge(
10688
prometheus.GaugeOpts{
@@ -172,15 +154,6 @@ func SetUsedBytes(ctx context.Context, bytes float64, currentTime time.Time) err
172154
return err
173155
}
174156
usedBytesAverage.WithLabelValues(duration.String()).Set(avg)
175-
176-
ema, err := usedBytesAverager.EMA(
177-
ctx,
178-
components_metrics.WithEMAPeriod(duration),
179-
)
180-
if err != nil {
181-
return err
182-
}
183-
usedBytesEMA.WithLabelValues(duration.String()).Set(ema)
184157
}
185158

186159
return nil
@@ -199,12 +172,6 @@ func SetUsedPercent(ctx context.Context, pct float64, currentTime time.Time) err
199172
return err
200173
}
201174
usedPercentAverage.WithLabelValues(duration.String()).Set(avg)
202-
203-
ema, err := usedPercentAverager.EMA(ctx, components_metrics.WithEMAPeriod(duration))
204-
if err != nil {
205-
return err
206-
}
207-
usedPercentEMA.WithLabelValues(duration.String()).Set(ema)
208175
}
209176

210177
return nil
@@ -229,15 +196,9 @@ func Register(reg *prometheus.Registry, dbRW *sql.DB, dbRO *sql.DB, tableName st
229196
if err := reg.Register(usedBytes); err != nil {
230197
return err
231198
}
232-
if err := reg.Register(usedBytesEMA); err != nil {
233-
return err
234-
}
235199
if err := reg.Register(usedPercent); err != nil {
236200
return err
237201
}
238-
if err := reg.Register(usedPercentEMA); err != nil {
239-
return err
240-
}
241202
if err := reg.Register(freeBytes); err != nil {
242203
return err
243204
}

pkg/gpud-metrics/averager.go

-29
Original file line numberDiff line numberDiff line change
@@ -27,11 +27,6 @@ type Averager interface {
2727
// If since is zero, returns the average value for all data points.
2828
Avg(ctx context.Context, opts ...OpOption) (float64, error)
2929

30-
// EMA returns the EMA value from the "since" time.
31-
// If since is zero, returns the EMA value for all data points.
32-
// If the ema period is zero, returns the 1-minute EMA value.
33-
EMA(ctx context.Context, opts ...OpOption) (float64, error)
34-
3530
// Returns all the data points since the given time.
3631
// If since is zero, returns all metrics.
3732
Read(ctx context.Context, opts ...OpOption) (state.Metrics, error)
@@ -61,10 +56,6 @@ func (n *noOpAverager) Avg(ctx context.Context, opts ...OpOption) (float64, erro
6156
return 0, nil
6257
}
6358

64-
func (n *noOpAverager) EMA(ctx context.Context, opts ...OpOption) (float64, error) {
65-
return 0, nil
66-
}
67-
6859
func (n *noOpAverager) Read(ctx context.Context, opts ...OpOption) (state.Metrics, error) {
6960
return state.Metrics{}, nil
7061
}
@@ -153,16 +144,6 @@ func (c *continuousAverager) Avg(ctx context.Context, opts ...OpOption) (float64
153144
return state.AvgSince(ctx, c.dbRO, c.tableName, c.metricName, op.metricSecondaryName, op.since)
154145
}
155146

156-
// EMA returns the EMA value from the "since" time.
157-
// If since is zero, returns the EMA value for all data points.
158-
func (c *continuousAverager) EMA(ctx context.Context, opts ...OpOption) (float64, error) {
159-
op := &Op{}
160-
if err := op.applyOpts(opts); err != nil {
161-
return 0.0, err
162-
}
163-
return state.EMASince(ctx, c.dbRO, c.tableName, c.metricName, op.metricSecondaryName, op.emaPeriod, op.since)
164-
}
165-
166147
func (c *continuousAverager) Read(ctx context.Context, opts ...OpOption) (state.Metrics, error) {
167148
op := &Op{}
168149
if err := op.applyOpts(opts); err != nil {
@@ -174,7 +155,6 @@ func (c *continuousAverager) Read(ctx context.Context, opts ...OpOption) (state.
174155
type Op struct {
175156
currentTime time.Time
176157
since time.Time
177-
emaPeriod time.Duration
178158
metricSecondaryName string
179159
}
180160

@@ -188,9 +168,6 @@ func (op *Op) applyOpts(opts []OpOption) error {
188168
if op.currentTime.IsZero() {
189169
op.currentTime = time.Now().UTC()
190170
}
191-
if op.emaPeriod == 0 {
192-
op.emaPeriod = time.Minute
193-
}
194171

195172
return nil
196173
}
@@ -207,12 +184,6 @@ func WithSince(t time.Time) OpOption {
207184
}
208185
}
209186

210-
func WithEMAPeriod(period time.Duration) OpOption {
211-
return func(op *Op) {
212-
op.emaPeriod = period
213-
}
214-
}
215-
216187
func WithMetricSecondaryName(name string) OpOption {
217188
return func(op *Op) {
218189
op.metricSecondaryName = name

pkg/gpud-metrics/averager_test.go

-191
Original file line numberDiff line numberDiff line change
@@ -74,51 +74,13 @@ func TestAveragerObserve(t *testing.T) {
7474
t.Errorf("AvgSince() returned error: %v", err)
7575
}
7676

77-
// test EMA
78-
start = time.Now()
79-
emaResult, err := a.EMA(ctx, WithEMAPeriod(time.Minute))
80-
emaLatency := time.Since(start)
81-
t.Logf("EMA latency: %v", emaLatency)
82-
if err != nil {
83-
t.Errorf("EMA() returned error: %v", err)
84-
}
85-
86-
// EMA should be closer to recent values, so it should be higher than the average
87-
if emaResult <= avgResult {
88-
t.Errorf("EMA() = %f; expected to be greater than AvgSince() = %f", emaResult, avgResult)
89-
}
90-
9177
t.Logf("AvgSince result: %f", avgResult)
92-
t.Logf("EMA result: %f", emaResult)
9378

9479
expectedAvg := 250.5 // (1 + 500) / 2
9580
if avgResult != expectedAvg {
9681
t.Errorf("AvgSince() = %f; want %f", avgResult, expectedAvg)
9782
}
9883

99-
// test EMA with different time ranges
100-
testRanges := []struct {
101-
name string
102-
duration time.Duration
103-
}{
104-
{"Last 1 minute", time.Minute},
105-
{"Last 5 minutes", 5 * time.Minute},
106-
{"Last 10 minutes", 10 * time.Minute},
107-
}
108-
109-
for _, tr := range testRanges {
110-
since := now.Add(-tr.duration)
111-
emaResult, err := a.EMA(ctx, WithEMAPeriod(time.Minute), WithSince(since))
112-
if err != nil {
113-
t.Errorf("EMA() for %s returned error: %v", tr.name, err)
114-
}
115-
avgResult, err := a.Avg(ctx, WithSince(since))
116-
if err != nil {
117-
t.Errorf("AvgSince() for %s returned error: %v", tr.name, err)
118-
}
119-
t.Logf("%s - AvgSince: %f, EMA: %f", tr.name, avgResult, emaResult)
120-
}
121-
12284
allMetrics, err := a.Read(ctx)
12385
if err != nil {
12486
t.Errorf("All() returned error: %v", err)
@@ -234,141 +196,6 @@ func TestEmptyAverager(t *testing.T) {
234196
}
235197
}
236198

237-
func TestContinuousAveragerRead(t *testing.T) {
238-
dbRW, dbRO, cleanup := sqlite.OpenTestDB(t)
239-
defer cleanup()
240-
241-
ctx, cancel := context.WithCancel(context.Background())
242-
defer cancel()
243-
244-
if err := metrics_state.CreateTableMetrics(ctx, dbRW, "test_table"); err != nil {
245-
t.Fatalf("failed to create table: %v", err)
246-
}
247-
248-
createTime := func(minutes int) time.Time {
249-
return time.Date(2025, 1, 1, 0, minutes, 0, 0, time.UTC)
250-
}
251-
252-
tests := []struct {
253-
name string
254-
setup func() *continuousAverager
255-
since time.Time
256-
expected float64
257-
}{
258-
{
259-
name: "empty averager",
260-
setup: func() *continuousAverager {
261-
return NewAverager(dbRW, dbRO, "test_table", "empty averager").(*continuousAverager)
262-
},
263-
since: time.Time{},
264-
expected: 0.0,
265-
},
266-
{
267-
name: "all values",
268-
setup: func() *continuousAverager {
269-
a := NewAverager(dbRW, dbRO, "test_table", "all values").(*continuousAverager)
270-
if err := a.Observe(ctx, 1.0, WithCurrentTime(createTime(1))); err != nil {
271-
t.Fatalf("Observe(1.0) returned error: %v", err)
272-
}
273-
if err := a.Observe(ctx, 2.0, WithCurrentTime(createTime(2))); err != nil {
274-
t.Fatalf("Observe(2.0) returned error: %v", err)
275-
}
276-
if err := a.Observe(ctx, 3.0, WithCurrentTime(createTime(3))); err != nil {
277-
t.Fatalf("Observe(3.0) returned error: %v", err)
278-
}
279-
return a
280-
},
281-
since: time.Time{},
282-
expected: 2.0,
283-
},
284-
{
285-
name: "since middle",
286-
setup: func() *continuousAverager {
287-
a := NewAverager(dbRW, dbRO, "test_table", "since middle").(*continuousAverager)
288-
if err := a.Observe(ctx, 1.0, WithCurrentTime(createTime(1))); err != nil {
289-
t.Fatalf("Observe(1.0) returned error: %v", err)
290-
}
291-
if err := a.Observe(ctx, 2.0, WithCurrentTime(createTime(2))); err != nil {
292-
t.Fatalf("Observe(2.0) returned error: %v", err)
293-
}
294-
if err := a.Observe(ctx, 3.0, WithCurrentTime(createTime(3))); err != nil {
295-
t.Fatalf("Observe(3.0) returned error: %v", err)
296-
}
297-
if err := a.Observe(ctx, 4.0, WithCurrentTime(createTime(4))); err != nil {
298-
t.Fatalf("Observe(4.0) returned error: %v", err)
299-
}
300-
return a
301-
},
302-
since: createTime(2),
303-
expected: 3.0,
304-
},
305-
{
306-
name: "since before all values",
307-
setup: func() *continuousAverager {
308-
a := NewAverager(dbRW, dbRO, "test_table", "since before all values").(*continuousAverager)
309-
if err := a.Observe(ctx, 1.0, WithCurrentTime(createTime(2))); err != nil {
310-
t.Fatalf("Observe(1.0) returned error: %v", err)
311-
}
312-
if err := a.Observe(ctx, 2.0, WithCurrentTime(createTime(3))); err != nil {
313-
t.Fatalf("Observe(2.0) returned error: %v", err)
314-
}
315-
return a
316-
},
317-
since: createTime(1),
318-
expected: 1.5,
319-
},
320-
{
321-
name: "since after all values",
322-
setup: func() *continuousAverager {
323-
a := NewAverager(dbRW, dbRO, "test_table", "since after all values").(*continuousAverager)
324-
if err := a.Observe(ctx, 1.0, WithCurrentTime(createTime(1))); err != nil {
325-
t.Fatalf("Observe(1.0) returned error: %v", err)
326-
}
327-
if err := a.Observe(ctx, 2.0, WithCurrentTime(createTime(2))); err != nil {
328-
t.Fatalf("Observe(2.0) returned error: %v", err)
329-
}
330-
return a
331-
},
332-
since: createTime(3),
333-
expected: 0.0,
334-
},
335-
{
336-
name: "wrapped buffer",
337-
setup: func() *continuousAverager {
338-
a := NewAverager(dbRW, dbRO, "test_table", "wrapped buffer").(*continuousAverager)
339-
if err := a.Observe(ctx, 1.0, WithCurrentTime(createTime(1))); err != nil {
340-
t.Fatalf("Observe(1.0) returned error: %v", err)
341-
}
342-
if err := a.Observe(ctx, 2.0, WithCurrentTime(createTime(2))); err != nil {
343-
t.Fatalf("Observe(2.0) returned error: %v", err)
344-
}
345-
if err := a.Observe(ctx, 3.0, WithCurrentTime(createTime(3))); err != nil {
346-
t.Fatalf("Observe(3.0) returned error: %v", err)
347-
}
348-
if err := a.Observe(ctx, 4.0, WithCurrentTime(createTime(4))); err != nil {
349-
t.Fatalf("Observe(4.0) returned error: %v", err)
350-
}
351-
return a
352-
},
353-
since: createTime(2),
354-
expected: 3.0,
355-
},
356-
}
357-
358-
for _, tt := range tests {
359-
t.Run(tt.name, func(t *testing.T) {
360-
a := tt.setup()
361-
result, err := a.Avg(ctx, WithSince(tt.since))
362-
if err != nil {
363-
t.Errorf("Read() returned error: %v", err)
364-
}
365-
if result != tt.expected {
366-
t.Errorf("Read() = %v, want %v", result, tt.expected)
367-
}
368-
})
369-
}
370-
}
371-
372199
func TestNoOpAverager(t *testing.T) {
373200
t.Parallel()
374201

@@ -432,24 +259,6 @@ func TestNoOpAverager(t *testing.T) {
432259
t.Errorf("Avg() with options returned %f, want 0", avg)
433260
}
434261

435-
// Test EMA - should return zero and nil error
436-
ema, err := a.EMA(ctx)
437-
if err != nil {
438-
t.Errorf("EMA() returned error: %v", err)
439-
}
440-
if ema != 0 {
441-
t.Errorf("EMA() returned %f, want 0", ema)
442-
}
443-
444-
// Test EMA with options - should still return zero and nil error
445-
ema, err = a.EMA(ctx, WithSince(time.Now()), WithEMAPeriod(time.Minute))
446-
if err != nil {
447-
t.Errorf("EMA() with options returned error: %v", err)
448-
}
449-
if ema != 0 {
450-
t.Errorf("EMA() with options returned %f, want 0", ema)
451-
}
452-
453262
// Test Read - should return empty metrics and nil error
454263
metrics, err := a.Read(ctx)
455264
if err != nil {

0 commit comments

Comments
 (0)