Skip to content

Commit 4817abc

Browse files
metricbeat: ensure event.duration field is not discarded on Windows (#44442) (#44454)
* metricbeat: ensure event.duration field is not discarded on Windows Windows, starting with Go 1.23+, provides high-resolution timers, but the effective clock resolution remains around 0.5 ms. When event processing is very fast, the elapsed time between start and end may be below this resolution, resulting in a zero duration. A zero event duration is interpreted as absence of value and discarded when setting event fields. This commit ensures a minimum duration is always set. * linter fixes * use rand.N * add changelog (cherry picked from commit 7839c6b) Co-authored-by: Mauri de Souza Meneguzzo <[email protected]>
1 parent 43077a0 commit 4817abc

File tree

3 files changed

+32
-6
lines changed

3 files changed

+32
-6
lines changed

CHANGELOG.next.asciidoc

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,7 @@ otherwise no tag is added. {issue}42208[42208] {pull}42403[42403]
6868
- Add GCP organization and project details to ECS cloud fields. {pull}40461[40461]
6969
- Fix the function to determine CPU cores on windows {issue}42593[42593] {pull}43409[43409]
7070
- Handle permission errors while collecting data from Windows services and don't interrupt the overall collection by skipping affected services {issue}40765[40765] {pull}43665[43665]
71+
- Fixed a bug where `event.duration` could be missing from an event on Windows systems due to low-resolution clock. {pull}44440[44440]
7172

7273
*Osquerybeat*
7374

metricbeat/mb/module/wrapper.go

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ import (
2121
"context"
2222
"errors"
2323
"fmt"
24-
"math/rand"
24+
"math/rand/v2"
2525
"sync"
2626
"time"
2727

@@ -205,7 +205,7 @@ func (msw *metricSetWrapper) run(done <-chan struct{}, out chan<- beat.Event) {
205205

206206
// Start each metricset randomly over a period of MaxDelayPeriod.
207207
if msw.module.maxStartDelay > 0 {
208-
delay := time.Duration(rand.Int63n(int64(msw.module.maxStartDelay)))
208+
delay := rand.N(msw.module.maxStartDelay)
209209
debugf("%v/%v will start after %v", msw.module.Name(), msw.Name(), delay)
210210
select {
211211
case <-done:
@@ -320,15 +320,15 @@ func (msw *metricSetWrapper) handleFetchError(err error, reporter mb.PushReporte
320320
reporter.Error(err)
321321
msw.stats.consecutiveFailures.Set(0)
322322
// mark module as running if metrics are partially available and display the error message
323-
msw.module.UpdateStatus(status.Running, fmt.Sprintf("Error fetching data for metricset %s.%s: %v", msw.module.Name(), msw.MetricSet.Name(), err))
323+
msw.module.UpdateStatus(status.Running, fmt.Sprintf("Error fetching data for metricset %s.%s: %v", msw.module.Name(), msw.Name(), err))
324324
logp.Err("Error fetching data for metricset %s.%s: %s", msw.module.Name(), msw.Name(), err)
325325

326326
default:
327327
reporter.Error(err)
328328
msw.stats.consecutiveFailures.Inc()
329329
if msw.failureThreshold > 0 && msw.stats.consecutiveFailures != nil && uint(msw.stats.consecutiveFailures.Get()) >= msw.failureThreshold {
330330
// mark it as degraded for any other issue encountered
331-
msw.module.UpdateStatus(status.Degraded, fmt.Sprintf("Error fetching data for metricset %s.%s: %v", msw.module.Name(), msw.MetricSet.Name(), err))
331+
msw.module.UpdateStatus(status.Degraded, fmt.Sprintf("Error fetching data for metricset %s.%s: %v", msw.module.Name(), msw.Name(), err))
332332
}
333333
logp.Err("Error fetching data for metricset %s.%s: %s", msw.module.Name(), msw.Name(), err)
334334

@@ -401,7 +401,8 @@ func (r reporterV2) Done() <-chan struct{} { return r.done }
401401
func (r reporterV2) Error(err error) bool { return r.Event(mb.Event{Error: err}) }
402402
func (r reporterV2) Event(event mb.Event) bool {
403403
if event.Took == 0 && !r.start.IsZero() {
404-
event.Took = time.Since(r.start)
404+
// ensure elapsed time is always > 0
405+
event.Took = max(time.Since(r.start), time.Microsecond)
405406
}
406407
if r.msw.periodic {
407408
event.Period = r.msw.Module().Config().Period
@@ -428,7 +429,7 @@ func (r reporterV2) Event(event mb.Event) bool {
428429
if event.Namespace == "" {
429430
event.Namespace = r.msw.Registration().Namespace
430431
}
431-
beatEvent := event.BeatEvent(r.msw.module.Name(), r.msw.MetricSet.Name(), r.msw.module.eventModifiers...)
432+
beatEvent := event.BeatEvent(r.msw.module.Name(), r.msw.Name(), r.msw.module.eventModifiers...)
432433
if !writeEvent(r.done, r.out, beatEvent) {
433434
return false
434435
}

metricbeat/mb/module/wrapper_test.go

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -202,6 +202,30 @@ func TestPeriodIsAddedToEvent(t *testing.T) {
202202
}
203203
}
204204

205+
func TestDurationIsAddedToEvent(t *testing.T) {
206+
hosts := []string{"alpha"}
207+
config := newConfig(t, map[string]interface{}{
208+
"module": moduleName,
209+
"metricsets": []string{reportingFetcherName},
210+
"hosts": hosts,
211+
})
212+
213+
registry := newTestRegistry(t)
214+
m, err := module.NewWrapper(config, registry, module.WithMetricSetInfo())
215+
require.NoError(t, err)
216+
217+
done := make(chan struct{})
218+
defer close(done)
219+
220+
output := m.Start(done)
221+
222+
event := <-output
223+
224+
fields := event.Fields.Flatten()
225+
assert.Contains(t, fields, "event.duration", "event.duration should be present in event")
226+
assert.Greater(t, fields["event.duration"], time.Duration(0), "event.duration should be greater than 0")
227+
}
228+
205229
func TestNewWrapperForMetricSet(t *testing.T) {
206230
hosts := []string{"alpha"}
207231
c := newConfig(t, map[string]interface{}{

0 commit comments

Comments
 (0)