Skip to content

Commit c26efc5

Browse files
authored
feat: WAF Run Scope support for RASP Metrics (#109)
- [x] Add the `Scope` system to classify WAF runs - [x] Refactor Stats and Metrics without breaking the current API - [x] Remove the `_dd.appsec` prefix from all metrics, in preparation for future telemetry metrics support --------- Signed-off-by: Eliott Bouhana <[email protected]>
1 parent 8cab7e7 commit c26efc5

File tree

4 files changed

+179
-71
lines changed

4 files changed

+179
-71
lines changed

context.go

Lines changed: 48 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -7,14 +7,13 @@ package waf
77

88
import (
99
"sync"
10+
"sync/atomic"
1011
"time"
1112

1213
"github.com/DataDog/go-libddwaf/v3/errors"
1314
"github.com/DataDog/go-libddwaf/v3/internal/bindings"
1415
"github.com/DataDog/go-libddwaf/v3/internal/unsafe"
1516
"github.com/DataDog/go-libddwaf/v3/timer"
16-
17-
"sync/atomic"
1817
)
1918

2019
// Context is a WAF execution context. It allows running the WAF incrementally
@@ -26,9 +25,10 @@ type Context struct {
2625
cgoRefs cgoRefPool // Used to retain go data referenced by WAF Objects the context holds
2726
cContext bindings.WafContext // The C ddwaf_context pointer
2827

29-
timeoutCount atomic.Uint64 // Cumulative timeout count for this context.
28+
// timeoutCount counts, per scope, the calls that have timed out. Keys are fixed at creation time.
29+
timeoutCount map[Scope]*atomic.Uint64
3030

31-
// Mutex protecting the use of cContext which is not thread-safe and cgoRefs.
31+
// mutex protects the use of cContext (which is not thread-safe) and of cgoRefs.
3232
mutex sync.Mutex
3333

3434
// timer registers the time spent in the WAF and go-libddwaf
@@ -39,7 +39,7 @@ type Context struct {
3939

4040
// truncations provides details about truncations that occurred while
4141
// encoding address data for WAF execution.
42-
truncations map[TruncationReason][]int
42+
truncations map[Scope]map[TruncationReason][]int
4343
}
4444

4545
// RunAddressData provides address data to the Context.Run method. If a given key is present in both
@@ -51,6 +51,8 @@ type RunAddressData struct {
5151
// Ephemeral address data is scoped to a given Context.Run call and is not persisted across calls. This is used for
5252
// protocols such as gRPC client/server streaming or GraphQL, where a single request can incur multiple subrequests.
5353
Ephemeral map[string]any
54+
// Scope is the way to classify the different runs in the same context in order to have different metrics
55+
Scope Scope
5456
}
5557

5658
func (d RunAddressData) isEmpty() bool {
@@ -70,9 +72,13 @@ func (context *Context) Run(addressData RunAddressData) (res Result, err error)
7072
return
7173
}
7274

75+
if addressData.Scope == "" {
76+
addressData.Scope = DefaultScope
77+
}
78+
7379
defer func() {
7480
if err == errors.ErrTimeout {
75-
context.timeoutCount.Add(1)
81+
context.timeoutCount[addressData.Scope].Add(1)
7682
}
7783
}()
7884

@@ -94,21 +100,21 @@ func (context *Context) Run(addressData RunAddressData) (res Result, err error)
94100

95101
runTimer.Start()
96102
defer func() {
97-
context.metrics.add(wafRunTag, runTimer.Stop())
98-
context.metrics.merge(runTimer.Stats())
103+
context.metrics.add(addressData.Scope, wafRunTag, runTimer.Stop())
104+
context.metrics.merge(addressData.Scope, runTimer.Stats())
99105
}()
100106

101107
wafEncodeTimer := runTimer.MustLeaf(wafEncodeTag)
102108
wafEncodeTimer.Start()
103-
persistentData, persistentEncoder, err := context.encodeOneAddressType(addressData.Persistent, wafEncodeTimer)
109+
persistentData, persistentEncoder, err := context.encodeOneAddressType(addressData.Scope, addressData.Persistent, wafEncodeTimer)
104110
if err != nil {
105111
wafEncodeTimer.Stop()
106112
return res, err
107113
}
108114

109115
// The WAF releases ephemeral address data at the end of each run call, so we need not keep the Go values live beyond
110116
// that in the same way we need for persistent data. We hence use a separate encoder.
111-
ephemeralData, ephemeralEncoder, err := context.encodeOneAddressType(addressData.Ephemeral, wafEncodeTimer)
117+
ephemeralData, ephemeralEncoder, err := context.encodeOneAddressType(addressData.Scope, addressData.Ephemeral, wafEncodeTimer)
112118
if err != nil {
113119
wafEncodeTimer.Stop()
114120
return res, err
@@ -180,7 +186,7 @@ func merge[K comparable, V any](a, b map[K][]V) (merged map[K][]V) {
180186
// is a nil map, but this behaviour is expected since either persistent or ephemeral addresses are allowed to be null
181187
// one at a time. In this case, Encode will return nil contrary to Encode which will return a nil wafObject,
182188
// which is what we need to send to ddwaf_run to signal that the address data is empty.
183-
func (context *Context) encodeOneAddressType(addressData map[string]any, timer timer.Timer) (*bindings.WafObject, encoder, error) {
189+
func (context *Context) encodeOneAddressType(scope Scope, addressData map[string]any, timer timer.Timer) (*bindings.WafObject, encoder, error) {
184190
encoder := newLimitedEncoder(timer)
185191
if addressData == nil {
186192
return nil, encoder, nil
@@ -191,7 +197,7 @@ func (context *Context) encodeOneAddressType(addressData map[string]any, timer t
191197
context.mutex.Lock()
192198
defer context.mutex.Unlock()
193199

194-
context.truncations = merge(context.truncations, encoder.truncations)
200+
context.truncations[scope] = merge(context.truncations[scope], encoder.truncations)
195201
}
196202

197203
if timer.Exhausted() {
@@ -269,14 +275,15 @@ func (context *Context) Close() {
269275

270276
// TotalRuntime returns the cumulated WAF runtime across various run calls within the same WAF context.
271277
// Returned time is in nanoseconds.
272-
// Deprecated: use Timings instead
278+
// Deprecated: use Stats instead
273279
func (context *Context) TotalRuntime() (uint64, uint64) {
274-
return uint64(context.metrics.get(wafRunTag)), uint64(context.metrics.get(wafDurationTag))
280+
return uint64(context.metrics.get(DefaultScope, wafRunTag)), uint64(context.metrics.get(DefaultScope, wafDurationTag))
275281
}
276282

277283
// TotalTimeouts returns the cumulated amount of WAF timeouts across various run calls within the same WAF context.
284+
// Deprecated: use Stats instead
278285
func (context *Context) TotalTimeouts() uint64 {
279-
return context.timeoutCount.Load()
286+
return context.timeoutCount[DefaultScope].Load()
280287
}
281288

282289
// Stats returns the cumulative time spent in various parts of the WAF, all in nanoseconds
@@ -285,15 +292,36 @@ func (context *Context) Stats() Stats {
285292
context.mutex.Lock()
286293
defer context.mutex.Unlock()
287294

288-
truncations := make(map[TruncationReason][]int, len(context.truncations))
289-
for reason, counts := range context.truncations {
295+
truncations := make(map[TruncationReason][]int, len(context.truncations[DefaultScope]))
296+
for reason, counts := range context.truncations[DefaultScope] {
290297
truncations[reason] = make([]int, len(counts))
291298
copy(truncations[reason], counts)
292299
}
293300

301+
raspTruncations := make(map[TruncationReason][]int, len(context.truncations[RASPScope]))
302+
for reason, counts := range context.truncations[RASPScope] {
303+
raspTruncations[reason] = make([]int, len(counts))
304+
copy(raspTruncations[reason], counts)
305+
}
306+
307+
var (
308+
timeoutDefault uint64
309+
timeoutRASP uint64
310+
)
311+
312+
if atomic, ok := context.timeoutCount[DefaultScope]; ok {
313+
timeoutDefault = atomic.Load()
314+
}
315+
316+
if atomic, ok := context.timeoutCount[RASPScope]; ok {
317+
timeoutRASP = atomic.Load()
318+
}
319+
294320
return Stats{
295-
Timers: context.metrics.copy(),
296-
TimeoutCount: context.timeoutCount.Load(),
297-
Truncations: truncations,
321+
Timers: context.metrics.timers(),
322+
TimeoutCount: timeoutDefault,
323+
TimeoutRASPCount: timeoutRASP,
324+
Truncations: truncations,
325+
TruncationsRASP: raspTruncations,
298326
}
299327
}

handle.go

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,7 @@ func NewHandle(rules any, keyObfuscatorRegex string, valueObfuscatorRegex string
6969
cHandle := wafLib.WafInit(obj, config, diagnosticsWafObj)
7070
// Upon failure, the WAF may have produced some diagnostics to help signal what went wrong...
7171
var (
72-
diags *Diagnostics
72+
diags = new(Diagnostics)
7373
diagsErr error
7474
)
7575
if !diagnosticsWafObj.IsInvalid() {
@@ -132,7 +132,17 @@ func (handle *Handle) NewContextWithBudget(budget time.Duration) (*Context, erro
132132
return nil, err
133133
}
134134

135-
return &Context{handle: handle, cContext: cContext, timer: timer, metrics: metricsStore{data: make(map[string]time.Duration, 5)}}, nil
135+
return &Context{
136+
handle: handle,
137+
cContext: cContext,
138+
timer: timer,
139+
metrics: metricsStore{data: make(map[metricKey]time.Duration, 5)},
140+
truncations: make(map[Scope]map[TruncationReason][]int, 2),
141+
timeoutCount: map[Scope]*atomic.Uint64{
142+
DefaultScope: new(atomic.Uint64),
143+
RASPScope: new(atomic.Uint64),
144+
},
145+
}, nil
136146
}
137147

138148
// Diagnostics returns the rules initialization metrics for the current WAF handle

metrics.go

Lines changed: 75 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -6,92 +6,131 @@
66
package waf
77

88
import (
9-
"fmt"
9+
"strings"
1010
"sync"
1111
"time"
1212
)
1313

1414
// Stats stores the metrics collected by the WAF.
15-
type Stats struct {
16-
// Timers returns a map of metrics and their durations.
17-
Timers map[string]time.Duration
15+
type (
16+
Stats struct {
17+
// Timers returns a map of metrics and their durations.
18+
Timers map[string]time.Duration
1819

19-
// Timeout
20-
TimeoutCount uint64
20+
// TimeoutCount for the Default Scope i.e. "waf"
21+
TimeoutCount uint64
2122

22-
// Truncations provides details about truncations that occurred while
23-
// encoding address data for WAF execution.
24-
Truncations map[TruncationReason][]int
25-
}
23+
// TimeoutRASPCount for the RASP Scope i.e. "rasp"
24+
TimeoutRASPCount uint64
25+
26+
// Truncations provides details about truncations that occurred while
27+
// encoding address data for WAF execution.
28+
Truncations map[TruncationReason][]int
29+
30+
// TruncationsRASP provides details about truncations that occurred while
31+
// encoding address data for RASP execution.
32+
TruncationsRASP map[TruncationReason][]int
33+
}
34+
35+
// Scope is the way to classify the different runs in the same context in order to have different metrics
36+
Scope string
37+
38+
metricKey struct {
39+
scope Scope
40+
component string
41+
}
42+
43+
metricsStore struct {
44+
data map[metricKey]time.Duration
45+
mutex sync.RWMutex
46+
}
47+
)
2648

2749
const (
28-
wafEncodeTag = "_dd.appsec.waf.encode"
29-
wafRunTag = "_dd.appsec.waf.duration_ext"
30-
wafDurationTag = "_dd.appsec.waf.duration"
31-
wafDecodeTag = "_dd.appsec.waf.decode"
32-
wafTimeoutTag = "_dd.appsec.waf.timeouts"
33-
wafTruncationTag = "_dd.appsec.waf.truncations"
50+
DefaultScope Scope = "waf"
51+
RASPScope Scope = "rasp"
3452
)
3553

36-
// Metrics transform the stats returned by the WAF into a map of key value metrics for datadog backend
54+
const (
55+
wafEncodeTag = "encode"
56+
wafRunTag = "duration_ext"
57+
wafDurationTag = "duration"
58+
wafDecodeTag = "decode"
59+
wafTimeoutTag = "timeouts"
60+
wafTruncationTag = "truncations"
61+
)
62+
63+
func dot(parts ...string) string {
64+
return strings.Join(parts, ".")
65+
}
66+
67+
// Metrics transforms the stats returned by the WAF into a map of key/value metrics. Timer values are expressed in microseconds; timeout counts and truncation lists are reported as-is.
68+
// ex. {"waf.encode": 100, "waf.duration_ext": 300, "waf.duration": 200, "rasp.encode": 100, "rasp.duration_ext": 300, "rasp.duration": 200}
3769
func (stats Stats) Metrics() map[string]any {
3870
tags := make(map[string]any, len(stats.Timers)+len(stats.Truncations)+1)
3971
for k, v := range stats.Timers {
4072
tags[k] = float64(v.Nanoseconds()) / float64(time.Microsecond) // The metrics should be in microseconds
4173
}
4274

43-
tags[wafTimeoutTag] = stats.TimeoutCount
75+
if stats.TimeoutCount > 0 {
76+
tags[dot(string(DefaultScope), wafTimeoutTag)] = stats.TimeoutCount
77+
}
78+
79+
if stats.TimeoutRASPCount > 0 {
80+
tags[dot(string(RASPScope), wafTimeoutTag)] = stats.TimeoutRASPCount
81+
}
82+
4483
for reason, list := range stats.Truncations {
45-
tags[fmt.Sprintf("%s.%s", wafTruncationTag, reason.String())] = list
84+
tags[dot(string(DefaultScope), wafTruncationTag, reason.String())] = list
4685
}
4786

48-
return tags
49-
}
87+
for reason, list := range stats.TruncationsRASP {
88+
tags[dot(string(RASPScope), wafTruncationTag, reason.String())] = list
89+
}
5090

51-
type metricsStore struct {
52-
data map[string]time.Duration
53-
mutex sync.RWMutex
91+
return tags
5492
}
5593

56-
func (metrics *metricsStore) add(key string, duration time.Duration) {
94+
func (metrics *metricsStore) add(scope Scope, component string, duration time.Duration) {
5795
metrics.mutex.Lock()
5896
defer metrics.mutex.Unlock()
5997
if metrics.data == nil {
60-
metrics.data = make(map[string]time.Duration, 5)
98+
metrics.data = make(map[metricKey]time.Duration, 5)
6199
}
62100

63-
metrics.data[key] += duration
101+
metrics.data[metricKey{scope, component}] += duration
64102
}
65103

66-
func (metrics *metricsStore) get(key string) time.Duration {
104+
func (metrics *metricsStore) get(scope Scope, component string) time.Duration {
67105
metrics.mutex.RLock()
68106
defer metrics.mutex.RUnlock()
69-
return metrics.data[key]
107+
return metrics.data[metricKey{scope, component}]
70108
}
71109

72-
func (metrics *metricsStore) copy() map[string]time.Duration {
110+
func (metrics *metricsStore) timers() map[string]time.Duration {
73111
metrics.mutex.Lock()
74112
defer metrics.mutex.Unlock()
75113
if metrics.data == nil {
76114
return nil
77115
}
78116

79-
copy := make(map[string]time.Duration, len(metrics.data))
117+
timers := make(map[string]time.Duration, len(metrics.data))
80118
for k, v := range metrics.data {
81-
copy[k] = v
119+
timers[dot(string(k.scope), k.component)] = v
82120
}
83-
return copy
121+
return timers
84122
}
85123

86124
// merge merges the current metrics with new ones
87-
func (metrics *metricsStore) merge(other map[string]time.Duration) {
125+
func (metrics *metricsStore) merge(scope Scope, other map[string]time.Duration) {
88126
metrics.mutex.Lock()
89127
defer metrics.mutex.Unlock()
90128
if metrics.data == nil {
91-
metrics.data = make(map[string]time.Duration, 5)
129+
metrics.data = make(map[metricKey]time.Duration, 5)
92130
}
93131

94-
for key, val := range other {
132+
for component, val := range other {
133+
key := metricKey{scope, component}
95134
prev, ok := metrics.data[key]
96135
if !ok {
97136
prev = 0

0 commit comments

Comments
 (0)