Skip to content

Commit 412ddc3

Browse files
authored
Merge pull request #16 from DataDog/purego
Purego WAF Bindings
2 parents 420a592 + af89580 commit 412ddc3

39 files changed

+1841
-1366
lines changed

.gitattributes

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
*.dylib -diff
2+
*.so -diff
3+
*.a -diff

.github/workflows/test.yml

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ jobs:
4141
# Install gotestsum
4242
env GOBIN=$PWD go install gotest.tools/gotestsum@latest
4343
# Run the tests with gotestsum
44-
env CGO_ENABLED=${{ matrix.cgo_enabled }} ./gotestsum -- -v ./... || true
44+
env CGO_ENABLED=${{ matrix.cgo_enabled }} ./gotestsum -- -v -count=10 -shuffle=on ./...
4545
4646
# Same tests but on the official golang container for linux
4747
golang-linux-container:
@@ -74,7 +74,7 @@ jobs:
7474
# Install gotestsum
7575
env GOBIN=$PWD go install gotest.tools/gotestsum@latest
7676
# Run the tests with gotestsum
77-
env CGO_ENABLED=${{ matrix.cgo_enabled }} ./gotestsum -- -v ./... || true
77+
env CGO_ENABLED=${{ matrix.cgo_enabled }} ./gotestsum -- -v -count=10 -shuffle=on ./...
7878
7979
linux-arm64:
8080
runs-on: ubuntu-latest
@@ -96,5 +96,4 @@ jobs:
9696
uses: docker/setup-qemu-action@v2
9797
with:
9898
platforms: arm64
99-
- run: docker run --platform=linux/arm64 -v $PWD:$PWD -w $PWD -eCGO_ENABLED=${{ matrix.cgo_enabled }} -eDD_APPSEC_WAF_TIMEOUT=$DD_APPSEC_WAF_TIMEOUT golang go test -v ./...
100-
99+
- run: docker run --platform=linux/arm64 -v $PWD:$PWD -w $PWD -eCGO_ENABLED=${{ matrix.cgo_enabled }} -eDD_APPSEC_WAF_TIMEOUT=$DD_APPSEC_WAF_TIMEOUT golang go test -v -count=10 -shuffle=on ./...

.gitignore

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,6 @@
11
# Binaries for programs and plugins
22
*.exe
33
*.exe~
4-
*.dll
5-
*.so
6-
*.dylib
74

85
# Test binary, built with `go test -c`
96
*.test
@@ -13,3 +10,6 @@
1310

1411
# Dependency directories (remove the comment below to include it)
1512
# vendor/
13+
14+
.vscode/
15+
.idea/

README.md

Lines changed: 120 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,120 @@
1+
# go-libddwaf
2+
3+
This project's goal is to produce a higher level API for the go bindings to [libddwaf](https://github.com/DataDog/libddwaf): DataDog in-app WAF.
4+
It consists of 2 separate entities: the bindings for the calls to libddwaf, and the encoder whose job is to convert _any_ go value to its libddwaf object representation.
5+
6+
An example usage would be:
7+
8+
```go
9+
import waf "github.com/DataDog/go-libddwaf"
10+
11+
//go:embed
12+
var ruleset []byte
13+
14+
func main() {
15+
var parsedRuleset any
16+
17+
if err := json.Unmarshal(ruleset, &parsedRuleset); err != nil {
18+
return 1
19+
}
20+
21+
wafHandle, err := waf.NewHandle(parsedRuleset, "", "")
22+
if err != nil {
23+
return 1
24+
}
25+
26+
defer wafHandle.Close()
27+
28+
wafCtx := wafHandle.NewContext()
29+
defer wafCtx.Close()
30+
31+
matches, actions := wafCtx.Run(map[string]any{
32+
"server.request.path_params": "/rfiinc.txt",
33+
}, time.Minute)
34+
}
35+
```
36+
37+
The API documentation details can be found on [pkg.go.dev](https://pkg.go.dev/github.com/DataDog/go-libddwaf).
38+
39+
Originally this project was only here to provide CGO Wrappers to the calls to libddwaf.
40+
But with the appearance of the `ddwaf_object` tree-like structure,
41+
and with the intention to build CGO-less bindings, this project has grown into a fully integrated brick in the DataDog tracer structure.
42+
Which in turn made it necessary to document the project, to maintain it in an orderly fashion.
43+
44+
## Design
45+
46+
The WAF bindings have multiple moving parts that are necessary to understand:
47+
48+
- Handle: an object wrapper over the pointer to the C WAF Handle
49+
- Context: an object wrapper over a pointer to the C WAF Context
50+
- Encoder: whose goal is to construct a tree of Waf Objects to send to the WAF
51+
- Allocator: Does all writing and allocation operations for the construction of Waf Objects
52+
- Decoder: Transforms Waf Objects returned from the WAF to usual go objects (e.g. maps, arrays, ...)
53+
- Library: The library which wraps all calls to C code
54+
55+
```mermaid
56+
flowchart LR
57+
58+
START:::hidden -->|NewHandle| Handle -->|NewContext| Context
59+
60+
Context -->|Encode Inputs| Encoder
61+
62+
Handle -->|Encode Ruleset| Encoder
63+
Handle -->|Init WAF| Library
64+
Context -->|Decode Result| Decoder
65+
66+
Handle -->|Decode Init Errors| Decoder
67+
68+
Context -->|Run| Library
69+
Context -->|Store Go References| ContextAllocator
70+
71+
Encoder -->|Allocate Waf Objects| EncoderAllocator
72+
73+
EncoderAllocator -->|Copy after each encoding| ContextAllocator
74+
75+
Library -->|Call C code| libddwaf
76+
77+
classDef hidden display: none;
78+
```
79+
80+
### Allocator
81+
82+
The `cgoRefPool` is a pure Go pool of `ddwaf_object` C values on the Go memory heap.
83+
The `cgoRefPool` Go type is a way to make sure we can safely send Go-allocated data to the C side of the WAF.
84+
The main issue is the following: the `wafObject` uses a C union to store the tree structure of the full object,
85+
union equivalent in go are interfaces and they are not compatible with C unions. The only way to be 100% sure
86+
that the Go `wafObject` struct has the same layout as the C one is to only use primitive types. So the only way to
87+
store a raw pointer is to use the `uintptr` type. But since `uintptr` values do not have pointer semantics (and are just
88+
basically integers), we need another structure to store the value as Go pointer because the GC is lurking. That's
89+
where the `cgoRefPool` object comes into play: all new `wafObject` elements are created via this API, which is specially
90+
built to make sure there is no gap for the Garbage Collector to exploit. From there, since underlying values of the
91+
`wafObject` are either arrays (for maps, structs and arrays) or string (for all ints, booleans and strings),
92+
we can store 2 slices of arrays and use `runtime.KeepAlive` in each code path to protect them from the GC.
93+
94+
### Typical call to Run()
95+
96+
Here is an example of the flow of operations on a simple call to Run():
97+
98+
- Encode input data into Waf Objects
99+
- Lock the context mutex until the end of the call
100+
- Call `ddwaf_run`
101+
- Decode the matches and actions
102+
103+
### CGO-less C Bindings
104+
105+
The main component used to build C bindings without using CGO is called [purego](https://github.com/ebitengine/purego). The flow of execution on our side is to embed the C shared library using `go:embed`, dump it into a file, load it using `dlopen`, load the symbols using `dlsym`, and finally call them.
106+
107+
⚠️ Keep in mind that **purego only works on linux/darwin for amd64/arm64 and so does go-libddwaf.**
108+
109+
Another requirement of `libddwaf` is to have an FHS filesystem on your machine and, for Linux, to provide `libc.so.6`, `libpthread.so.0`, `libm.so.6` and `libdl.so.2` as dynamic libraries.
110+
111+
## Contributing usual pitfalls
112+
113+
- Cannot dlopen twice in the app lifetime on OSX
114+
- `runtime.KeepAlive()` calls are here to prevent the GC from destroying objects too early
115+
- Since there is a stack switch between the go code and the C code, usually the only C stacktrace you will ever get is from gdb
116+
- If a segfault happens during a call to the C code, the goroutine stacktrace which has done the call is the one annotated with `[syscall]`.
117+
- [GoLand](https://www.jetbrains.com/go/) does not support `CGO_ENABLED=0` (as of June 2023)
118+
- Keep in mind that we fully escape the type system. If you send the wrong data, it will segfault in the best case, but not always!
119+
- The structs in `ctypes.go` are here to reproduce the memory layout of the structs in `include/ddwaf.h` because pointers to these structs will be passed directly.
120+
- Do not use `uintptr` as function argument or result types when the values come from `unsafe.Pointer` casts of Go values, because they escape the pointer analysis, which can create wrongly optimized code and crashes. Pointer arithmetic is of course necessary in such a library but must be kept in the same function scope.

_tools/libddwaf-updater/update.sh

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,7 @@ echo Updating libddwaf for darwin/arm64
5252
curl -L https://github.com/DataDog/libddwaf/releases/download/$version/libddwaf-$version-darwin-arm64.tar.gz | tar -xz -C$tmpdir
5353
echo Copying the darwin/arm64 library
5454
cp -v $tmpdir/libddwaf-$version-darwin-arm64/lib/libddwaf.a.stripped $bindings_dir/lib/darwin-arm64/libddwaf.a
55+
cp -v "$tmpdir/libddwaf-$version-darwin-arm64/lib/libddwaf.dylib" "$bindings_dir/lib/darwin-arm64/_libddwaf.dylib"
5556

5657
#
5758
# darwin/amd64
@@ -61,6 +62,7 @@ echo Updating libddwaf for darwin/amd64yes
6162
curl -L https://github.com/DataDog/libddwaf/releases/download/$version/libddwaf-$version-darwin-x86_64.tar.gz | tar -xz -C$tmpdir
6263
echo Copying the darwin/amd64 library
6364
cp -v $tmpdir/libddwaf-$version-darwin-x86_64/lib/libddwaf.a.stripped $bindings_dir/lib/darwin-amd64/libddwaf.a
65+
cp -v "$tmpdir/libddwaf-$version-darwin-x86_64/lib/libddwaf.dylib" "$bindings_dir/lib/darwin-amd64/_libddwaf.dylib"
6466

6567
#
6668
# linux/amd64
@@ -81,6 +83,8 @@ run_binutils x86_64-linux-gnu-ld \
8183
$tmpdir/libddwaf-$version-linux-x86_64/lib/libddwaf.a $libcxx_dir/libc++.a $libcxx_dir/libc++abi.a $libcxx_dir/libunwind.a
8284
# 4. Strip
8385
run_strip x86_64-linux-gnu $bindings_dir/lib/linux-amd64/libddwaf.a
86+
cp -v "$tmpdir/libddwaf-$version-linux-x86_64/lib/libddwaf.so" "$bindings_dir/lib/linux-amd64/_libddwaf.so"
87+
run_strip x86_64-linux-gnu "$bindings_dir/lib/linux-amd64/_libddwaf.so"
8488

8589
#
8690
# linux/arm64
@@ -101,6 +105,8 @@ run_binutils aarch64-linux-gnu-ld \
101105
$tmpdir/libddwaf-$version-linux-aarch64/lib/libddwaf.a $libcxx_dir/libc++.a $libcxx_dir/libc++abi.a $libcxx_dir/libunwind.a
102106
# 4. Strip
103107
run_strip aarch64-linux-gnu $bindings_dir/lib/linux-arm64/libddwaf.a
108+
cp -v "$tmpdir/libddwaf-$version-linux-aarch64/lib/libddwaf.so" "$bindings_dir/lib/linux-arm64/_libddwaf.so"
109+
run_strip aarch64-linux-gnu "$bindings_dir/lib/linux-arm64/_libddwaf.so"
104110

105111
#
106112
# ddwaf.h

cgo_ref_pool.go

Lines changed: 92 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,92 @@
1+
// Unless explicitly stated otherwise all files in this repository are licensed
2+
// under the Apache License Version 2.0.
3+
// This product includes software developed at Datadog (https://www.datadoghq.com/).
4+
// Copyright 2016-present Datadog, Inc.
5+
6+
//go:build (linux || darwin) && (amd64 || arm64)
7+
8+
package waf
9+
10+
import (
11+
"strconv"
12+
)
13+
14+
// cgoRefPool is a way to make sure we can safely send go allocated data on the C side of the WAF
15+
// The main issue is the following: the wafObject uses a C union to store the tree structure of the full object,
16+
// union equivalent in go are interfaces and they are not compatible with C unions. The only way to be 100% sure
17+
// that the Go wafObject struct have the same layout as the C one is to only use primitive types. So the only way to
18+
// store a raw pointer is to use the uintptr type. But since uintptr do not have pointer semantics (and are just
19+
// basically integers), we need another structure to store the value as Go pointer because the GC is lurking. That's
20+
// where the cgoRefPool object comes into play: All new wafObject elements are created via this API whose especially
21+
// built to make sure there is no gap for the Garbage Collector to exploit. From there, since underlying values of the
22+
// wafObject are either arrays (for maps, structs and arrays) or string (for all ints, booleans and strings),
23+
// we can store 2 slices of arrays and use runtime.KeepAlive in each code path to protect them from the GC.
24+
type cgoRefPool struct {
25+
stringRefs [][]byte
26+
arrayRefs [][]wafObject
27+
}
28+
29+
func (refPool *cgoRefPool) append(newRefs cgoRefPool) {
30+
refPool.stringRefs = append(refPool.stringRefs, newRefs.stringRefs...)
31+
refPool.arrayRefs = append(refPool.arrayRefs, newRefs.arrayRefs...)
32+
}
33+
34+
func (refPool *cgoRefPool) AllocCString(str string) uintptr {
35+
goArray := make([]byte, len(str)+1)
36+
copy(goArray, str)
37+
refPool.stringRefs = append(refPool.stringRefs, goArray)
38+
goArray[len(str)] = 0 // Null termination byte for C strings
39+
40+
return sliceToUintptr(goArray)
41+
}
42+
43+
func (refPool *cgoRefPool) AllocWafString(obj *wafObject, str string) {
44+
obj._type = wafStringType
45+
46+
if len(str) == 0 {
47+
obj.nbEntries = 0
48+
obj.value = 0
49+
return
50+
}
51+
52+
goArray := make([]byte, len(str))
53+
copy(goArray, str)
54+
refPool.stringRefs = append(refPool.stringRefs, goArray)
55+
56+
obj.value = sliceToUintptr(goArray)
57+
obj.nbEntries = uint64(len(goArray))
58+
}
59+
60+
func (refPool *cgoRefPool) AllocWafArray(obj *wafObject, typ wafObjectType, size uint64) []wafObject {
61+
if typ != wafMapType && typ != wafArrayType {
62+
panic("Cannot allocate this waf object data type as an array: " + strconv.Itoa(int(typ)))
63+
}
64+
65+
obj._type = typ
66+
obj.nbEntries = size
67+
68+
// If the array size is zero no need to allocate anything
69+
if size == 0 {
70+
obj.value = 0
71+
return nil
72+
}
73+
74+
goArray := make([]wafObject, size)
75+
refPool.arrayRefs = append(refPool.arrayRefs, goArray)
76+
77+
obj.value = sliceToUintptr(goArray)
78+
return goArray
79+
}
80+
81+
func (refPool *cgoRefPool) AllocWafMapKey(obj *wafObject, str string) {
82+
if len(str) == 0 {
83+
return
84+
}
85+
86+
goArray := make([]byte, len(str))
87+
copy(goArray, str)
88+
refPool.stringRefs = append(refPool.stringRefs, goArray)
89+
90+
obj.parameterName = sliceToUintptr(goArray)
91+
obj.parameterNameLength = uint64(len(goArray))
92+
}

0 commit comments

Comments
 (0)