Skip to content

Commit d8cb5a0

Browse files
authored
Merge pull request #152 from parca-dev/rtld
Use rtld map_complete usdt probe to trigger process sync
2 parents b93263b + 66e48c3 commit d8cb5a0

File tree

24 files changed

+488
-36
lines changed

24 files changed

+488
-36
lines changed

go.mod

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ require (
88
github.com/aws/aws-sdk-go-v2/service/s3 v1.87.0
99
github.com/cespare/xxhash/v2 v2.3.0
1010
github.com/cilium/ebpf v0.19.0
11+
github.com/coreos/pkg v0.0.0-20240122114842-bbd7aa9bf6fb
1112
github.com/docker/go-connections v0.5.0
1213
github.com/elastic/go-freelru v0.16.0
1314
github.com/elastic/go-perf v0.0.0-20241029065020-30bec95324b8

go.sum

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,8 @@ github.com/containerd/platforms v0.2.1 h1:zvwtM3rz2YHPQsF2CHYM8+KtB5dvhISiXh5ZpS
5959
github.com/containerd/platforms v0.2.1/go.mod h1:XHCb+2/hzowdiut9rkudds9bE5yJ7npe7dG/wG+uFPw=
6060
github.com/coreos/go-systemd/v22 v22.5.0 h1:RrqgGjYQKalulkV8NGVIfkXQf6YYmOyiJKk8iXXhfZs=
6161
github.com/coreos/go-systemd/v22 v22.5.0/go.mod h1:Y58oyj3AT4RCenI/lSvhwexgC+NSVTIJ3seZv2GcEnc=
62+
github.com/coreos/pkg v0.0.0-20240122114842-bbd7aa9bf6fb h1:GIzvVQ9UkUlOhSDlqmrQAAAUd6R3E+caIisNEyWXvNE=
63+
github.com/coreos/pkg v0.0.0-20240122114842-bbd7aa9bf6fb/go.mod h1:E3G3o1h8I7cfcXa63jLwjI0eiQQMgzzUDFVpN/nH/eA=
6264
github.com/cpuguy83/dockercfg v0.3.2 h1:DlJTyZGBDlXqUZ2Dk2Q3xHs/FtnooJJVaad2S9GKorA=
6365
github.com/cpuguy83/dockercfg v0.3.2/go.mod h1:sugsbF4//dDlL/i+S+rtpIWp+5h0BHJHfjj5/jFyUJc=
6466
github.com/creack/pty v1.1.18 h1:n56/Zwd5o6whRC5PMGretI4IdRLlmBXYNjScPaBgsbY=

internal/controller/config.go

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ import (
1111

1212
"go.opentelemetry.io/ebpf-profiler/reporter"
1313
"go.opentelemetry.io/ebpf-profiler/tracer"
14+
"go.opentelemetry.io/ebpf-profiler/util"
1415
)
1516

1617
type Config struct {
@@ -95,7 +96,7 @@ func (cfg *Config) Validate() error {
9596
}
9697

9798
if !cfg.NoKernelVersionCheck {
98-
major, minor, patch, err := tracer.GetCurrentKernelVersion()
99+
major, minor, patch, err := util.GetCurrentKernelVersion()
99100
if err != nil {
100101
return fmt.Errorf("failed to get kernel version: %v", err)
101102
}

interpreter/luajit/mappings_test.go

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,10 +15,12 @@ import (
1515
"testing"
1616
"unsafe"
1717

18+
"github.com/cilium/ebpf/link"
1819
"github.com/stretchr/testify/require"
1920
"go.opentelemetry.io/ebpf-profiler/host"
2021
"go.opentelemetry.io/ebpf-profiler/interpreter"
2122
"go.opentelemetry.io/ebpf-profiler/libpf"
23+
"go.opentelemetry.io/ebpf-profiler/libpf/pfelf"
2224
"go.opentelemetry.io/ebpf-profiler/lpm"
2325
"go.opentelemetry.io/ebpf-profiler/process"
2426
"go.opentelemetry.io/ebpf-profiler/util"
@@ -65,6 +67,11 @@ func (m *ebpfMapsMockup) DeleteProcData(libpf.InterpreterType, libpf.PID) error
6567
return nil
6668
}
6769

70+
func (mockup *ebpfMapsMockup) AttachUSDTProbe(_ libpf.PID, _ string, _ pfelf.USDTProbe,
71+
_ string) (link.Link, error) {
72+
return nil, nil
73+
}
74+
6875
// TestSynchronizeMappings tests that if a mapping is realloc'd we do the right thing.
6976
func TestSynchronizeMappings(t *testing.T) {
7077
for _, tc := range []struct {

interpreter/rtld/rtld.go

Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,98 @@
1+
// Copyright The OpenTelemetry Authors
2+
// SPDX-License-Identifier: Apache-2.0
3+
4+
package rtld // import "go.opentelemetry.io/ebpf-profiler/interpreter/rtld"
5+
6+
import (
7+
"fmt"
8+
"strings"
9+
10+
"github.com/cilium/ebpf/link"
11+
log "github.com/sirupsen/logrus"
12+
"go.opentelemetry.io/ebpf-profiler/interpreter"
13+
"go.opentelemetry.io/ebpf-profiler/libpf"
14+
"go.opentelemetry.io/ebpf-profiler/libpf/pfelf"
15+
"go.opentelemetry.io/ebpf-profiler/remotememory"
16+
)
17+
18+
// data holds the Uprobe link to keep it in memory
19+
type data struct {
20+
probe pfelf.USDTProbe
21+
path string
22+
probeLink link.Link
23+
}
24+
25+
// instance represents a per-PID instance of the rtld interpreter
26+
type instance struct {
27+
interpreter.InstanceStubs
28+
link link.Link
29+
}
30+
31+
// Loader detects if the ELF file contains the rtld:map_complete USDT probe
32+
func Loader(_ interpreter.EbpfHandler, info *interpreter.LoaderInfo) (interpreter.Data, error) {
33+
// Check if this is ld.so by examining the filename
34+
fileName := info.FileName()
35+
if !strings.Contains(fileName, "ld-") && !strings.Contains(fileName, "ld.so") {
36+
return nil, nil
37+
}
38+
39+
ef, err := info.GetELF()
40+
if err != nil {
41+
return nil, err
42+
}
43+
44+
// Look for .note.stapsdt section which contains USDT probes
45+
sec := ef.Section(".note.stapsdt")
46+
if sec == nil {
47+
log.Debugf("No .note.stapsdt section found in %s", fileName)
48+
return nil, nil
49+
}
50+
51+
// Parse USDT probes from the section
52+
probes, err := pfelf.ParseUSDTProbes(sec)
53+
if err != nil {
54+
return nil, fmt.Errorf("failed to parse USDT probes: %w", err)
55+
}
56+
57+
// Look for rtld:map_complete probe
58+
for _, probe := range probes {
59+
if probe.Provider == "rtld" && probe.Name == "map_complete" {
60+
log.Infof("Found rtld:map_complete USDT probe in %s at 0x%x",
61+
fileName, probe.Location)
62+
return &data{
63+
path: fileName,
64+
probe: probe,
65+
}, nil
66+
}
67+
}
68+
69+
return nil, nil
70+
}
71+
72+
// Attach attaches the uprobe to the rtld:map_complete USDT probe
73+
func (d *data) Attach(ebpf interpreter.EbpfHandler, pid libpf.PID, _ libpf.Address,
74+
_ remotememory.RemoteMemory) (interpreter.Instance, error) {
75+
link, err := ebpf.AttachUSDTProbe(pid, d.path, d.probe, "usdt_rtld_map_complete")
76+
if err != nil {
77+
return nil, fmt.Errorf("failed to attach uprobe to rtld:map_complete usdt: %w", err)
78+
}
79+
log.Infof("Attached uprobe to rtld:map_complete usdt in PID %d", pid)
80+
return &instance{link: link}, nil
81+
}
82+
83+
// Detach removes the uprobe
84+
func (i *instance) Detach(_ interpreter.EbpfHandler, pid libpf.PID) error {
85+
log.Debugf("[rtld] Detach called for PID %d", pid)
86+
return nil
87+
}
88+
89+
// Unload cleans up the uprobe link
90+
func (d *data) Unload(_ interpreter.EbpfHandler) {
91+
if d.probeLink != nil {
92+
if err := d.probeLink.Close(); err != nil {
93+
log.Errorf("[rtld] Failed to close uprobe link: %v", err)
94+
}
95+
d.probeLink = nil
96+
}
97+
log.Debugf("[rtld] Unloaded uprobe for %s", d.path)
98+
}

interpreter/rtld/rtld_test.go

Lines changed: 105 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,105 @@
1+
// Copyright The OpenTelemetry Authors
2+
// SPDX-License-Identifier: Apache-2.0
3+
4+
//go:build amd64 && !integration
5+
6+
package rtld_test
7+
8+
import (
9+
"context"
10+
"testing"
11+
"time"
12+
"unsafe"
13+
14+
"github.com/coreos/pkg/dlopen"
15+
"github.com/stretchr/testify/require"
16+
"go.opentelemetry.io/ebpf-profiler/metrics"
17+
"go.opentelemetry.io/ebpf-profiler/support"
18+
"go.opentelemetry.io/ebpf-profiler/testutils"
19+
"go.opentelemetry.io/ebpf-profiler/tracer"
20+
tracertypes "go.opentelemetry.io/ebpf-profiler/tracer/types"
21+
)
22+
23+
func TestIntegration(t *testing.T) {
24+
if !testutils.IsRoot() {
25+
t.Skip("This test requires root privileges")
26+
}
27+
28+
// Create a context for the tracer
29+
ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
30+
defer cancel()
31+
32+
// Start the tracer with all tracers enabled
33+
traceCh, trc := testutils.StartTracer(ctx, t,
34+
tracertypes.AllTracers(),
35+
&testutils.MockReporter{},
36+
false)
37+
defer trc.Close()
38+
39+
// Consume traces to prevent blocking
40+
go func() {
41+
for {
42+
select {
43+
case <-ctx.Done():
44+
return
45+
case <-traceCh:
46+
// Discard traces
47+
}
48+
}
49+
}()
50+
51+
// retry a few times to get the metric, our process has to be detected and
52+
// the rtld interpreter has to attach.
53+
require.Eventually(t, func() bool {
54+
// Get the initial metric value
55+
initialCount := getEBPFMetricValue(trc, metrics.IDRtldMapCompleteHits)
56+
t.Logf("Initial rtld:map_complete metric count: %d", initialCount)
57+
58+
// Use dlopen to load a shared library
59+
// libm is a standard math library that's always present
60+
lib, err := dlopen.GetHandle([]string{
61+
"/lib/x86_64-linux-gnu/libm.so.6",
62+
"libm.so.6",
63+
})
64+
require.NoError(t, err, "Failed to open libm.so.6")
65+
defer lib.Close()
66+
67+
// Get the metrics after dlopen
68+
finalCount := getEBPFMetricValue(trc, metrics.IDRtldMapCompleteHits)
69+
t.Logf("Final rtld:map_complete metric count: %d", finalCount)
70+
71+
// Check that the metric was incremented
72+
return finalCount > initialCount
73+
}, 10*time.Second, 50*time.Millisecond)
74+
}
75+
76+
func getEBPFMetricValue(trc *tracer.Tracer, metricID metrics.MetricID) uint64 {
77+
// Access the eBPF maps directly using the public method
78+
ebpfMaps := trc.GetEbpfMaps()
79+
metricsMap, ok := ebpfMaps["metrics"]
80+
if !ok {
81+
return 0
82+
}
83+
84+
// Find the eBPF metric ID that corresponds to our metrics.MetricID
85+
var ebpfMetricID uint32
86+
for ebpfID, id := range support.MetricsTranslation {
87+
if id == metricID {
88+
ebpfMetricID = uint32(ebpfID)
89+
break
90+
}
91+
}
92+
93+
// Read the per-CPU values
94+
var perCPUValues []uint64
95+
if err := metricsMap.Lookup(unsafe.Pointer(&ebpfMetricID), &perCPUValues); err != nil {
96+
return 0
97+
}
98+
99+
// Sum all per-CPU values
100+
var total uint64
101+
for _, val := range perCPUValues {
102+
total += val
103+
}
104+
return total
105+
}

interpreter/types.go

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,10 @@ import (
77
"errors"
88
"unsafe"
99

10+
"github.com/cilium/ebpf/link"
1011
"go.opentelemetry.io/ebpf-profiler/host"
1112
"go.opentelemetry.io/ebpf-profiler/libpf"
13+
"go.opentelemetry.io/ebpf-profiler/libpf/pfelf"
1214
"go.opentelemetry.io/ebpf-profiler/lpm"
1315
"go.opentelemetry.io/ebpf-profiler/metrics"
1416
"go.opentelemetry.io/ebpf-profiler/process"
@@ -112,6 +114,10 @@ type EbpfHandler interface {
112114

113115
// If unwinder needs special behavior for coredump mode to work use this.
114116
CoredumpTest() bool
117+
118+
// AttachUSDTProbe attaches a uprobe to the given USDT probe in the given process.
119+
AttachUSDTProbe(pid libpf.PID, path string, probe pfelf.USDTProbe,
120+
progName string) (link.Link, error)
115121
}
116122

117123
// Loader is a function to detect and load data from given interpreter ELF file.

libpf/pfelf/usdt.go

Lines changed: 89 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,89 @@
1+
// Copyright The OpenTelemetry Authors
2+
// SPDX-License-Identifier: Apache-2.0
3+
4+
package pfelf // import "go.opentelemetry.io/ebpf-profiler/libpf/pfelf"
5+
6+
import (
7+
"encoding/binary"
8+
"strings"
9+
)
10+
11+
// USDTProbe describes a single USDT probe as decoded by ParseUSDTProbes from one
// note of an ELF .note.stapsdt section.
type USDTProbe struct {
	// Provider is the first NUL-terminated string of the note descriptor.
	Provider string
	// Name is the second NUL-terminated string of the note descriptor.
	Name string
	// Location is the first 64-bit little-endian value of the note descriptor.
	Location uint64
	// Base is the second 64-bit little-endian value of the note descriptor.
	Base uint64
	// SemaphoreOffset is the third 64-bit little-endian value of the note descriptor.
	SemaphoreOffset uint64
	// Arguments is the third NUL-terminated string of the note descriptor.
	Arguments string
}
20+
21+
// ParseUSDTProbes reads USDT probe information from ELF .note.stapsdt section
22+
func ParseUSDTProbes(section *Section) ([]USDTProbe, error) {
23+
var probes []USDTProbe
24+
25+
// Find .note.stapsdt section
26+
data, err := section.Data(16 * 1024)
27+
if err != nil {
28+
return nil, err
29+
}
30+
31+
// Parse note entries
32+
offset := 0
33+
for offset < len(data) {
34+
if offset+12 > len(data) {
35+
break
36+
}
37+
38+
// Note header: namesz(4) + descsz(4) + type(4)
39+
namesz := binary.LittleEndian.Uint32(data[offset : offset+4])
40+
descsz := binary.LittleEndian.Uint32(data[offset+4 : offset+8])
41+
noteType := binary.LittleEndian.Uint32(data[offset+8 : offset+12])
42+
offset += 12
43+
44+
if noteType != 3 { // NT_STAPSDT
45+
// Skip this note
46+
nameEnd := offset + int((namesz+3)&^3) // align to 4 bytes
47+
descEnd := nameEnd + int((descsz+3)&^3)
48+
offset = descEnd
49+
continue
50+
}
51+
52+
// Skip owner name (should be "stapsdt")
53+
nameEnd := offset + int((namesz+3)&^3)
54+
55+
if nameEnd+int(descsz) > len(data) {
56+
break
57+
}
58+
59+
// Parse descriptor
60+
desc := data[nameEnd : nameEnd+int(descsz)]
61+
if len(desc) < 24 { // 3 uint64 values
62+
offset = nameEnd + int((descsz+3)&^3)
63+
continue
64+
}
65+
66+
location := binary.LittleEndian.Uint64(desc[0:8])
67+
base := binary.LittleEndian.Uint64(desc[8:16])
68+
semaphore := binary.LittleEndian.Uint64(desc[16:24])
69+
70+
// Parse strings: provider\0probe\0arguments\0
71+
stringData := desc[24:]
72+
strings := strings.Split(string(stringData), "\x00")
73+
if len(strings) >= 3 {
74+
probe := USDTProbe{
75+
Provider: strings[0],
76+
Name: strings[1],
77+
Location: location,
78+
Base: base,
79+
SemaphoreOffset: semaphore,
80+
Arguments: strings[2],
81+
}
82+
probes = append(probes, probe)
83+
}
84+
85+
offset = nameEnd + int((descsz+3)&^3)
86+
}
87+
88+
return probes, nil
89+
}

metrics/ids.go

Lines changed: 4 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

metrics/metrics.json

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2211,5 +2211,12 @@
22112211
"name": "UnwindNodeAsyncIdErrReadIdDouble",
22122212
"field": "bpf.nodejs_async_id.errors.read_id_double",
22132213
"id": 307
2214+
},
2215+
{
2216+
"description": "Number of times rtld:map_complete USDT probe was fired",
2217+
"type": "counter",
2218+
"name": "RtldMapCompleteHits",
2219+
"field": "bpf.rtld.map_complete_hits",
2220+
"id": 308
22142221
}
22152222
]

0 commit comments

Comments
 (0)