Skip to content

btf: lazy decoding of string table #1772

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
May 2, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 19 additions & 21 deletions btf/btf.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ import (
"math"
"os"
"reflect"
"slices"

"github.com/cilium/ebpf/internal"
"github.com/cilium/ebpf/internal/sys"
Expand Down Expand Up @@ -243,6 +244,10 @@ func guessRawBTFByteOrder(r io.ReaderAt) binary.ByteOrder {
// fixupDatasec attempts to patch up missing info in Datasecs and its members by
// supplementing them with information from the ELF headers and symbol table.
func (elf *elfData) fixupDatasec(typ Type) error {
if elf == nil {
return nil
}

if ds, ok := typ.(*Datasec); ok {
if elf.fixups[ds] {
return nil
Expand Down Expand Up @@ -373,10 +378,6 @@ func (s *Spec) TypeByID(id TypeID) (Type, error) {
return nil, fmt.Errorf("inflate type: %w", err)
}

if s.elf == nil {
return typ, nil
}

if err := s.elf.fixupDatasec(typ); err != nil {
return nil, err
}
Expand All @@ -399,28 +400,25 @@ func (s *Spec) TypeID(typ Type) (TypeID, error) {
//
// Returns an error wrapping ErrNotFound if no matching Type exists in the Spec.
func (s *Spec) AnyTypesByName(name string) ([]Type, error) {
// NOTE(review): this span is a rendered diff whose +/- markers were lost. The
// TypeIDsByName/TypeByID loop that builds result and the TypesByName loop that
// filters types in place appear interleaved below; presumably only the
// TypesByName variant remains after the change. Confirm against the merged file.
typeIDs := s.TypeIDsByName(newEssentialName(name))
if len(typeIDs) == 0 {
return nil, fmt.Errorf("type name %s: %w", name, ErrNotFound)
}

// Return a copy to prevent changes to namedTypes.
result := make([]Type, 0, len(typeIDs))
for _, id := range typeIDs {
typ, err := s.TypeByID(id)
if errors.Is(err, ErrNotFound) {
return nil, fmt.Errorf("no type with ID %d", id)
} else if err != nil {
return nil, err
}
// Candidates are looked up by essential name; the loop below narrows them
// down to exact matches on the full name.
types, err := s.TypesByName(newEssentialName(name))
if err != nil {
return nil, err
}

for i := 0; i < len(types); i++ {
// Match against the full name, not just the essential one
// in case the type being looked up is a struct flavor.
if typ.TypeName() == name {
result = append(result, typ)
if types[i].TypeName() != name {
// NOTE(review): slices.Delete moves the following element into index i, and
// the continue below still runs i++, so that element is neither name-checked
// nor passed to fixupDatasec. Decrementing i before continuing, or filtering
// with slices.DeleteFunc first, would avoid the skip.
types = slices.Delete(types, i, i+1)
continue
}

// s.elf may be nil here; fixupDatasec returns nil for a nil receiver.
if err := s.elf.fixupDatasec(types[i]); err != nil {
return nil, err
}
}
return result, nil

// NOTE(review): if every candidate is filtered out above, this returns an
// empty slice with a nil error, while the doc comment promises an error
// wrapping ErrNotFound. Confirm whether callers rely on that.
return types, nil
}

// AnyTypeByName returns a Type with the given name.
Expand Down
17 changes: 2 additions & 15 deletions btf/btf_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -249,22 +249,9 @@ func BenchmarkIterateVmlinux(b *testing.B) {
// TestParseCurrentKernelBTF fails when the Spec returned by vmlinuxSpec is
// empty.
//
// NOTE(review): rendered diff with +/- markers lost. The namedTypes check and
// the string statistics that read spec.strings.strings look like removed
// lines, leaving only the spec.offsets check. Confirm against the merged file.
func TestParseCurrentKernelBTF(t *testing.T) {
spec := vmlinuxSpec(t)

if len(spec.namedTypes) == 0 {
if len(spec.offsets) == 0 {
t.Fatal("Empty kernel BTF")
}

totalBytes := 0
distinct := 0
seen := make(map[string]bool)
for _, str := range spec.strings.strings {
totalBytes += len(str)
if !seen[str] {
distinct++
seen[str] = true
}
}
t.Logf("%d strings total, %d distinct", len(spec.strings.strings), distinct)
t.Logf("Average string size: %.0f", float64(totalBytes)/float64(len(spec.strings.strings)))
}

func TestFindVMLinux(t *testing.T) {
Expand All @@ -280,7 +267,7 @@ func TestFindVMLinux(t *testing.T) {
t.Fatal("Can't load BTF:", err)
}

if len(spec.namedTypes) == 0 {
if len(spec.offsets) == 0 {
t.Fatal("Empty kernel BTF")
}
}
Expand Down
17 changes: 7 additions & 10 deletions btf/core.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@ import (
"fmt"
"math"
"reflect"
"slices"
"strconv"
"strings"

Expand Down Expand Up @@ -265,16 +264,14 @@ func CORERelocate(relos []*CORERelocation, targets []*Spec, bo binary.ByteOrder,

var targetTypes []Type
for _, target := range targets {
namedTypeIDs := target.TypeIDsByName(essentialName)
targetTypes = slices.Grow(targetTypes, len(namedTypeIDs))
for _, id := range namedTypeIDs {
typ, err := target.TypeByID(id)
if err != nil {
return nil, err
}

targetTypes = append(targetTypes, typ)
namedTypes, err := target.TypesByName(essentialName)
if errors.Is(err, ErrNotFound) {
continue
} else if err != nil {
return nil, err
}

targetTypes = append(targetTypes, namedTypes...)
}

fixups, err := coreCalculateFixups(group.relos, targetTypes, bo, resolveTargetTypeID)
Expand Down
98 changes: 36 additions & 62 deletions btf/strings.go
Original file line number Diff line number Diff line change
@@ -1,21 +1,17 @@
package btf

import (
"bufio"
"bytes"
"errors"
"fmt"
"io"
"maps"
"slices"
"strings"
)

// stringTable holds the contents of a BTF string table.
//
// NOTE(review): rendered diff with +/- markers lost. The first base field and
// the offsets, prevIdx and strings fields look like the removed layout; the
// second base field and bytes look like the added one. Confirm against the
// merged file.
type stringTable struct {
base *stringTable
offsets []uint32
prevIdx int
strings []string
// base is an optional table whose strings come first: lookupSlow forwards
// offsets below len(base.bytes) to it and rebases the remaining offsets.
base *stringTable
// bytes is the raw table as read by readStringTable, which rejects input
// that is empty or whose last byte is not NUL.
bytes []byte
}

// sizedReader is implemented by bytes.Reader, io.SectionReader, strings.Reader, etc.
Expand All @@ -29,89 +25,67 @@ func readStringTable(r sizedReader, base *stringTable) (*stringTable, error) {
// from the last entry offset of the base BTF.
firstStringOffset := uint32(0)
if base != nil {
idx := len(base.offsets) - 1
firstStringOffset = base.offsets[idx] + uint32(len(base.strings[idx])) + 1
firstStringOffset = uint32(len(base.bytes))
}

// Derived from vmlinux BTF.
const averageStringLength = 16

n := int(r.Size() / averageStringLength)
offsets := make([]uint32, 0, n)
strings := make([]string, 0, n)

offset := firstStringOffset
scanner := bufio.NewScanner(r)
scanner.Split(splitNull)
for scanner.Scan() {
str := scanner.Text()
offsets = append(offsets, offset)
strings = append(strings, str)
offset += uint32(len(str)) + 1
}
if err := scanner.Err(); err != nil {
bytes := make([]byte, r.Size())
if _, err := io.ReadFull(r, bytes); err != nil {
return nil, err
}

if len(strings) == 0 {
if len(bytes) == 0 {
return nil, errors.New("string table is empty")
}

if firstStringOffset == 0 && strings[0] != "" {
return nil, errors.New("first item in string table is non-empty")
if bytes[len(bytes)-1] != 0 {
return nil, errors.New("string table isn't null terminated")
}

return &stringTable{base, offsets, 0, strings}, nil
}

func splitNull(data []byte, atEOF bool) (advance int, token []byte, err error) {
i := bytes.IndexByte(data, 0)
if i == -1 {
if atEOF && len(data) > 0 {
return 0, nil, errors.New("string table isn't null terminated")
}
return 0, nil, nil
if firstStringOffset == 0 && bytes[0] != 0 {
return nil, errors.New("first item in string table is non-empty")
}

return i + 1, data[:i], nil
return &stringTable{base: base, bytes: bytes}, nil
}

// Lookup returns the string that starts at offset.
//
// Offset zero short-circuits to the empty string. Any other offset is resolved
// by lookupSlow and the resulting bytes are converted to a string.
//
// NOTE(review): rendered diff with +/- markers lost. The st.base.offsets check
// and the calls to lookup look like removed lines interleaved with the added
// body. Confirm against the merged file.
func (st *stringTable) Lookup(offset uint32) (string, error) {
if st.base != nil && offset <= st.base.offsets[len(st.base.offsets)-1] {
return st.base.lookup(offset)
// Fast path: zero offset is the empty string, looked up frequently.
if offset == 0 {
return "", nil
}
return st.lookup(offset)

b, err := st.lookupSlow(offset)
return string(b), err
}

// NOTE(review): rendered diff with +/- markers lost. The lookup signature
// directly below, the st.base == nil fast path and the prevIdx shortcut look
// like removed lines interleaved with the added LookupBytes. Confirm against
// the merged file.
func (st *stringTable) lookup(offset uint32) (string, error) {
// LookupBytes returns the bytes of the string that starts at offset.
//
// Offset zero yields a nil slice. Any other offset is resolved by lookupSlow,
// whose result is a sub-slice of the table's bytes rather than a copy, so
// callers presumably must not modify it (verify against callers).
func (st *stringTable) LookupBytes(offset uint32) ([]byte, error) {
// Fast path: zero offset is the empty string, looked up frequently.
if offset == 0 && st.base == nil {
return "", nil
if offset == 0 {
return nil, nil
}

// Accesses tend to be globally increasing, so check if the next string is
// the one we want. This skips the binary search in about 50% of cases.
if st.prevIdx+1 < len(st.offsets) && st.offsets[st.prevIdx+1] == offset {
st.prevIdx++
return st.strings[st.prevIdx], nil
}
return st.lookupSlow(offset)
}

i, found := slices.BinarySearch(st.offsets, offset)
if !found {
return "", fmt.Errorf("offset %d isn't start of a string", offset)
// lookupSlow returns the bytes of the string that starts at offset, without
// the terminating NUL byte.
//
// Offsets below len(st.base.bytes) are delegated to the base table. Otherwise
// the base length is subtracted so that offset indexes st.bytes. A non-zero
// offset is only accepted when the byte before it is NUL, i.e. when it is the
// start of a string.
//
// NOTE(review): rendered diff with +/- markers lost. The lines that use
// st.prevIdx and st.strings, and the Num method, look like removed code
// interleaved with the added body. Confirm against the merged file.
func (st *stringTable) lookupSlow(offset uint32) ([]byte, error) {
if st.base != nil {
n := uint32(len(st.base.bytes))
if offset < n {
return st.base.lookupSlow(offset)
}
offset -= n
}

// Set the new increment index, but only if its greater than the current.
if i > st.prevIdx+1 {
st.prevIdx = i
// NOTE(review): offset == len(st.bytes) passes this check. st.bytes[offset:]
// is then empty, bytes.IndexByte returns -1, and uint32(-1) wraps, so the
// final slice expression has its high bound below its low bound and panics.
// Comparing with >= would reject that offset with an error instead.
if offset > uint32(len(st.bytes)) {
return nil, fmt.Errorf("offset %d is out of bounds of string table", offset)
}

return st.strings[i], nil
}
if offset > 0 && st.bytes[offset-1] != 0 {
return nil, fmt.Errorf("offset %d is not the beginning of a string", offset)
}

// Num returns the number of strings in the table.
func (st *stringTable) Num() int {
return len(st.strings)
// readStringTable only accepts tables whose last byte is NUL, so a
// terminator exists for every in-range offset.
i := bytes.IndexByte(st.bytes[offset:], 0)
return st.bytes[offset : offset+uint32(i)], nil
}

// stringTableBuilder builds BTF string tables.
Expand Down
Loading
Loading