Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -273,7 +273,8 @@ format_cpp:
executor/android/android_seccomp.h \
tools/kcovtrace/*.c tools/kcovfuzzer/*.c tools/fops_probe/*.cc \
tools/clang/*.h \
tools/clang/declextract/*.h tools/clang/declextract/*.cpp
tools/clang/declextract/*.h tools/clang/declextract/*.cpp \
tools/clang/codesearch/*.h tools/clang/codesearch/*.cpp

format_sys: bin/syz-fmt
bin/syz-fmt all
Expand Down
64 changes: 61 additions & 3 deletions pkg/clangtool/clangtool.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ type OutputDataPtr[T any] interface {
*T
Merge(*T)
SetSourceFile(string, func(filename string) string)
SortAndDedup()
Finalize(*Verifier)
}

// Run runs the clang tool on all files in the compilation database
Expand Down Expand Up @@ -81,7 +81,15 @@ func Run[Output any, OutputPtr OutputDataPtr[Output]](cfg *Config) (OutputPtr, e
}
out.Merge(res.out)
}
out.SortAndDedup()
// Finalize the output (sort, dedup, etc), and let the output verify
// that all source file names, line numbers, etc are valid/present.
// If there are any bogus entries, it's better to detect them early,
// than to crash/error much later when the info is used.
// Some of the source files (generated) may be in the obj dir.
srcDirs := []string{cfg.KernelSrc, cfg.KernelObj}
if err := Finalize(out, srcDirs); err != nil {
return nil, err
}
if cfg.CacheFile != "" {
osutil.MkdirAll(filepath.Dir(cfg.CacheFile))
data, err := json.MarshalIndent(out, "", "\t")
Expand All @@ -95,12 +103,62 @@ func Run[Output any, OutputPtr OutputDataPtr[Output]](cfg *Config) (OutputPtr, e
return out, nil
}

// Finalize calls out.Finalize with a fresh Verifier that resolves file names
// against srcDirs. It returns nil if the verifier recorded no problems,
// otherwise a single error whose text is all recorded problems concatenated.
func Finalize[Output any, OutputPtr OutputDataPtr[Output]](out OutputPtr, srcDirs []string) error {
	verifier := &Verifier{
		fileCache: make(map[string]int),
		srcDirs:   srcDirs,
	}
	out.Finalize(verifier)
	if verifier.err.Len() != 0 {
		return errors.New(verifier.err.String())
	}
	return nil
}

// Verifier checks that file names and line ranges refer to existing files
// and lines under one of the source directories. Problems are accumulated
// in err instead of being returned, so one pass can report all of them.
type Verifier struct {
	srcDirs   []string       // directories that file names are resolved against, in order
	fileCache map[string]int // file->line count (-1 is cached for missing files)
	err       strings.Builder
}

// Filename checks that file can be read relative to one of the source dirs
// and caches its line count. A file that can't be read from any of the dirs
// is reported as missing; the result is cached, so it is reported only once.
func (v *Verifier) Filename(file string) {
	if _, ok := v.fileCache[file]; ok {
		return
	}
	for _, srcDir := range v.srcDirs {
		data, err := os.ReadFile(filepath.Join(srcDir, file))
		if err != nil {
			continue
		}
		// Count newline-terminated lines plus a final unterminated line, if any.
		// Splitting on '\n' would count a phantom empty line after the trailing
		// newline, and LineRange would accept a range ending past the last line.
		lines := bytes.Count(data, []byte{'\n'})
		if len(data) != 0 && data[len(data)-1] != '\n' {
			lines++
		}
		v.fileCache[file] = lines
		return
	}
	v.fileCache[file] = -1
	fmt.Fprintf(&v.err, "missing file: %v\n", file)
}

// LineRange checks that [start, end] is a valid inclusive range of lines in file.
// A missing file is reported by Filename only; no range error is added for it.
func (v *Verifier) LineRange(file string, start, end int) {
	v.Filename(file)
	lines, ok := v.fileCache[file]
	if !ok || lines < 0 {
		return
	}
	// Line numbers produced by clang are 1-based.
	if start <= 0 || end < start || end > lines {
		fmt.Fprintf(&v.err, "bad line range [%v-%v] for file %v with %v lines\n",
			start, end, file, lines)
	}
}

func runTool[Output any, OutputPtr OutputDataPtr[Output]](cfg *Config, dbFile, file string) (OutputPtr, error) {
relFile := strings.TrimPrefix(strings.TrimPrefix(strings.TrimPrefix(filepath.Clean(file),
cfg.KernelSrc), cfg.KernelObj), "/")
// Suppress warning since we may build the tool on a different clang
// version that produces more warnings.
data, err := exec.Command(cfg.ToolBin, "-p", dbFile, "--extra-arg=-w", file).Output()
// Comments are needed for codesearch tool, but may be useful for declextract
// in the future if we try to parse them with LLMs.
data, err := exec.Command(cfg.ToolBin, "-p", dbFile,
"--extra-arg=-w", "--extra-arg=-fparse-all-comments", file).Output()
if err != nil {
var exitErr *exec.ExitError
if errors.As(err, &exitErr) {
Expand Down
4 changes: 3 additions & 1 deletion pkg/clangtool/tooltest/tooltest.go
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,9 @@ func LoadOutput[Output any, OutputPtr clangtool.OutputDataPtr[Output]](t *testin
}
out.Merge(tmp)
})
out.SortAndDedup()
if err := clangtool.Finalize(out, []string{"testdata"}); err != nil {
t.Fatal(err)
}
return out
}

Expand Down
190 changes: 190 additions & 0 deletions pkg/codesearch/codesearch.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,190 @@
// Copyright 2025 syzkaller project authors. All rights reserved.
// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file.

package codesearch

import (
"bytes"
"fmt"
"os"
"path/filepath"
"strings"

"github.com/google/syzkaller/pkg/osutil"
)

// Index answers code search queries (see Commands) using a definitions
// database and a list of directories in which source files are looked up.
type Index struct {
	db      *Database // definitions that the queries search through
	srcDirs []string  // directories tried in order when reading a source file
}

// Command describes a single codesearch command runnable via Index.Command.
type Command struct {
	// Name selects the command.
	Name string
	// NArgs is the exact number of arguments the command requires.
	NArgs int
	// Func implements the command; it receives exactly NArgs arguments
	// when invoked via Index.Command and returns the textual reply.
	Func func(*Index, []string) (string, error)
}

// Commands lists all supported codesearch commands. The table is used both
// to run unit tests and by the syz-codesearch tool.
//
// Each handler replies with notFound (together with the error, if any)
// when the lookup fails or finds nothing.
var Commands = []Command{
	{
		Name:  "file-index",
		NArgs: 1,
		Func: func(index *Index, args []string) (string, error) {
			file := args[0]
			found, entities, err := index.FileIndex(file)
			if err != nil || !found {
				return notFound, err
			}
			var out strings.Builder
			fmt.Fprintf(&out, "file %v defines the following entities:\n\n", file)
			for _, entity := range entities {
				fmt.Fprintf(&out, "%v %v\n", entity.Kind, entity.Name)
			}
			return out.String(), nil
		},
	},
	{
		Name:  "def-comment",
		NArgs: 2,
		Func: func(index *Index, args []string) (string, error) {
			contextFile, name := args[0], args[1]
			info, err := index.DefinitionComment(contextFile, name)
			if err != nil || info == nil {
				return notFound, err
			}
			if info.Body != "" {
				return fmt.Sprintf("%v %v is defined in %v and commented as:\n\n%v",
					info.Kind, name, info.File, info.Body), nil
			}
			return fmt.Sprintf("%v %v is defined in %v and is not commented\n",
				info.Kind, name, info.File), nil
		},
	},
	{
		Name:  "def-source",
		NArgs: 3,
		Func: func(index *Index, args []string) (string, error) {
			contextFile, name := args[0], args[1]
			// Only the literal "yes" turns line numbers on.
			includeLines := args[2] == "yes"
			info, err := index.DefinitionSource(contextFile, name, includeLines)
			if err != nil || info == nil {
				return notFound, err
			}
			return fmt.Sprintf("%v %v is defined in %v:\n\n%v", info.Kind, name, info.File, info.Body), nil
		},
	},
}

// notFound is the reply of a command whose lookup failed or found nothing.
const notFound = "not found\n"

// NewIndex reads the database from databaseFile using osutil.ReadJSON and
// returns an Index that looks up source files in srcDirs.
// The read error, if any, is returned as is.
func NewIndex(databaseFile string, srcDirs []string) (*Index, error) {
	db, err := osutil.ReadJSON[*Database](databaseFile)
	if err != nil {
		return nil, err
	}
	index := &Index{
		srcDirs: srcDirs,
		db:      db,
	}
	return index, nil
}

// Command runs the command named cmd from the Commands table with args.
// It fails if the command is unknown, or if the number of arguments
// differs from what the command requires.
func (index *Index) Command(cmd string, args []string) (string, error) {
	for i := range Commands {
		meta := &Commands[i]
		if meta.Name != cmd {
			continue
		}
		if provided := len(args); provided != meta.NArgs {
			return "", fmt.Errorf("codesearch command %v requires %v args, but %v provided",
				cmd, meta.NArgs, provided)
		}
		return meta.Func(index, args)
	}
	return "", fmt.Errorf("unknown codesearch command %v", cmd)
}

// Entity is a single entry of the FileIndex result.
type Entity struct {
	Kind string // kind of the definition, as recorded in the database
	Name string // name of the definition
}

// FileIndex returns the entities whose definition body is located in file.
// The bool result says if at least one entity was found.
// The error result is always nil in the current implementation.
func (index *Index) FileIndex(file string) (bool, []Entity, error) {
	var found []Entity
	for _, def := range index.db.Definitions {
		if def.Body.File != file {
			continue
		}
		found = append(found, Entity{Kind: def.Kind, Name: def.Name})
	}
	ok := len(found) > 0
	return ok, found, nil
}

// EntityInfo is the result of DefinitionComment and DefinitionSource.
type EntityInfo struct {
	File string // file that contains the body of the definition
	Kind string // kind of the definition, as recorded in the database
	Body string // requested text: the comment or the source of the definition
}

// DefinitionComment looks up the definition of name (see findDefinition for
// how contextFile is used) and returns its comment text in EntityInfo.Body,
// without line numbers. It returns nil, nil if there is no such definition.
func (index *Index) DefinitionComment(contextFile, name string) (*EntityInfo, error) {
	const (
		wantComment  = true
		includeLines = false
	)
	return index.definitionSource(contextFile, name, wantComment, includeLines)
}

// DefinitionSource looks up the definition of name (see findDefinition for
// how contextFile is used) and returns its source text in EntityInfo.Body,
// with each line prefixed by a number if includeLines is set.
// It returns nil, nil if there is no such definition.
func (index *Index) DefinitionSource(contextFile, name string, includeLines bool) (*EntityInfo, error) {
	const wantComment = false
	return index.definitionSource(contextFile, name, wantComment, includeLines)
}

// definitionSource is the shared implementation of DefinitionComment and
// DefinitionSource. It formats either the comment or the body line range of
// the found definition. EntityInfo.File always refers to the body's file.
// It returns nil, nil if the definition is not found.
func (index *Index) definitionSource(contextFile, name string, comment, includeLines bool) (*EntityInfo, error) {
	def := index.findDefinition(contextFile, name)
	if def == nil {
		return nil, nil
	}
	var (
		text string
		err  error
	)
	if comment {
		text, err = index.formatSource(def.Comment, includeLines)
	} else {
		text, err = index.formatSource(def.Body, includeLines)
	}
	if err != nil {
		return nil, err
	}
	info := &EntityInfo{
		Kind: def.Kind,
		File: def.Body.File,
		Body: text,
	}
	return info, nil
}

// findDefinition returns the definition called name. A definition whose body
// is in contextFile wins and is returned right away. Otherwise the result is
// the last non-static definition with this name, or nil if there is none.
func (index *Index) findDefinition(contextFile, name string) *Definition {
	var fallback *Definition
	for _, candidate := range index.db.Definitions {
		switch {
		case candidate.Name != name:
			// Some other entity, skip it.
		case candidate.Body.File == contextFile:
			return candidate
		case !candidate.IsStatic:
			// Static definitions from other files are never used as a fallback.
			fallback = candidate
		}
	}
	return fallback
}

// formatSource returns the text of the given line range, read from the first
// source dir in which the file exists. A range with an empty file name
// yields an empty string. It fails if none of the dirs has the file.
func (index *Index) formatSource(lines LineRange, includeLines bool) (string, error) {
	if lines.File == "" {
		return "", nil
	}
	for _, dir := range index.srcDirs {
		if path := filepath.Join(dir, lines.File); osutil.IsExist(path) {
			return formatSourceFile(path, lines.StartLine, lines.EndLine, includeLines)
		}
	}
	return "", fmt.Errorf("codesearch: can't find %q file in any of %v", lines.File, index.srcDirs)
}

// formatSourceFile returns lines [start, end] of file, each terminated with
// a newline. The range is 1-based and inclusive. If includeLines is set,
// every line is prefixed with a number and a tab.
//
// It returns an error if the file can't be read, or if the range is empty,
// starts before the first line, or ends after the last element obtained by
// splitting the file on '\n'.
func formatSourceFile(file string, start, end int, includeLines bool) (string, error) {
	data, err := os.ReadFile(file)
	if err != nil {
		return "", err
	}
	lines := bytes.Split(data, []byte{'\n'})
	// Convert the 1-based inclusive range to 0-based slice indexes.
	first, last := start-1, end-1
	// last must be a valid index: allowing last == len(lines) would panic
	// with an out-of-range access in the loop below.
	if first < 0 || last < first || last >= len(lines) {
		// Report the range as it was requested (1-based).
		return "", fmt.Errorf("codesearch: bad line range [%v-%v] for file %v with %v lines",
			start, end, file, len(lines))
	}
	b := new(strings.Builder)
	for line := first; line <= last; line++ {
		if includeLines {
			// NOTE(review): the printed number is the 0-based index, i.e. one
			// less than the 1-based line number. Kept as is since existing
			// golden outputs may depend on it; confirm if this is intended.
			fmt.Fprintf(b, "%4v:\t%s\n", line, lines[line])
		} else {
			fmt.Fprintf(b, "%s\n", lines[line])
		}
	}
	return b.String(), nil
}
61 changes: 61 additions & 0 deletions pkg/codesearch/codesearch_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
// Copyright 2025 syzkaller project authors. All rights reserved.
// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file.

package codesearch

import (
"bytes"
"os"
"path/filepath"
"strings"
"testing"

"github.com/google/syzkaller/pkg/clangtool/tooltest"
"github.com/google/syzkaller/pkg/osutil"
)

// TestClangTool runs the shared clang tool test from the tooltest package
// with Database as the tool output type.
func TestClangTool(t *testing.T) {
	tooltest.TestClangTool[Database](t)
}

// TestCommands runs the query in every testdata/query* file as a subtest
// (see testCommand) and then checks that each entry of Commands was
// exercised by at least one of the queries.
func TestCommands(t *testing.T) {
	db := tooltest.LoadOutput[Database](t)
	index := &Index{db, []string{"testdata"}}
	files, err := filepath.Glob(filepath.Join(osutil.Abs("testdata"), "query*"))
	if err != nil {
		t.Fatal(err)
	}
	if len(files) == 0 {
		t.Fatal("found no query files")
	}
	// Names of the commands that were successfully tested by some query file.
	covered := make(map[string]bool)
	for _, file := range files {
		t.Run(filepath.Base(file), func(t *testing.T) {
			testCommand(t, index, covered, file)
		})
	}
	for _, cmd := range Commands {
		if !covered[cmd.Name] {
			t.Errorf("command %v is not covered, add at least one test", cmd.Name)
		}
	}
}

// testCommand runs the query stored in the first line of file against index
// and compares the query followed by the command reply with the golden data
// via tooltest.CompareGoldenData. The command is marked in covered only
// after the comparison call returns.
func testCommand(t *testing.T, index *Index, covered map[string]bool, file string) {
	data, err := os.ReadFile(file)
	if err != nil {
		t.Fatal(err)
	}
	// The query is the first line: the command name followed by its arguments.
	firstLine, _, _ := bytes.Cut(data, []byte{'\n'})
	fields := strings.Fields(string(firstLine))
	if len(fields) == 0 {
		t.Fatal("no command found")
	}
	cmd, cmdArgs := fields[0], fields[1:]
	result, err := index.Command(cmd, cmdArgs)
	if err != nil {
		t.Fatal(err)
	}
	header := strings.Join(fields, " ") + "\n\n"
	got := append([]byte(header), result...)
	tooltest.CompareGoldenData(t, file, got)
	covered[cmd] = true
}
Loading
Loading