Skip to content

Commit 66b8f6e

Browse files
committed
pkg/codesearch: add skeleton for code searching tool
Add a clang tool that is used for code indexing (tools/clang/codesearch/). It follows the conventions and build procedure of the declextract tool.

Add the pkg/codesearch package that aggregates the info exposed by the clang tools and allows doing simple queries:
- show the source code of an entity (function, struct, etc.)
- show an entity's comment
- show all entities defined in a source file

Add the tools/syz-codesearch wrapper tool that allows creating an index for a kernel build and then running code queries on it.
1 parent 71af9de commit 66b8f6e

26 files changed

+791
-2
lines changed

Makefile

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -273,7 +273,8 @@ format_cpp:
273273
executor/android/android_seccomp.h \
274274
tools/kcovtrace/*.c tools/kcovfuzzer/*.c tools/fops_probe/*.cc \
275275
tools/clang/*.h \
276-
tools/clang/declextract/*.h tools/clang/declextract/*.cpp
276+
tools/clang/declextract/*.h tools/clang/declextract/*.cpp \
277+
tools/clang/codesearch/*.h tools/clang/codesearch/*.cpp
277278

278279
format_sys: bin/syz-fmt
279280
bin/syz-fmt all

pkg/clangtool/clangtool.go

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -155,7 +155,10 @@ func runTool[Output any, OutputPtr OutputDataPtr[Output]](cfg *Config, dbFile, f
155155
cfg.KernelSrc), cfg.KernelObj), "/")
156156
// Suppress warning since we may build the tool on a different clang
157157
// version that produces more warnings.
158-
data, err := exec.Command(cfg.ToolBin, "-p", dbFile, "--extra-arg=-w", file).Output()
158+
// Comments are needed for codesearch tool, but may be useful for declextract
159+
// in the future if we try to parse them with LLMs.
160+
data, err := exec.Command(cfg.ToolBin, "-p", dbFile,
161+
"--extra-arg=-w", "--extra-arg=-fparse-all-comments", file).Output()
159162
if err != nil {
160163
var exitErr *exec.ExitError
161164
if errors.As(err, &exitErr) {

pkg/codesearch/codesearch.go

Lines changed: 190 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,190 @@
1+
// Copyright 2025 syzkaller project authors. All rights reserved.
2+
// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file.
3+
4+
package codesearch
5+
6+
import (
7+
"bytes"
8+
"fmt"
9+
"os"
10+
"path/filepath"
11+
"strings"
12+
13+
"github.com/google/syzkaller/pkg/osutil"
14+
)
15+
16+
// Index answers code queries using a definitions database
// and the source directories the database refers to.
type Index struct {
17+
// db holds the indexed definitions.
db *Database
18+
// srcDirs are the directories searched, in order, when a file recorded in db is read.
srcDirs []string
19+
}
20+
21+
// Command describes a single query supported by Index.Command.
type Command struct {
22+
// Name is the string that selects the command.
Name string
23+
// NArgs is the exact number of arguments the command requires.
NArgs int
24+
// Func runs the command with exactly NArgs arguments and returns its textual output.
Func func(*Index, []string) (string, error)
25+
}
26+
27+
// Commands are used to run unit tests and for the syz-codesearch tool.
28+
var Commands = []Command{
29+
{"file-index", 1, func(index *Index, args []string) (string, error) {
30+
ok, entities, err := index.FileIndex(args[0])
31+
if err != nil || !ok {
32+
return notFound, err
33+
}
34+
b := new(strings.Builder)
35+
fmt.Fprintf(b, "file %v defines the following entities:\n\n", args[0])
36+
for _, ent := range entities {
37+
fmt.Fprintf(b, "%v %v\n", ent.Kind, ent.Name)
38+
}
39+
return b.String(), nil
40+
}},
41+
{"def-comment", 2, func(index *Index, args []string) (string, error) {
42+
info, err := index.DefinitionComment(args[0], args[1])
43+
if err != nil || info == nil {
44+
return notFound, err
45+
}
46+
if info.Body == "" {
47+
return fmt.Sprintf("%v %v is defined in %v and is not commented\n",
48+
info.Kind, args[1], info.File), nil
49+
}
50+
return fmt.Sprintf("%v %v is defined in %v and commented as:\n\n%v",
51+
info.Kind, args[1], info.File, info.Body), nil
52+
}},
53+
{"def-source", 3, func(index *Index, args []string) (string, error) {
54+
info, err := index.DefinitionSource(args[0], args[1], args[2] == "yes")
55+
if err != nil || info == nil {
56+
return notFound, err
57+
}
58+
return fmt.Sprintf("%v %v is defined in %v:\n\n%v", info.Kind, args[1], info.File, info.Body), nil
59+
}},
60+
}
61+
62+
// notFound is the output of a command whose query did not match anything in the index.
const notFound = "not found\n"
63+
64+
func NewIndex(databaseFile string, srcDirs []string) (*Index, error) {
65+
db, err := osutil.ReadJSON[*Database](databaseFile)
66+
if err != nil {
67+
return nil, err
68+
}
69+
return &Index{
70+
db: db,
71+
srcDirs: srcDirs,
72+
}, nil
73+
}
74+
75+
func (index *Index) Command(cmd string, args []string) (string, error) {
76+
for _, meta := range Commands {
77+
if cmd == meta.Name {
78+
if len(args) != meta.NArgs {
79+
return "", fmt.Errorf("codesearch command %v requires %v args, but %v provided",
80+
cmd, meta.NArgs, len(args))
81+
}
82+
return meta.Func(index, args)
83+
}
84+
}
85+
return "", fmt.Errorf("unknown codesearch command %v", cmd)
86+
}
87+
88+
// Entity is a single entry of a file index: the kind and the name of a definition.
type Entity struct {
89+
Kind string
90+
Name string
91+
}
92+
93+
func (index *Index) FileIndex(file string) (bool, []Entity, error) {
94+
var entities []Entity
95+
for _, def := range index.db.Definitions {
96+
if def.Body.File == file {
97+
entities = append(entities, Entity{
98+
Kind: def.Kind,
99+
Name: def.Name,
100+
})
101+
}
102+
}
103+
return len(entities) != 0, entities, nil
104+
}
105+
106+
// EntityInfo is the result of a definition query.
type EntityInfo struct {
107+
// File is the file that contains the body of the definition.
File string
108+
// Kind is the kind of the definition as recorded in the database.
Kind string
109+
// Body is the requested text: the definition source or its comment.
// It is empty when a comment is requested for a definition without one.
Body string
110+
}
111+
112+
func (index *Index) DefinitionComment(contextFile, name string) (*EntityInfo, error) {
113+
return index.definitionSource(contextFile, name, true, false)
114+
}
115+
116+
func (index *Index) DefinitionSource(contextFile, name string, includeLines bool) (*EntityInfo, error) {
117+
return index.definitionSource(contextFile, name, false, includeLines)
118+
}
119+
120+
func (index *Index) definitionSource(contextFile, name string, comment, includeLines bool) (*EntityInfo, error) {
121+
def := index.findDefinition(contextFile, name)
122+
if def == nil {
123+
return nil, nil
124+
}
125+
lineRange := def.Body
126+
if comment {
127+
lineRange = def.Comment
128+
}
129+
src, err := index.formatSource(lineRange, includeLines)
130+
if err != nil {
131+
return nil, err
132+
}
133+
return &EntityInfo{
134+
File: def.Body.File,
135+
Kind: def.Kind,
136+
Body: src,
137+
}, nil
138+
}
139+
140+
func (index *Index) findDefinition(contextFile, name string) *Definition {
141+
var weakMatch *Definition
142+
for _, def := range index.db.Definitions {
143+
if def.Name == name {
144+
if def.Body.File == contextFile {
145+
return def
146+
}
147+
if !def.IsStatic {
148+
weakMatch = def
149+
}
150+
}
151+
}
152+
return weakMatch
153+
}
154+
155+
func (index *Index) formatSource(lines LineRange, includeLines bool) (string, error) {
156+
if lines.File == "" {
157+
return "", nil
158+
}
159+
for _, dir := range index.srcDirs {
160+
file := filepath.Join(dir, lines.File)
161+
if !osutil.IsExist(file) {
162+
continue
163+
}
164+
return formatSourceFile(file, lines.StartLine, lines.EndLine, includeLines)
165+
}
166+
return "", fmt.Errorf("codesearch: can't find %q file in any of %v", lines.File, index.srcDirs)
167+
}
168+
169+
// formatSourceFile returns lines [start, end] of file, each terminated by a newline.
// start and end are 1-based and inclusive. If includeLines is set, every line is
// prefixed with a number and a tab.
//
// An error is returned if the file can't be read, or if the range is inverted,
// starts before the first line, or extends past the end of the file.
func formatSourceFile(file string, start, end int, includeLines bool) (string, error) {
	data, err := os.ReadFile(file)
	if err != nil {
		return "", err
	}
	lines := bytes.Split(data, []byte{'\n'})
	// Convert to 0-based indexes into lines.
	start--
	end--
	// end is an inclusive index, so it must be strictly less than len(lines),
	// otherwise lines[end] below would panic.
	if start < 0 || end < start || end >= len(lines) {
		return "", fmt.Errorf("codesearch: bad line range [%v-%v] for file %v with %v lines",
			start, end, file, len(lines))
	}
	b := new(strings.Builder)
	for line := start; line <= end; line++ {
		if includeLines {
			// NOTE(review): this prints the 0-based index, i.e. one less than the 1-based
			// line number passed in. Kept as is since golden test files depend on
			// the exact output; confirm whether line+1 was intended.
			fmt.Fprintf(b, "%4v:\t%s\n", line, lines[line])
		} else {
			fmt.Fprintf(b, "%s\n", lines[line])
		}
	}
	return b.String(), nil
}

pkg/codesearch/codesearch_test.go

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
// Copyright 2025 syzkaller project authors. All rights reserved.
2+
// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file.
3+
4+
package codesearch
5+
6+
import (
7+
"bytes"
8+
"os"
9+
"path/filepath"
10+
"strings"
11+
"testing"
12+
13+
"github.com/google/syzkaller/pkg/clangtool/tooltest"
14+
"github.com/google/syzkaller/pkg/osutil"
15+
)
16+
17+
// TestClangTool delegates to tooltest.TestClangTool instantiated with the Database output type.
func TestClangTool(t *testing.T) {
18+
tooltest.TestClangTool[Database](t)
19+
}
20+
21+
func TestCommands(t *testing.T) {
22+
db := tooltest.LoadOutput[Database](t)
23+
index := &Index{db, []string{"testdata"}}
24+
files, err := filepath.Glob(filepath.Join(osutil.Abs("testdata"), "query*"))
25+
if err != nil {
26+
t.Fatal(err)
27+
}
28+
if len(files) == 0 {
29+
t.Fatal("found no qeury files")
30+
}
31+
covered := make(map[string]bool)
32+
for _, file := range files {
33+
t.Run(filepath.Base(file), func(t *testing.T) {
34+
testCommand(t, index, covered, file)
35+
})
36+
}
37+
for _, cmd := range Commands {
38+
if !covered[cmd.Name] {
39+
t.Errorf("command %v is not covered, add at least one test", cmd.Name)
40+
}
41+
}
42+
}
43+
44+
func testCommand(t *testing.T, index *Index, covered map[string]bool, file string) {
45+
data, err := os.ReadFile(file)
46+
if err != nil {
47+
t.Fatal(err)
48+
}
49+
query, _, _ := bytes.Cut(data, []byte{'\n'})
50+
args := strings.Fields(string(query))
51+
if len(args) == 0 {
52+
t.Fatal("no command found")
53+
}
54+
result, err := index.Command(args[0], args[1:])
55+
if err != nil {
56+
t.Fatal(err)
57+
}
58+
got := append([]byte(strings.Join(args, " ")+"\n\n"), result...)
59+
tooltest.CompareGoldenData(t, file, got)
60+
covered[args[0]] = true
61+
}

pkg/codesearch/database.go

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
// Copyright 2025 syzkaller project authors. All rights reserved.
2+
// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file.
3+
4+
package codesearch
5+
6+
import (
7+
"strings"
8+
9+
"github.com/google/syzkaller/pkg/clangtool"
10+
)
11+
12+
// Database is the JSON-serializable code index: a list of definitions.
type Database struct {
13+
Definitions []*Definition `json:"definitions,omitempty"`
14+
}
15+
16+
// Definition describes a single indexed entity.
type Definition struct {
17+
// Kind is the entity kind as printed in query results (e.g. "function").
Kind string `json:"kind,omitempty"`
18+
Name string `json:"name,omitempty"`
19+
// NOTE(review): Type is not read by any code visible here; presumably the entity type, confirm with the clang tool.
Type string `json:"type,omitempty"`
20+
// IsStatic excludes the definition from lookups made from other files.
IsStatic bool `json:"is_static,omitempty"`
21+
// Body is the location of the definition source.
Body LineRange `json:"body,omitempty"`
22+
// Comment is the location of the definition comment; its File is empty if there is no comment.
Comment LineRange `json:"comment,omitempty"`
23+
}
24+
25+
// LineRange is a range of lines in a source file.
// Line numbers are 1-based and both ends are inclusive.
type LineRange struct {
26+
File string `json:"file,omitempty"`
27+
StartLine int `json:"start_line,omitempty"`
28+
EndLine int `json:"end_line,omitempty"`
29+
}
30+
31+
// Merge appends all definitions of other to db.
// Duplicates are not removed here; Finalize sorts and dedups the list.
func (db *Database) Merge(other *Database) {
32+
db.Definitions = append(db.Definitions, other.Definitions...)
33+
}
34+
35+
func (db *Database) Finalize(v *clangtool.Verifier) {
36+
db.Definitions = clangtool.SortAndDedupSlice(db.Definitions)
37+
38+
for _, def := range db.Definitions {
39+
v.LineRange(def.Body.File, def.Body.StartLine, def.Body.EndLine)
40+
if def.Comment.File != "" {
41+
v.LineRange(def.Comment.File, def.Comment.StartLine, def.Comment.EndLine)
42+
}
43+
}
44+
}
45+
46+
// SetSoureFile attaches the source file to the entities that need it.
47+
// The clang tool could do it, but it looks easier to do it here.
48+
func (db *Database) SetSourceFile(file string, updatePath func(string) string) {
49+
for _, def := range db.Definitions {
50+
def.Body.File = updatePath(def.Body.File)
51+
def.Comment.File = updatePath(def.Comment.File)
52+
if strings.HasSuffix(def.Body.File, ".c") && def.Body.File != file {
53+
def.IsStatic = false
54+
}
55+
}
56+
}
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
def-comment source0.c close
2+
3+
function close is defined in source0.c and is not commented
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
def-comment source0.c function_with_comment_in_header
2+
3+
function function_with_comment_in_header is defined in source0.c and is not commented
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
def-comment source0.c open
2+
3+
function open is defined in source0.c and commented as:
4+
5+
/*
6+
* Comment about open.
7+
*/
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
def-source source0.c close no
2+
3+
function close is defined in source0.c:
4+
5+
int close()
6+
{
7+
return 0;
8+
}
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
def-source source0.c function_with_comment_in_header yes
2+
3+
function function_with_comment_in_header is defined in source0.c:
4+
5+
18: void function_with_comment_in_header()
6+
19: {
7+
20: same_name_in_several_files();
8+
21: }

0 commit comments

Comments
 (0)