Skip to content

Commit 4de58a2

Browse files
committed
pkg/codesearch: add skeleton for code searching tool
Add a clang tool that is used for code indexing (tools/clang/codesearch/). It follows the conventions and build procedure of the declextract tool. Add the pkg/codesearch package, which aggregates the info exposed by the clang tool and supports simple queries: show the source code of an entity (function, struct, etc.); show an entity's comment; show all entities defined in a source file. Add the tools/syz-codesearch wrapper tool, which creates an index for a kernel build and then runs code queries on it.
1 parent 1aab93a commit 4de58a2

26 files changed

+778
-2
lines changed

Makefile

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -273,7 +273,8 @@ format_cpp:
273273
executor/android/android_seccomp.h \
274274
tools/kcovtrace/*.c tools/kcovfuzzer/*.c tools/fops_probe/*.cc \
275275
tools/clang/*.h \
276-
tools/clang/declextract/*.h tools/clang/declextract/*.cpp
276+
tools/clang/declextract/*.h tools/clang/declextract/*.cpp \
277+
tools/clang/codesearch/*.h tools/clang/codesearch/*.cpp
277278

278279
format_sys: bin/syz-fmt
279280
bin/syz-fmt all

pkg/clangtool/clangtool.go

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -155,7 +155,10 @@ func runTool[Output any, OutputPtr OutputDataPtr[Output]](cfg *Config, dbFile, f
155155
cfg.KernelSrc), cfg.KernelObj), "/")
156156
// Suppress warning since we may build the tool on a different clang
157157
// version that produces more warnings.
158-
data, err := exec.Command(cfg.ToolBin, "-p", dbFile, "--extra-arg=-w", file).Output()
158+
// Comments are needed for codesearch tool, but may be useful for declextract
159+
// in the future if we try to parse them with LLMs.
160+
data, err := exec.Command(cfg.ToolBin, "-p", dbFile,
161+
"--extra-arg=-w", "--extra-arg=-fparse-all-comments", file).Output()
159162
if err != nil {
160163
var exitErr *exec.ExitError
161164
if errors.As(err, &exitErr) {

pkg/codesearch/codesearch.go

Lines changed: 177 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,177 @@
1+
// Copyright 2025 syzkaller project authors. All rights reserved.
2+
// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file.
3+
4+
package codesearch
5+
6+
import (
7+
"bytes"
8+
"fmt"
9+
"os"
10+
"path/filepath"
11+
"strings"
12+
13+
"github.com/google/syzkaller/pkg/osutil"
14+
)
15+
16+
type Index struct {
17+
db *Database
18+
srcDirs []string
19+
}
20+
21+
type Command struct {
22+
Name string
23+
NArgs int
24+
Func func(*Index, []string) (string, error)
25+
}
26+
27+
// Commands are used to run unit tests and for the syz-codesearch tool.
28+
var Commands = []Command{
29+
{"file-index", 1, func(index *Index, args []string) (string, error) {
30+
ok, entities, err := index.FileIndex(args[0])
31+
if err != nil || !ok {
32+
return notFound, err
33+
}
34+
b := new(strings.Builder)
35+
fmt.Fprintf(b, "file %v defines the following entities:\n\n", args[0])
36+
for _, ent := range entities {
37+
fmt.Fprintf(b, "%v %v\n", ent.Kind, ent.Name)
38+
}
39+
return b.String(), nil
40+
}},
41+
{"def-comment", 2, func(index *Index, args []string) (string, error) {
42+
ok, file, kind, comment, err := index.DefinitionComment(args[0], args[1])
43+
if err != nil || !ok {
44+
return notFound, err
45+
}
46+
if comment == "" {
47+
return fmt.Sprintf("%v %v is defined in %v and is not commented\n", kind, args[1], file), nil
48+
}
49+
return fmt.Sprintf("%v %v is defined in %v and commented as:\n\n%v", kind, args[1], file, comment), nil
50+
}},
51+
{"def-source", 3, func(index *Index, args []string) (string, error) {
52+
ok, file, kind, source, err := index.DefinitionSource(args[0], args[1], args[2] == "yes")
53+
if err != nil || !ok {
54+
return notFound, err
55+
}
56+
return fmt.Sprintf("%v %v is defined in %v:\n\n%v", kind, args[1], file, source), nil
57+
}},
58+
}
59+
60+
const notFound = "not found\n"
61+
62+
func NewIndex(databaseFile string, srcDirs []string) (*Index, error) {
63+
db, err := osutil.ReadJSON[*Database](databaseFile)
64+
if err != nil {
65+
return nil, err
66+
}
67+
return &Index{
68+
db: db,
69+
srcDirs: srcDirs,
70+
}, nil
71+
}
72+
73+
func (index *Index) Command(cmd string, args []string) (string, error) {
74+
for _, meta := range Commands {
75+
if cmd == meta.Name {
76+
if len(args) != meta.NArgs {
77+
return "", fmt.Errorf("codesearch command %v requires %v args, but %v provided",
78+
cmd, meta.NArgs, len(args))
79+
}
80+
return meta.Func(index, args)
81+
}
82+
}
83+
return "", fmt.Errorf("unknown codesearch command %v", cmd)
84+
}
85+
86+
type Entity struct {
87+
Kind string
88+
Name string
89+
}
90+
91+
func (index *Index) FileIndex(file string) (bool, []Entity, error) {
92+
var entities []Entity
93+
for _, def := range index.db.Definitions {
94+
if def.Body.File == file {
95+
entities = append(entities, Entity{
96+
Kind: def.Kind,
97+
Name: def.Name,
98+
})
99+
}
100+
}
101+
return len(entities) != 0, entities, nil
102+
}
103+
104+
func (index *Index) DefinitionComment(contextFile, name string) (bool, string, string, string, error) {
105+
return index.definitionSource(contextFile, name, true, false)
106+
}
107+
108+
func (index *Index) DefinitionSource(contextFile, name string, includeLines bool) (
109+
bool, string, string, string, error) {
110+
return index.definitionSource(contextFile, name, false, includeLines)
111+
}
112+
113+
func (index *Index) definitionSource(contextFile, name string, comment, includeLines bool) (
114+
bool, string, string, string, error) {
115+
def := index.findDefinition(contextFile, name)
116+
if def == nil {
117+
return false, "", "", "", nil
118+
}
119+
lineRange := def.Body
120+
if comment {
121+
lineRange = def.Comment
122+
}
123+
src, err := index.formatSource(lineRange, includeLines)
124+
return true, def.Body.File, def.Kind, src, err
125+
}
126+
127+
func (index *Index) findDefinition(contextFile, name string) *Definition {
128+
var weakMatch *Definition
129+
for _, def := range index.db.Definitions {
130+
if def.Name == name {
131+
if def.Body.File == contextFile {
132+
return def
133+
}
134+
if !def.IsStatic {
135+
weakMatch = def
136+
}
137+
}
138+
}
139+
return weakMatch
140+
}
141+
142+
func (index *Index) formatSource(lines LineRange, includeLines bool) (string, error) {
143+
if lines.File == "" {
144+
return "", nil
145+
}
146+
for _, dir := range index.srcDirs {
147+
file := filepath.Join(dir, lines.File)
148+
if !osutil.IsExist(file) {
149+
continue
150+
}
151+
return formatSourceFile(file, lines.StartLine, lines.EndLine, includeLines)
152+
}
153+
return "", fmt.Errorf("codesearch: can't find %q file in any of %v", lines.File, index.srcDirs)
154+
}
155+
156+
// formatSourceFile reads file and returns its lines from start to end
// (1-based, inclusive), each terminated with '\n'.
// If includeLines is set, every line is prefixed with a number and a tab.
//
// An error is returned if the file can't be read, or if the requested range
// is empty, starts before the first line, or extends past the last line.
func formatSourceFile(file string, start, end int, includeLines bool) (string, error) {
	data, err := os.ReadFile(file)
	if err != nil {
		return "", err
	}
	lines := bytes.Split(data, []byte{'\n'})
	// Convert the 1-based line numbers into 0-based indices into lines.
	start--
	end--
	// end is used as an index below, so it must be strictly less than len(lines);
	// allowing end == len(lines) would panic with an out-of-range access.
	if start < 0 || end < start || end >= len(lines) {
		return "", fmt.Errorf("codesearch: bad line range [%v-%v] for file %v with %v lines",
			start, end, file, len(lines))
	}
	b := new(strings.Builder)
	for line := start; line <= end; line++ {
		if includeLines {
			// NOTE(review): the printed number is the 0-based index (one less than
			// the 1-based line number passed in). Kept as is to preserve the
			// current output; confirm whether line+1 was intended.
			fmt.Fprintf(b, "%4v:\t%s\n", line, lines[line])
		} else {
			fmt.Fprintf(b, "%s\n", lines[line])
		}
	}
	return b.String(), nil
}

pkg/codesearch/codesearch_test.go

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
// Copyright 2025 syzkaller project authors. All rights reserved.
2+
// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file.
3+
4+
package codesearch
5+
6+
import (
7+
"bytes"
8+
"os"
9+
"path/filepath"
10+
"strings"
11+
"testing"
12+
13+
"github.com/google/syzkaller/pkg/clangtool/tooltest"
14+
"github.com/google/syzkaller/pkg/osutil"
15+
)
16+
17+
func TestClangTool(t *testing.T) {
18+
tooltest.TestClangTool[Database](t)
19+
}
20+
21+
func TestCommands(t *testing.T) {
22+
db := tooltest.LoadOutput[Database](t)
23+
index := &Index{db, []string{"testdata"}}
24+
files, err := filepath.Glob(filepath.Join(osutil.Abs("testdata"), "query*"))
25+
if err != nil {
26+
t.Fatal(err)
27+
}
28+
if len(files) == 0 {
29+
t.Fatal("found no qeury files")
30+
}
31+
covered := make(map[string]bool)
32+
for _, file := range files {
33+
t.Run(filepath.Base(file), func(t *testing.T) {
34+
testCommand(t, index, covered, file)
35+
})
36+
}
37+
for _, cmd := range Commands {
38+
if !covered[cmd.Name] {
39+
t.Errorf("command %v is not covered, add at least one test", cmd.Name)
40+
}
41+
}
42+
}
43+
44+
func testCommand(t *testing.T, index *Index, covered map[string]bool, file string) {
45+
data, err := os.ReadFile(file)
46+
if err != nil {
47+
t.Fatal(err)
48+
}
49+
query, _, _ := bytes.Cut(data, []byte{'\n'})
50+
args := strings.Fields(string(query))
51+
if len(args) == 0 {
52+
t.Fatal("no command found")
53+
}
54+
result, err := index.Command(args[0], args[1:])
55+
if err != nil {
56+
t.Fatal(err)
57+
}
58+
got := append([]byte(strings.Join(args, " ")+"\n\n"), result...)
59+
tooltest.CompareGoldenData(t, file, got)
60+
covered[args[0]] = true
61+
}

pkg/codesearch/database.go

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
// Copyright 2025 syzkaller project authors. All rights reserved.
2+
// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file.
3+
4+
package codesearch
5+
6+
import (
7+
"strings"
8+
9+
"github.com/google/syzkaller/pkg/clangtool"
10+
)
11+
12+
type Database struct {
13+
Definitions []*Definition `json:"definitions,omitempty"`
14+
}
15+
16+
type Definition struct {
17+
Kind string `json:"kind,omitempty"`
18+
Name string `json:"name,omitempty"`
19+
Type string `json:"type,omitempty"`
20+
IsStatic bool `json:"is_static,omitempty"`
21+
Body LineRange `json:"body,omitempty"`
22+
Comment LineRange `json:"comment,omitempty"`
23+
}
24+
25+
type LineRange struct {
26+
File string `json:"file,omitempty"`
27+
StartLine int `json:"start_line,omitempty"`
28+
EndLine int `json:"end_line,omitempty"`
29+
}
30+
31+
func (db *Database) Merge(other *Database) {
32+
db.Definitions = append(db.Definitions, other.Definitions...)
33+
}
34+
35+
func (db *Database) Finalize(v *clangtool.Verifier) {
36+
db.Definitions = clangtool.SortAndDedupSlice(db.Definitions)
37+
38+
for _, def := range db.Definitions {
39+
v.LineRange(def.Body.File, def.Body.StartLine, def.Body.EndLine)
40+
if def.Comment.File != "" {
41+
v.LineRange(def.Comment.File, def.Comment.StartLine, def.Comment.EndLine)
42+
}
43+
}
44+
}
45+
46+
// SetSourceFile attaches the source file to the entities that need it.
47+
// The clang tool could do it, but it looks easier to do it here.
48+
func (db *Database) SetSourceFile(file string, updatePath func(string) string) {
49+
for _, def := range db.Definitions {
50+
def.Body.File = updatePath(def.Body.File)
51+
def.Comment.File = updatePath(def.Comment.File)
52+
if strings.HasSuffix(def.Body.File, ".c") && def.Body.File != file {
53+
def.IsStatic = false
54+
}
55+
}
56+
}
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
def-comment source0.c close
2+
3+
function close is defined in source0.c and is not commented
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
def-comment source0.c function_with_comment_in_header
2+
3+
function function_with_comment_in_header is defined in source0.c and is not commented
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
def-comment source0.c open
2+
3+
function open is defined in source0.c and commented as:
4+
5+
/*
6+
* Comment about open.
7+
*/
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
def-source source0.c close no
2+
3+
function close is defined in source0.c:
4+
5+
int close()
6+
{
7+
return 0;
8+
}
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
def-source source0.c function_with_comment_in_header yes
2+
3+
function function_with_comment_in_header is defined in source0.c:
4+
5+
18: void function_with_comment_in_header()
6+
19: {
7+
20: same_name_in_several_files();
8+
21: }

0 commit comments

Comments
 (0)