diff --git a/pkg/clangtool/clangtool.go b/pkg/clangtool/clangtool.go index 8711b54110e9..dd7c22f0a8c3 100644 --- a/pkg/clangtool/clangtool.go +++ b/pkg/clangtool/clangtool.go @@ -32,7 +32,7 @@ type Config struct { type OutputDataPtr[T any] interface { *T - Merge(*T) + Merge(*T, *Verifier) SetSourceFile(string, func(filename string) string) Finalize(*Verifier) } @@ -73,21 +73,22 @@ func Run[Output any, OutputPtr OutputDataPtr[Output]](cfg *Config) (OutputPtr, e } close(files) + v := NewVerifier(cfg.KernelSrc, cfg.KernelObj) out := OutputPtr(new(Output)) for range cmds { res := <-results if res.err != nil { return nil, res.err } - out.Merge(res.out) + out.Merge(res.out, v) } // Finalize the output (sort, dedup, etc), and let the output verify // that all source file names, line numbers, etc are valid/present. // If there are any bogus entries, it's better to detect them early, // than to crash/error much later when the info is used. // Some of the source files (generated) may be in the obj dir. - srcDirs := []string{cfg.KernelSrc, cfg.KernelObj} - if err := Finalize(out, srcDirs); err != nil { + out.Finalize(v) + if err := v.Error(); err != nil { return nil, err } if cfg.CacheFile != "" { @@ -103,24 +104,26 @@ func Run[Output any, OutputPtr OutputDataPtr[Output]](cfg *Config) (OutputPtr, e return out, nil } -func Finalize[Output any, OutputPtr OutputDataPtr[Output]](out OutputPtr, srcDirs []string) error { - v := &Verifier{ - srcDirs: srcDirs, +type Verifier struct { + srcDirs []string + fileCache map[string]int // file->line count (-1 is cached for missing files) + err strings.Builder +} + +func NewVerifier(src ...string) *Verifier { + return &Verifier{ + srcDirs: src, fileCache: make(map[string]int), } - out.Finalize(v) +} + +func (v *Verifier) Error() error { if v.err.Len() == 0 { return nil } return errors.New(v.err.String()) } -type Verifier struct { - srcDirs []string - fileCache map[string]int // file->line count (-1 is cached for missing files) - err strings.Builder -} - func (v *Verifier) Filename(file string) { if _, ok := v.fileCache[file]; ok { return diff --git a/pkg/clangtool/tooltest/tooltest.go b/pkg/clangtool/tooltest/tooltest.go index 11aae2e88c7c..4f0b3bcedc9b 100644 --- a/pkg/clangtool/tooltest/tooltest.go +++ b/pkg/clangtool/tooltest/tooltest.go @@ -42,14 +42,16 @@ func TestClangTool[Output any, OutputPtr clangtool.OutputDataPtr[Output]](t *tes func LoadOutput[Output any, OutputPtr clangtool.OutputDataPtr[Output]](t *testing.T) OutputPtr { out := OutputPtr(new(Output)) + v := clangtool.NewVerifier("testdata") forEachTestFile(t, func(t *testing.T, file string) { tmp, err := osutil.ReadJSON[OutputPtr](file + ".json") if err != nil { t.Fatal(err) } - out.Merge(tmp) + out.Merge(tmp, v) }) - if err := clangtool.Finalize(out, []string{"testdata"}); err != nil { + out.Finalize(v) + if err := v.Error(); err != nil { t.Fatal(err) } return out diff --git a/pkg/codesearch/codesearch.go b/pkg/codesearch/codesearch.go index 051cad3c5b77..33a87e712068 100644 --- a/pkg/codesearch/codesearch.go +++ b/pkg/codesearch/codesearch.go @@ -206,7 +206,7 @@ func (index *Index) FileIndex(file string) ([]Entity, error) { for _, def := range index.db.Definitions { if def.Body.File == file { entities = append(entities, Entity{ - Kind: def.Kind, + Kind: def.Kind.String(), Name: def.Name, }) } @@ -243,7 +243,7 @@ func (index *Index) definitionSource(contextFile, name string, comment, includeL } return &EntityInfo{ File: def.Body.File, - Kind: def.Kind, + Kind: def.Kind.String(), Body: src, }, nil } @@ -266,6 +266,7 @@ func (index *Index) FindReferences(contextFile, name, srcPrefix string, contextL if srcPrefix != "" { srcPrefix = filepath.Clean(srcPrefix) } + contextLines = min(contextLines, 10000) totalCount := 0 var results []ReferenceInfo for _, def := range index.db.Definitions { @@ -289,8 +290,8 @@ func (index *Index) FindReferences(contextFile, name, srcPrefix string, contextL if contextLines > 0 { lines := LineRange{ File: def.Body.File, - StartLine: max(def.Body.StartLine, ref.Line-contextLines), - EndLine: min(def.Body.EndLine, ref.Line+contextLines), + StartLine: max(def.Body.StartLine, uint32(max(0, int(ref.Line)-contextLines))), + EndLine: min(def.Body.EndLine, ref.Line+uint32(contextLines)), } var err error snippet, err = index.formatSource(lines, true) @@ -299,11 +300,11 @@ func (index *Index) FindReferences(contextFile, name, srcPrefix string, contextL } } results = append(results, ReferenceInfo{ - ReferencingEntityKind: def.Kind, + ReferencingEntityKind: def.Kind.String(), ReferencingEntityName: def.Name, - ReferenceKind: ref.Kind, + ReferenceKind: ref.Kind.String(), SourceFile: def.Body.File, - SourceLine: ref.Line, + SourceLine: int(ref.Line), SourceSnippet: snippet, }) } @@ -342,7 +343,7 @@ func (index *Index) formatSource(lines LineRange, includeLines bool) (string, er if !osutil.IsExist(file) { continue } - return formatSourceFile(file, lines.StartLine, lines.EndLine, includeLines) + return formatSourceFile(file, int(lines.StartLine), int(lines.EndLine), includeLines) } return "", fmt.Errorf("codesearch: can't find %q file in any of %v", lines.File, index.srcDirs) } diff --git a/pkg/codesearch/database.go b/pkg/codesearch/database.go index edeb761cc44d..4abd9b6909ca 100644 --- a/pkg/codesearch/database.go +++ b/pkg/codesearch/database.go @@ -4,6 +4,10 @@ package codesearch import ( + "bytes" + "fmt" + "maps" + "slices" "strings" "github.com/google/jsonschema-go/jsonschema" @@ -13,12 +17,16 @@ import ( type Database struct { Definitions []*Definition `json:"definitions,omitempty"` + + mergeCache map[string]*Definition + reverseCache map[*Definition]string + stringCache map[string]string } type Definition struct { - Kind string `json:"kind,omitempty"` Name string `json:"name,omitempty"` Type string `json:"type,omitempty"` + Kind EntityKind `json:"kind,omitempty"` IsStatic bool `json:"is_static,omitempty"` Body LineRange `json:"body,omitempty"` Comment LineRange `json:"comment,omitempty"` @@ -26,16 +34,110 @@ type Definition struct { } type Reference struct { - Kind string `json:"kind,omitempty"` - EntityKind string `json:"entity_kind,omitempty"` - Name string `json:"name,omitempty"` - Line int `json:"line,omitempty"` + Name string `json:"name,omitempty"` + Kind RefKind `json:"kind,omitempty"` + EntityKind EntityKind `json:"entity_kind,omitempty"` + Line uint32 `json:"line,omitempty"` } type LineRange struct { File string `json:"file,omitempty"` - StartLine int `json:"start_line,omitempty"` - EndLine int `json:"end_line,omitempty"` + StartLine uint32 `json:"start_line,omitempty"` + EndLine uint32 `json:"end_line,omitempty"` +} + +type EntityKind uint8 + +const ( + entityKindInvalid EntityKind = iota + EntityKindFunction + EntityKindStruct + EntityKindUnion + EntityKindVariable + EntityKindMacro + EntityKindEnum + EntityKindTypedef + entityKindLast +) + +var entityKindNames = [...]string{ + EntityKindFunction: "function", + EntityKindStruct: "struct", + EntityKindUnion: "union", + EntityKindVariable: "variable", + EntityKindMacro: "macro", + EntityKindEnum: "enum", + EntityKindTypedef: "typedef", +} + +var entityKindBytes = func() [entityKindLast][]byte { + var ret [entityKindLast][]byte + for k, v := range entityKindNames { + ret[k] = []byte("\"" + v + "\"") + } + return ret +}() + +func (v *EntityKind) String() string { + return entityKindNames[*v] +} + +func (v *EntityKind) MarshalJSON() ([]byte, error) { + return entityKindBytes[*v], nil +} + +func (v *EntityKind) UnmarshalJSON(data []byte) error { + *v = entityKindInvalid + for k, val := range entityKindBytes { + if bytes.Equal(data, val) { + *v = EntityKind(k) + break + } + } + return nil +} + +type RefKind uint8 + +const ( + refKindInvalid RefKind = iota + RefKindUses + RefKindCall + RefKindTakesAddr + refKindLast +) + +var refKindNames = [...]string{ + RefKindUses: "uses", + RefKindCall: "calls", + RefKindTakesAddr: "takes-address-of", +} + +var refKindBytes = func() [refKindLast][]byte { + var ret [refKindLast][]byte + for k, v := range refKindNames { + ret[k] = []byte("\"" + v + "\"") + } + return ret +}() + +func (v *RefKind) String() string { + return refKindNames[*v] +} + +func (v *RefKind) MarshalJSON() ([]byte, error) { + return refKindBytes[*v], nil +} + +func (v *RefKind) UnmarshalJSON(data []byte) error { + *v = refKindInvalid + for k, val := range refKindBytes { + if bytes.Equal(data, val) { + *v = RefKind(k) + break + } + } + return nil } // DatabaseFormatHash contains a hash uniquely identifying format of the database. @@ -44,7 +146,7 @@ type LineRange struct { var DatabaseFormatHash = func() string { // Semantic version should be bumped when the schema does not change, // but stored values changes. - const semanticVersion = "2" + const semanticVersion = "3" schema, err := jsonschema.For[Database](nil) if err != nil { panic(err) @@ -52,21 +154,42 @@ var DatabaseFormatHash = func() string { return hash.String(schema, semanticVersion) }() -func (db *Database) Merge(other *Database) { - db.Definitions = append(db.Definitions, other.Definitions...) -} - -func (db *Database) Finalize(v *clangtool.Verifier) { - db.Definitions = clangtool.SortAndDedupSlice(db.Definitions) - - for _, def := range db.Definitions { - v.LineRange(def.Body.File, def.Body.StartLine, def.Body.EndLine) +func (db *Database) Merge(other *Database, v *clangtool.Verifier) { + if db.mergeCache == nil { + db.mergeCache = make(map[string]*Definition) + db.reverseCache = make(map[*Definition]string) + db.stringCache = make(map[string]string) + } + for _, def := range other.Definitions { + id := fmt.Sprintf("%v-%v-%v", def.Kind, def.Name, def.Body.File) + if _, ok := db.mergeCache[id]; ok { + continue + } + db.mergeCache[id] = def + db.reverseCache[def] = id + v.LineRange(def.Body.File, int(def.Body.StartLine), int(def.Body.EndLine)) if def.Comment.File != "" { - v.LineRange(def.Comment.File, def.Comment.StartLine, def.Comment.EndLine) + v.LineRange(def.Comment.File, int(def.Comment.StartLine), int(def.Comment.EndLine)) + } + db.intern(&def.Name) + db.intern(&def.Type) + db.intern(&def.Body.File) + db.intern(&def.Comment.File) + for _, ref := range def.Refs { + db.intern(&ref.Name) } } } +func (db *Database) Finalize(v *clangtool.Verifier) { + db.Definitions = slices.Collect(maps.Values(db.mergeCache)) + slices.SortFunc(db.Definitions, func(a, b *Definition) int { + return strings.Compare(db.reverseCache[a], db.reverseCache[b]) + }) + db.mergeCache = nil + db.reverseCache = nil +} + // SetSoureFile attaches the source file to the entities that need it. // The clang tool could do it, but it looks easier to do it here. func (db *Database) SetSourceFile(file string, updatePath func(string) string) { @@ -78,3 +201,15 @@ func (db *Database) SetSourceFile(file string, updatePath func(string) string) { } } } + +func (db *Database) intern(str *string) { + if *str == "" { + return + } + v, ok := db.stringCache[*str] + if !ok { + v = strings.Clone(*str) + db.stringCache[v] = v + } + *str = v +} diff --git a/pkg/codesearch/testdata/mm/refs.c.json b/pkg/codesearch/testdata/mm/refs.c.json index 09ac87f98208..50aa676bec9b 100644 --- a/pkg/codesearch/testdata/mm/refs.c.json +++ b/pkg/codesearch/testdata/mm/refs.c.json @@ -1,9 +1,9 @@ { "definitions": [ { - "kind": "function", "name": "ref_in_mm", "type": "void ()", + "kind": "function", "body": { "file": "mm/refs.c", "start_line": 3, @@ -12,9 +12,9 @@ "comment": {}, "refs": [ { + "name": "refs2", "kind": "calls", "entity_kind": "function", - "name": "refs2", "line": 5 } ] diff --git a/pkg/codesearch/testdata/query-file-index-source b/pkg/codesearch/testdata/query-file-index-source index 3263deca31dc..af52513b7183 100644 --- a/pkg/codesearch/testdata/query-file-index-source +++ b/pkg/codesearch/testdata/query-file-index-source @@ -3,6 +3,7 @@ file-index source0.c file source0.c defines the following entities: function close +function func_accepting_a_struct function function_with_comment_in_header function open struct struct_in_c_file diff --git a/pkg/codesearch/testdata/query-find-references-struct b/pkg/codesearch/testdata/query-find-references-struct new file mode 100644 index 000000000000..cfb266dc7bce --- /dev/null +++ b/pkg/codesearch/testdata/query-find-references-struct @@ -0,0 +1,15 @@ +find-references source0.c some_struct "" 1 10 + +some_struct has 2 references: + +function func_accepting_a_struct uses it at source0.c:29 + 29: int func_accepting_a_struct(struct some_struct* p) + 30: { + + +function func_accepting_a_struct uses it at source0.c:31 + 30: { + 31: return ((some_struct_t*)p)->x + + 32: ((union some_union*)p)->x; + + diff --git a/pkg/codesearch/testdata/refs.c.json b/pkg/codesearch/testdata/refs.c.json index 289ce7c305d4..d8a1034e2139 100644 --- a/pkg/codesearch/testdata/refs.c.json +++ b/pkg/codesearch/testdata/refs.c.json @@ -1,9 +1,9 @@ { "definitions": [ { - "kind": "function", "name": "long_func_with_ref", "type": "void ()", + "kind": "function", "body": { "file": "refs.c", "start_line": 23, @@ -12,77 +12,77 @@ "comment": {}, "refs": [ { + "name": "refs0", "kind": "calls", "entity_kind": "function", - "name": "refs0", "line": 25 }, { + "name": "refs1", "kind": "calls", "entity_kind": "function", - "name": "refs1", "line": 26 }, { + "name": "refs0", "kind": "calls", "entity_kind": "function", - "name": "refs0", "line": 27 }, { + "name": "refs1", "kind": "calls", "entity_kind": "function", - "name": "refs1", "line": 28 }, { + "name": "refs2", "kind": "calls", "entity_kind": "function", - "name": "refs2", "line": 29 }, { + "name": "refs1", "kind": "takes-address-of", "entity_kind": "function", - "name": "refs1", "line": 29 }, { + "name": "refs0", "kind": "calls", "entity_kind": "function", - "name": "refs0", "line": 29 }, { + "name": "refs0", "kind": "calls", "entity_kind": "function", - "name": "refs0", "line": 30 }, { + "name": "refs1", "kind": "calls", "entity_kind": "function", - "name": "refs1", "line": 31 }, { + "name": "refs0", "kind": "calls", "entity_kind": "function", - "name": "refs0", "line": 32 }, { + "name": "refs1", "kind": "calls", "entity_kind": "function", - "name": "refs1", "line": 33 } ] }, { - "kind": "function", "name": "refs0", "type": "int ()", + "kind": "function", "body": { "file": "refs.c", "start_line": 4, @@ -95,9 +95,9 @@ } }, { - "kind": "function", "name": "refs1", "type": "void ()", + "kind": "function", "body": { "file": "refs.c", "start_line": 9, @@ -106,9 +106,9 @@ "comment": {} }, { - "kind": "function", "name": "refs2", "type": "void (void (*)(), int)", + "kind": "function", "body": { "file": "refs.c", "start_line": 13, @@ -117,9 +117,9 @@ "comment": {} }, { - "kind": "function", "name": "refs3", "type": "void ()", + "kind": "function", "body": { "file": "refs.c", "start_line": 17, @@ -128,27 +128,27 @@ "comment": {}, "refs": [ { + "name": "refs2", "kind": "calls", "entity_kind": "function", - "name": "refs2", "line": 19 }, { + "name": "refs1", "kind": "takes-address-of", "entity_kind": "function", - "name": "refs1", "line": 19 }, { + "name": "refs0", "kind": "calls", "entity_kind": "function", - "name": "refs0", "line": 19 }, { + "name": "refs2", "kind": "takes-address-of", "entity_kind": "function", - "name": "refs2", "line": 20 } ] diff --git a/pkg/codesearch/testdata/source0.c b/pkg/codesearch/testdata/source0.c index 2d312ff995e7..5ac117cab1b3 100644 --- a/pkg/codesearch/testdata/source0.c +++ b/pkg/codesearch/testdata/source0.c @@ -25,3 +25,9 @@ void function_with_comment_in_header() { same_name_in_several_files(); } + +int func_accepting_a_struct(struct some_struct* p) +{ + return ((some_struct_t*)p)->x + + ((union some_union*)p)->x; +} diff --git a/pkg/codesearch/testdata/source0.c.json b/pkg/codesearch/testdata/source0.c.json index ba6b4175133f..c2c2de3e498d 100644 --- a/pkg/codesearch/testdata/source0.c.json +++ b/pkg/codesearch/testdata/source0.c.json @@ -1,8 +1,8 @@ { "definitions": [ { - "kind": "enum", "name": "some_enum", + "kind": "enum", "body": { "file": "source0.h", "start_line": 45, @@ -11,9 +11,9 @@ "comment": {} }, { - "kind": "function", "name": "close", "type": "int ()", + "kind": "function", "body": { "file": "source0.c", "start_line": 19, @@ -22,9 +22,46 @@ "comment": {} }, { + "name": "func_accepting_a_struct", + "type": "int (struct some_struct *)", "kind": "function", + "body": { + "file": "source0.c", + "start_line": 29, + "end_line": 33 + }, + "comment": {}, + "refs": [ + { + "name": "some_struct", + "kind": "uses", + "entity_kind": "struct", + "line": 29 + }, + { + "name": "some_struct_t", + "kind": "uses", + "entity_kind": "typedef", + "line": 31 + }, + { + "name": "some_struct", + "kind": "uses", + "entity_kind": "struct", + "line": 31 + }, + { + "name": "some_union", + "kind": "uses", + "entity_kind": "union", + "line": 32 + } + ] + }, + { "name": "func_in_header", "type": "int ()", + "kind": "function", "is_static": true, "body": { "file": "source0.h", @@ -34,9 +71,9 @@ "comment": {} }, { - "kind": "function", "name": "function_with_comment_in_header", "type": "void ()", + "kind": "function", "body": { "file": "source0.c", "start_line": 24, @@ -45,17 +82,17 @@ "comment": {}, "refs": [ { + "name": "same_name_in_several_files", "kind": "calls", "entity_kind": "function", - "name": "same_name_in_several_files", "line": 26 } ] }, { - "kind": "function", "name": "open", "type": "int ()", + "kind": "function", "body": { "file": "source0.c", "start_line": 11, @@ -68,8 +105,8 @@ } }, { - "kind": "struct", "name": "another_struct", + "kind": "struct", "body": { "file": "source0.h", "start_line": 36, @@ -78,8 +115,8 @@ "comment": {} }, { - "kind": "struct", "name": "some_struct", + "kind": "struct", "body": { "file": "source0.h", "start_line": 17, @@ -88,8 +125,8 @@ "comment": {} }, { - "kind": "struct", "name": "some_struct_with_a_comment", + "kind": "struct", "body": { "file": "source0.h", "start_line": 24, @@ -102,8 +139,8 @@ } }, { - "kind": "struct", "name": "struct_in_c_file", + "kind": "struct", "body": { "file": "source0.c", "start_line": 6, @@ -112,8 +149,8 @@ "comment": {} }, { - "kind": "typedef", "name": "another_struct_t", + "kind": "typedef", "body": { "file": "source0.h", "start_line": 36, @@ -122,8 +159,8 @@ "comment": {} }, { - "kind": "typedef", "name": "some_enum_t", + "kind": "typedef", "body": { "file": "source0.h", "start_line": 50, @@ -132,8 +169,8 @@ "comment": {} }, { - "kind": "typedef", "name": "some_struct_t", + "kind": "typedef", "body": { "file": "source0.h", "start_line": 22, @@ -142,8 +179,8 @@ "comment": {} }, { - "kind": "typedef", "name": "typedefed_struct_t", + "kind": "typedef", "body": { "file": "source0.h", "start_line": 32, @@ -152,8 +189,8 @@ "comment": {} }, { - "kind": "union", "name": "some_union", + "kind": "union", "body": { "file": "source0.h", "start_line": 40, diff --git a/pkg/codesearch/testdata/source1.c.json b/pkg/codesearch/testdata/source1.c.json index 72278a191eee..9a90d789f07b 100644 --- a/pkg/codesearch/testdata/source1.c.json +++ b/pkg/codesearch/testdata/source1.c.json @@ -1,9 +1,9 @@ { "definitions": [ { - "kind": "function", "name": "same_name_in_several_files", "type": "void ()", + "kind": "function", "is_static": true, "body": { "file": "source1.c", diff --git a/pkg/codesearch/testdata/source2.c.json b/pkg/codesearch/testdata/source2.c.json index 4407152db595..5e75950c0b04 100644 --- a/pkg/codesearch/testdata/source2.c.json +++ b/pkg/codesearch/testdata/source2.c.json @@ -1,9 +1,9 @@ { "definitions": [ { - "kind": "function", "name": "same_name_in_several_files", "type": "void ()", + "kind": "function", "body": { "file": "source2.c", "start_line": 4, diff --git a/pkg/declextract/entity.go b/pkg/declextract/entity.go index 3b5e13a6d908..82bf00446851 100644 --- a/pkg/declextract/entity.go +++ b/pkg/declextract/entity.go @@ -228,7 +228,7 @@ type EntityGlobalAddr struct { Name string } -func (out *Output) Merge(other *Output) { +func (out *Output) Merge(other *Output, v *clangtool.Verifier) { out.Functions = append(out.Functions, other.Functions...) out.Consts = append(out.Consts, other.Consts...) out.Enums = append(out.Enums, other.Enums...) diff --git a/tools/clang/codesearch/codesearch.cpp b/tools/clang/codesearch/codesearch.cpp index 8b096b1aba5b..a1174b26b2e4 100644 --- a/tools/clang/codesearch/codesearch.cpp +++ b/tools/clang/codesearch/codesearch.cpp @@ -76,7 +76,12 @@ class Indexer : public RecursiveASTVisitor { bool TraverseEnumDecl(EnumDecl*); bool TraverseTypedefDecl(TypedefDecl*); bool TraverseCallExpr(CallExpr*); + bool TraverseCStyleCastExpr(CStyleCastExpr*); + bool TraverseVarDecl(VarDecl*); + bool TraverseParmVarDecl(ParmVarDecl*); bool VisitDeclRefExpr(const DeclRefExpr*); + bool VisitTagType(const TagType*); + bool VisitTypedefType(const TypedefType*); private: ASTContext& Context; @@ -84,6 +89,10 @@ class Indexer : public RecursiveASTVisitor { Output& Out; Definition* Current = nullptr; bool InCallee = false; + // If set, record references to struct types as uses. + SourceLocation TypeRefingLocation; + + void EmitReference(SourceLocation Loc, const NamedDecl* Named, const char* EntityKind, const char* RefKind); struct NamedDeclEmitter { NamedDeclEmitter(Indexer* Parent, const NamedDecl* Decl, const char* Kind, const std::string& Type, bool IsStatic); @@ -100,6 +109,13 @@ class Indexer : public RecursiveASTVisitor { using Base = RecursiveASTVisitor; }; +template struct ScopedState { + T* const Var; + T Saved; + ScopedState(T* Var, T ScopeValue) : Var(Var), Saved(*Var) { *Var = ScopeValue; } + ~ScopedState() { *Var = Saved; } +}; + bool Instance::handleBeginSource(CompilerInstance& CI) { Preprocessor& PP = CI.getPreprocessor(); PP.addPPCallbacks(std::make_unique(PP, Macros)); @@ -173,27 +189,76 @@ bool Indexer::TraverseFunctionDecl(FunctionDecl* Func) { } bool Indexer::TraverseCallExpr(CallExpr* CE) { - bool SavedInCallee = InCallee; - InCallee = true; - TraverseStmt(CE->getCallee()); - InCallee = SavedInCallee; - + { + ScopedState Scoped(&InCallee, true); + TraverseStmt(CE->getCallee()); + } for (auto* Arg : CE->arguments()) TraverseStmt(Arg); return true; } bool Indexer::VisitDeclRefExpr(const DeclRefExpr* DeclRef) { - const auto* Func = dyn_cast(DeclRef->getDecl()); - if (!Func || !Current) + if (const auto* Func = dyn_cast(DeclRef->getDecl())) + EmitReference(DeclRef->getBeginLoc(), DeclRef->getDecl(), EntityKindFunction, + InCallee ? RefKindCall : RefKindTakesAddr); + return true; +} + +bool Indexer::TraverseVarDecl(VarDecl* Decl) { + ScopedState Scoped(&TypeRefingLocation, Decl->getBeginLoc()); + return Base::TraverseVarDecl(Decl); +} + +bool Indexer::TraverseParmVarDecl(ParmVarDecl* Decl) { + ScopedState Scoped(&TypeRefingLocation, Decl->getBeginLoc()); + return Base::TraverseParmVarDecl(Decl); +} + +bool Indexer::TraverseCStyleCastExpr(CStyleCastExpr* Cast) { + ScopedState Scoped(&TypeRefingLocation, Cast->getBeginLoc()); + return Base::TraverseCStyleCastExpr(Cast); +} + +bool Indexer::VisitTagType(const TagType* T) { + if (TypeRefingLocation.isInvalid()) + return true; + const auto* Tag = T->getAsTagDecl(); + const char* EntityKind = nullptr; + if (Tag->isStruct()) + EntityKind = EntityKindStruct; + else if (Tag->isUnion()) + EntityKind = EntityKindUnion; + else if (Tag->isEnum()) + EntityKind = EntityKindEnum; + else return true; + EmitReference(TypeRefingLocation, Tag, EntityKind, RefKindUses); + return true; +} + +bool Indexer::VisitTypedefType(const TypedefType* T) { + if (TypeRefingLocation.isInvalid()) + return true; + EmitReference(TypeRefingLocation, T->getDecl(), EntityKindTypedef, RefKindUses); + // If it's a struct typedef, also note the struct use. + if (const auto* Tag = dyn_cast(T->getCanonicalTypeInternal().getTypePtr())) + VisitTagType(Tag); + return true; +} + +void Indexer::EmitReference(SourceLocation Loc, const NamedDecl* Named, const char* EntityKind, const char* RefKind) { + if (!Current || !Named || Named->getNameAsString().empty()) + return; + const std::string& Name = Named->getNameAsString(); + if (Name.empty()) + return; Current->Refs.push_back(Reference{ - .Kind = InCallee ? RefKindCall : RefKindTakesAddr, - .EntityKind = EntityKindFunction, - .Name = Func->getNameAsString(), - .Line = static_cast(SM.getExpansionLineNumber(DeclRef->getBeginLoc())), + .Kind = RefKind, + .EntityKind = EntityKind, + .Name = Name, + .Line = static_cast(SM.getExpansionLineNumber(Loc)), }); - return true; } bool Indexer::TraverseRecordDecl(RecordDecl* Decl) { diff --git a/tools/clang/codesearch/output.h b/tools/clang/codesearch/output.h index cb902444396a..8768d30248a9 100644 --- a/tools/clang/codesearch/output.h +++ b/tools/clang/codesearch/output.h @@ -18,6 +18,9 @@ constexpr char EntityKindMacro[] = "macro"; constexpr char EntityKindEnum[] = "enum"; constexpr char EntityKindTypedef[] = "typedef"; +// The uses reference is very generic, ideally we refine it in the future +// (e.g. "used as an argument type", "cast to this type", "includes field of this type", etc). +constexpr char RefKindUses[] = "uses"; constexpr char RefKindCall[] = "calls"; constexpr char RefKindTakesAddr[] = "takes-address-of"; diff --git a/tools/syz-codesearch/codesearch.go b/tools/syz-codesearch/codesearch.go index afd3840c7db0..be1efbba56f9 100644 --- a/tools/syz-codesearch/codesearch.go +++ b/tools/syz-codesearch/codesearch.go @@ -19,7 +19,7 @@ func main() { flagKernelSrc = flag.String("kernel-src", "", "path to kernel source directory (mandatory)") flagKernelObj = flag.String("kernel-obj", "", "path to kernel build directory (mandatory)") ) - flag.Parse() + defer tool.Init()() if len(flag.Args()) == 0 || *flagDatabase == "" || *flagKernelSrc == "" || *flagKernelObj == "" { printUsageAndExit() }