From 5b97dbae4acb1b83571c802e3d40c89e73d71e06 Mon Sep 17 00:00:00 2001 From: Dmitry Vyukov Date: Mon, 26 Jan 2026 17:56:07 +0100 Subject: [PATCH] pkg/codesearch: support finding field reads/writes --- pkg/aflow/tool/codesearcher/codesearcher.go | 5 +- pkg/codesearch/codesearch.go | 12 ++- pkg/codesearch/codesearch_test.go | 4 +- pkg/codesearch/database.go | 6 ++ .../testdata/query-def-source-struct7 | 3 +- .../testdata/query-def-source-struct8 | 8 +- .../testdata/query-def-source-struct9 | 2 +- .../testdata/query-file-index-source | 1 + .../testdata/query-find-references-field0 | 34 +++++++ .../testdata/query-find-references-field1 | 10 +++ .../testdata/query-find-references-field2 | 10 +++ .../testdata/query-find-references-struct | 7 +- pkg/codesearch/testdata/source0.c | 9 ++ pkg/codesearch/testdata/source0.c.json | 89 +++++++++++++++++-- pkg/codesearch/testdata/source0.h | 1 + tools/clang/codesearch/codesearch.cpp | 47 +++++++++- tools/clang/codesearch/output.h | 3 + 17 files changed, 229 insertions(+), 22 deletions(-) create mode 100644 pkg/codesearch/testdata/query-find-references-field0 create mode 100644 pkg/codesearch/testdata/query-find-references-field1 create mode 100644 pkg/codesearch/testdata/query-find-references-field2 diff --git a/pkg/aflow/tool/codesearcher/codesearcher.go b/pkg/aflow/tool/codesearcher/codesearcher.go index 263c1f8f9f37..49c920ba3c02 100644 --- a/pkg/aflow/tool/codesearcher/codesearcher.go +++ b/pkg/aflow/tool/codesearcher/codesearcher.go @@ -48,7 +48,10 @@ For example, how a function works, what precondition error checks it has, etc. aflow.NewFuncTool("codesearch-find-references", findReferences, ` Tool finds and lists all references to (uses of) the given entity. Entity can be function, struct, or global variable. -If can be used to find all calls or other uses of the given function. +If can be used to find all calls or other uses of the given function, +definition of the given struct/union/enum, +or all reads/writes of the given struct/union field. +To find field references use 'struct_name::field_name' syntax. `), } diff --git a/pkg/codesearch/codesearch.go b/pkg/codesearch/codesearch.go index 33a87e712068..53618fb6fc56 100644 --- a/pkg/codesearch/codesearch.go +++ b/pkg/codesearch/codesearch.go @@ -259,8 +259,14 @@ type ReferenceInfo struct { func (index *Index) FindReferences(contextFile, name, srcPrefix string, contextLines, outputLimit int) ( []ReferenceInfo, int, error) { + // Just in case LLM decides to reference structs/fields with the tag. + name = strings.TrimSpace(strings.TrimPrefix(strings.TrimPrefix(strings.TrimSpace(name), + "struct "), "union ")) + // We don't export each field as a separate definition, + // so we do just name-based match for them. + isField := strings.Contains(name, "::") target := index.findDefinition(contextFile, name) - if target == nil { + if target == nil && !isField { return nil, 0, aflow.BadCallError("requested entity does not exist") } if srcPrefix != "" { @@ -278,8 +284,8 @@ func (index *Index) FindReferences(contextFile, name, srcPrefix string, contextL // the target is a non-static 'foo' in some file, // the reference is in another file and refers to a static 'foo' // defined in that file (which is not the target 'foo'). - if ref.EntityKind != target.Kind || ref.Name != target.Name || - target.IsStatic && target.Body.File != def.Body.File { + if ref.Name != name || !isField && (ref.EntityKind != target.Kind || + target.IsStatic && target.Body.File != def.Body.File) { continue } totalCount++ diff --git a/pkg/codesearch/codesearch_test.go b/pkg/codesearch/codesearch_test.go index 85dc90e0012d..35671567e1cd 100644 --- a/pkg/codesearch/codesearch_test.go +++ b/pkg/codesearch/codesearch_test.go @@ -54,8 +54,8 @@ func testCommand(t *testing.T, index *Index, covered map[string]bool, file strin cmd := fields[0] var args []string for _, arg := range fields[1:] { - if arg == `""` { - arg = "" + if len(arg) >= 2 && arg[0] == '"' && arg[len(arg)-1] == '"' { + arg = arg[1 : len(arg)-1] } args = append(args, arg) } diff --git a/pkg/codesearch/database.go b/pkg/codesearch/database.go index 4abd9b6909ca..92f0944a92e0 100644 --- a/pkg/codesearch/database.go +++ b/pkg/codesearch/database.go @@ -57,6 +57,7 @@ const ( EntityKindMacro EntityKindEnum EntityKindTypedef + EntityKindField entityKindLast ) @@ -68,6 +69,7 @@ var entityKindNames = [...]string{ EntityKindMacro: "macro", EntityKindEnum: "enum", EntityKindTypedef: "typedef", + EntityKindField: "field", } var entityKindBytes = func() [entityKindLast][]byte { @@ -103,6 +105,8 @@ const ( refKindInvalid RefKind = iota RefKindUses RefKindCall + RefKindRead + RefKindWrite RefKindTakesAddr refKindLast ) @@ -110,6 +114,8 @@ const ( var refKindNames = [...]string{ RefKindUses: "uses", RefKindCall: "calls", + RefKindRead: "reads", + RefKindWrite: "writes", RefKindTakesAddr: "takes-address-of", } diff --git a/pkg/codesearch/testdata/query-def-source-struct7 b/pkg/codesearch/testdata/query-def-source-struct7 index 3d62126140a7..12f1955b3908 100644 --- a/pkg/codesearch/testdata/query-def-source-struct7 +++ b/pkg/codesearch/testdata/query-def-source-struct7 @@ -5,4 +5,5 @@ union some_union is defined in source0.h: 40: union some_union { 41: int x; 42: void* p; - 43: }; + 43: struct some_struct s; + 44: }; diff --git a/pkg/codesearch/testdata/query-def-source-struct8 b/pkg/codesearch/testdata/query-def-source-struct8 index 93c7c65164df..40dcfbd656af 100644 --- a/pkg/codesearch/testdata/query-def-source-struct8 +++ b/pkg/codesearch/testdata/query-def-source-struct8 @@ -2,7 +2,7 @@ def-source source0.c some_enum yes enum some_enum is defined in source0.h: - 45: enum some_enum { - 46: enum_foo = 1, - 47: enum_bar = 2, - 48: }; + 46: enum some_enum { + 47: enum_foo = 1, + 48: enum_bar = 2, + 49: }; diff --git a/pkg/codesearch/testdata/query-def-source-struct9 b/pkg/codesearch/testdata/query-def-source-struct9 index 20056963a90b..a652d0f428a9 100644 --- a/pkg/codesearch/testdata/query-def-source-struct9 +++ b/pkg/codesearch/testdata/query-def-source-struct9 @@ -2,4 +2,4 @@ def-source source0.c some_enum_t yes typedef some_enum_t is defined in source0.h: - 50: typedef enum some_enum some_enum_t; + 51: typedef enum some_enum some_enum_t; diff --git a/pkg/codesearch/testdata/query-file-index-source b/pkg/codesearch/testdata/query-file-index-source index edfe4616f20a..bf7c20b220c7 100644 --- a/pkg/codesearch/testdata/query-file-index-source +++ b/pkg/codesearch/testdata/query-file-index-source @@ -3,6 +3,7 @@ file-index source0.c file source0.c defines the following entities: function close +function field_refs function func_accepting_a_struct function function_with_comment_in_header function function_with_quotes_in_type diff --git a/pkg/codesearch/testdata/query-find-references-field0 b/pkg/codesearch/testdata/query-find-references-field0 new file mode 100644 index 000000000000..e7a82ca47634 --- /dev/null +++ b/pkg/codesearch/testdata/query-find-references-field0 @@ -0,0 +1,34 @@ +find-references source0.c some_struct::x "" 1 10 + +some_struct::x has 5 references: + +function field_refs writes it at source0.c:41 + 40: { + 41: p->x = p->y; + 42: *(&p->x) = 1; + + +function field_refs takes-address-of it at source0.c:42 + 41: p->x = p->y; + 42: *(&p->x) = 1; + 43: u->p = 0; + + +function field_refs writes it at source0.c:44 + 43: u->p = 0; + 44: u->s.x = 2; + 45: return p->x; + + +function field_refs reads it at source0.c:45 + 44: u->s.x = 2; + 45: return p->x; + 46: } + + +function func_accepting_a_struct reads it at source0.c:31 + 30: { + 31: return ((some_struct_t*)p)->x + + 32: ((union some_union*)p)->x; + + diff --git a/pkg/codesearch/testdata/query-find-references-field1 b/pkg/codesearch/testdata/query-find-references-field1 new file mode 100644 index 000000000000..933d14b33976 --- /dev/null +++ b/pkg/codesearch/testdata/query-find-references-field1 @@ -0,0 +1,10 @@ +find-references source0.c some_struct::y "" 1 10 + +some_struct::y has 1 references: + +function field_refs reads it at source0.c:41 + 40: { + 41: p->x = p->y; + 42: *(&p->x) = 1; + + diff --git a/pkg/codesearch/testdata/query-find-references-field2 b/pkg/codesearch/testdata/query-find-references-field2 new file mode 100644 index 000000000000..08e1c27c542b --- /dev/null +++ b/pkg/codesearch/testdata/query-find-references-field2 @@ -0,0 +1,10 @@ +find-references source0.c some_union::p "" 1 10 + +some_union::p has 1 references: + +function field_refs writes it at source0.c:43 + 42: *(&p->x) = 1; + 43: u->p = 0; + 44: u->s.x = 2; + + diff --git a/pkg/codesearch/testdata/query-find-references-struct b/pkg/codesearch/testdata/query-find-references-struct index cfb266dc7bce..ba51c5e5394a 100644 --- a/pkg/codesearch/testdata/query-find-references-struct +++ b/pkg/codesearch/testdata/query-find-references-struct @@ -1,6 +1,11 @@ find-references source0.c some_struct "" 1 10 -some_struct has 2 references: +some_struct has 3 references: + +function field_refs uses it at source0.c:39 + 39: int field_refs(struct some_struct* p, union some_union* u) + 40: { + function func_accepting_a_struct uses it at source0.c:29 29: int func_accepting_a_struct(struct some_struct* p) diff --git a/pkg/codesearch/testdata/source0.c b/pkg/codesearch/testdata/source0.c index e5b47841b442..9482908c94f3 100644 --- a/pkg/codesearch/testdata/source0.c +++ b/pkg/codesearch/testdata/source0.c @@ -35,3 +35,12 @@ int func_accepting_a_struct(struct some_struct* p) void function_with_quotes_in_type(void __attribute__((btf_type_tag("user"))) *) { } + +int field_refs(struct some_struct* p, union some_union* u) +{ + p->x = p->y; + *(&p->x) = 1; + u->p = 0; + u->s.x = 2; + return p->x; +} diff --git a/pkg/codesearch/testdata/source0.c.json b/pkg/codesearch/testdata/source0.c.json index 5d347ae1b097..451ab58cc891 100644 --- a/pkg/codesearch/testdata/source0.c.json +++ b/pkg/codesearch/testdata/source0.c.json @@ -11,6 +11,73 @@ }, "comment": {} }, + { + "name": "field_refs", + "type": "int (struct some_struct *, union some_union *)", + "kind": "function", + "body": { + "file": "source0.c", + "start_line": 39, + "end_line": 46 + }, + "comment": {}, + "refs": [ + { + "name": "some_struct", + "kind": "uses", + "entity_kind": "struct", + "line": 39 + }, + { + "name": "some_union", + "kind": "uses", + "entity_kind": "union", + "line": 39 + }, + { + "name": "some_struct::x", + "kind": "writes", + "entity_kind": "field", + "line": 41 + }, + { + "name": "some_struct::y", + "kind": "reads", + "entity_kind": "field", + "line": 41 + }, + { + "name": "some_struct::x", + "kind": "takes-address-of", + "entity_kind": "field", + "line": 42 + }, + { + "name": "some_union::p", + "kind": "writes", + "entity_kind": "field", + "line": 43 + }, + { + "name": "some_struct::x", + "kind": "writes", + "entity_kind": "field", + "line": 44 + }, + { + "name": "some_union::s", + "kind": "reads", + "entity_kind": "field", + "line": 44 + }, + { + "name": "some_struct::x", + "kind": "reads", + "entity_kind": "field", + "line": 45 + } + ] + }, { "name": "func_accepting_a_struct", "type": "int (struct some_struct *)", @@ -28,6 +95,12 @@ "entity_kind": "struct", "line": 29 }, + { + "name": "some_struct::x", + "kind": "reads", + "entity_kind": "field", + "line": 31 + }, { "name": "some_struct_t", "kind": "uses", @@ -40,6 +113,12 @@ "entity_kind": "struct", "line": 31 }, + { + "name": "some_union::x", + "kind": "reads", + "entity_kind": "field", + "line": 32 + }, { "name": "some_union", "kind": "uses", @@ -155,7 +234,7 @@ "body": { "file": "source0.h", "start_line": 40, - "end_line": 43 + "end_line": 44 }, "comment": {} }, @@ -164,8 +243,8 @@ "kind": "enum", "body": { "file": "source0.h", - "start_line": 45, - "end_line": 48 + "start_line": 46, + "end_line": 49 }, "comment": {} }, @@ -184,8 +263,8 @@ "kind": "typedef", "body": { "file": "source0.h", - "start_line": 50, - "end_line": 50 + "start_line": 51, + "end_line": 51 }, "comment": {} }, diff --git a/pkg/codesearch/testdata/source0.h b/pkg/codesearch/testdata/source0.h index 9549feb9d845..c4ee661e5daf 100644 --- a/pkg/codesearch/testdata/source0.h +++ b/pkg/codesearch/testdata/source0.h @@ -40,6 +40,7 @@ typedef struct another_struct { union some_union { int x; void* p; + struct some_struct s; }; enum some_enum { diff --git a/tools/clang/codesearch/codesearch.cpp b/tools/clang/codesearch/codesearch.cpp index a1174b26b2e4..df91c84fcf57 100644 --- a/tools/clang/codesearch/codesearch.cpp +++ b/tools/clang/codesearch/codesearch.cpp @@ -8,6 +8,7 @@ #include "clang/AST/Comment.h" #include "clang/AST/Decl.h" #include "clang/AST/DeclarationName.h" +#include "clang/AST/ParentMapContext.h" #include "clang/AST/RecursiveASTVisitor.h" #include "clang/Basic/SourceManager.h" #include "clang/Frontend/CompilerInstance.h" @@ -82,6 +83,7 @@ class Indexer : public RecursiveASTVisitor { bool VisitDeclRefExpr(const DeclRefExpr*); bool VisitTagType(const TagType*); bool VisitTypedefType(const TypedefType*); + bool VisitMemberExpr(const MemberExpr*); private: ASTContext& Context; @@ -92,7 +94,9 @@ class Indexer : public RecursiveASTVisitor { // If set, record references to struct types as uses. SourceLocation TypeRefingLocation; + const Stmt* GetParent(const Stmt* S) const; void EmitReference(SourceLocation Loc, const NamedDecl* Named, const char* EntityKind, const char* RefKind); + void EmitReference(SourceLocation Loc, const std::string& Name, const char* EntityKind, const char* RefKind); struct NamedDeclEmitter { NamedDeclEmitter(Indexer* Parent, const NamedDecl* Decl, const char* Kind, const std::string& Type, bool IsStatic); @@ -247,11 +251,46 @@ bool Indexer::VisitTypedefType(const TypedefType* T) { return true; } +bool Indexer::VisitMemberExpr(const MemberExpr* E) { + auto* Record = E->getBase()->getType()->getAsRecordDecl(); + if (auto* Ptr = dyn_cast(E->getBase()->getType())) + Record = Ptr->getPointeeType()->getAsRecordDecl(); + if (!Record) + return true; + const std::string Field = Record->getNameAsString() + "::" + E->getMemberDecl()->getNameAsString(); + const char* RefKind = RefKindRead; + const Stmt* P = GetParent(E); + if (auto* BO = dyn_cast(P)) { + if (E == BO->getLHS() && (BO->isAssignmentOp() || BO->isCompoundAssignmentOp() || BO->isShiftAssignOp())) + RefKind = RefKindWrite; + } + if (auto* UO = dyn_cast(P)) + RefKind = RefKindTakesAddr; + EmitReference(E->getMemberLoc(), Field, EntityKindField, RefKind); + return true; +} + +const Stmt* Indexer::GetParent(const Stmt* S) const { + for (;;) { + const auto& Parents = Context.getParents(*S); + if (!Parents.empty()) + S = Parents[0].get(); + else + S = nullptr; + // Presumably ParentExpr is never interesting. + if (S && isa(S)) + continue; + return S; + } +} + void Indexer::EmitReference(SourceLocation Loc, const NamedDecl* Named, const char* EntityKind, const char* RefKind) { - if (!Current || !Named || Named->getNameAsString().empty()) - return; - const std::string& Name = Named->getNameAsString(); - if (Name.empty()) + if (Named) + EmitReference(Loc, Named->getNameAsString(), EntityKind, RefKind); +} + +void Indexer::EmitReference(SourceLocation Loc, const std::string& Name, const char* EntityKind, const char* RefKind) { + if (!Current || Name.empty()) return; Current->Refs.push_back(Reference{ .Kind = RefKind, diff --git a/tools/clang/codesearch/output.h b/tools/clang/codesearch/output.h index 8768d30248a9..48382d6d119c 100644 --- a/tools/clang/codesearch/output.h +++ b/tools/clang/codesearch/output.h @@ -17,11 +17,14 @@ constexpr char EntityKindVariable[] = "variable"; constexpr char EntityKindMacro[] = "macro"; constexpr char EntityKindEnum[] = "enum"; constexpr char EntityKindTypedef[] = "typedef"; +constexpr char EntityKindField[] = "field"; // The uses reference is very generic, ideally we refine it in the future // (e.g. "used as an argument type", "cast to this type", "includes field of this type", etc). constexpr char RefKindUses[] = "uses"; constexpr char RefKindCall[] = "calls"; +constexpr char RefKindRead[] = "reads"; +constexpr char RefKindWrite[] = "writes"; constexpr char RefKindTakesAddr[] = "takes-address-of"; struct LineRange {