Skip to content

Commit fa7953d

Browse files
committed
pkg/aflow/tool/codeeditor: add actual implementation
1 parent e7922f7 commit fa7953d

File tree

2 files changed

+252
-18
lines changed

2 files changed

+252
-18
lines changed

pkg/aflow/tool/codeeditor/codeeditor.go

Lines changed: 82 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -4,15 +4,21 @@
44
package codeeditor
55

66
import (
7+
"bytes"
8+
"os"
79
"path/filepath"
10+
"slices"
811
"strings"
912

1013
"github.com/google/syzkaller/pkg/aflow"
14+
"github.com/google/syzkaller/pkg/codesearch"
1115
"github.com/google/syzkaller/pkg/osutil"
1216
)
1317

1418
var Tool = aflow.NewFuncTool("codeeditor", codeeditor, `
15-
The tool does one code edit to form the final patch.
19+
The tool does one source code edit to form the final patch by replacing full lines
20+
with new provided lines. If new code is empty, current lines will be deleted.
21+
Provide full lines of code including new line characters.
1622
The tool should be called multiple times to do all required changes one-by-one,
1723
but avoid changing the same lines multiple times.
1824
Note: You will not see your edits via the codesearch tool.
@@ -24,27 +30,91 @@ type state struct {
2430
}
2531

2632
// args are the arguments of a single codeeditor tool call.
// The jsonschema tags carry the per-field descriptions.
type args struct {
	// SourceFile names the file to edit; codeeditor joins it onto
	// state.KernelScratchSrc.
	SourceFile string `jsonschema:"Full source file path to edit."`
	// CurrentCode is the snippet to look for in the file; it must not be blank.
	CurrentCode string `jsonschema:"The current code lines to be replaced."`
	// NewCode is what the matched lines are replaced with; empty deletes them.
	NewCode string `jsonschema:"New code lines to replace the current code lines."`
}
3137

3238
func codeeditor(ctx *aflow.Context, state state, args args) (struct{}, error) {
3339
if strings.Contains(filepath.Clean(args.SourceFile), "..") {
3440
return struct{}{}, aflow.BadCallError("SourceFile %q is outside of the source tree", args.SourceFile)
3541
}
3642
file := filepath.Join(state.KernelScratchSrc, args.SourceFile)
37-
if !osutil.IsExist(file) {
43+
// Filter out not source files too (e.g. .git, etc),
44+
// LLM have not seen them and should not be messing with them.
45+
if !osutil.IsExist(file) || !codesearch.IsSourceFile(file) {
3846
return struct{}{}, aflow.BadCallError("SourceFile %q does not exist", args.SourceFile)
3947
}
4048
if strings.TrimSpace(args.CurrentCode) == "" {
4149
return struct{}{}, aflow.BadCallError("CurrentCode snippet is empty")
4250
}
43-
// If SourceFile is incorrect, or CurrentCode is not matched, return aflow.BadCallError
44-
// with an explanation. Say that it needs to increase context if CurrentCode is not matched.
45-
// Try to do as fuzzy match for CurrentCode as possible (strip line numbers,
46-
// ignore white-spaces, etc).
47-
// Should we accept a reference line number, or function name to disambiguate in the case
48-
// of multiple matches?
49-
return struct{}{}, nil
51+
fileData, err := os.ReadFile(file)
52+
if err != nil {
53+
return struct{}{}, err
54+
}
55+
if len(fileData) == 0 || fileData[len(fileData)-1] != '\n' {
56+
// Generally shouldn't happen, but just in case.
57+
fileData = append(fileData, '\n')
58+
}
59+
if args.CurrentCode[len(args.CurrentCode)-1] != '\n' {
60+
args.CurrentCode += "\n"
61+
}
62+
if args.NewCode != "" && args.NewCode[len(args.NewCode)-1] != '\n' {
63+
args.NewCode += "\n"
64+
}
65+
lines := slices.Collect(bytes.Lines(fileData))
66+
src := slices.Collect(bytes.Lines([]byte(args.CurrentCode)))
67+
dst := slices.Collect(bytes.Lines([]byte(args.NewCode)))
68+
// First, try to match as is. If that fails, try a more permissive matching
69+
// that ignores whitespaces, empty lines, etc.
70+
newLines, matches := replace(lines, src, dst, false)
71+
if matches == 0 {
72+
newLines, matches = replace(lines, src, dst, true)
73+
}
74+
if matches == 0 {
75+
return struct{}{}, aflow.BadCallError("CurrentCode snippet does not match anything in the source file," +
76+
" provide more precise CurrentCode snippet")
77+
}
78+
if matches > 1 {
79+
return struct{}{}, aflow.BadCallError("CurrentCode snippet matched %v places,"+
80+
" increase context in CurrentCode to avoid ambiguity", matches)
81+
}
82+
err = osutil.WriteFile(file, slices.Concat(newLines...))
83+
return struct{}{}, err
84+
}
85+
86+
// replace substitutes every occurrence of the src line sequence in lines with
// dst and returns the resulting lines together with the number of occurrences
// found. Occurrences never overlap: scanning resumes right after a match.
//
// With fuzzy == false lines are compared byte-for-byte. With fuzzy == true
// leading/trailing whitespace is ignored and blank lines are skipped both in
// src and inside the matched region of lines. Blank lines surrounding the
// matched region are not part of the match and are kept.
//
// If src has nothing to match (no lines at all, or only blank lines in fuzzy
// mode), lines is returned unchanged with zero matches.
func replace(lines, src, dst [][]byte, fuzzy bool) (newLines [][]byte, matches int) {
	if fuzzy {
		// Blank snippet lines never constrain a fuzzy match, so normalize the
		// snippet once up front: trim every line and drop the blank ones.
		var trimmed [][]byte
		for _, s := range src {
			if s = bytes.TrimSpace(s); len(s) != 0 {
				trimmed = append(trimmed, s)
			}
		}
		src = trimmed
	}
	if len(src) == 0 {
		// Nothing to look for. Without this check every position would
		// "match" without consuming a line and the scan would never advance.
		return lines, 0
	}
	newLines = make([][]byte, 0, len(lines))
	for i := 0; i < len(lines); i++ {
		end, ok := matchAt(lines, src, i, fuzzy)
		if !ok {
			newLines = append(newLines, lines[i])
			continue
		}
		matches++
		newLines = append(newLines, dst...)
		// Continue after the replaced region (the loop increment adds 1).
		i = end - 1
	}
	return newLines, matches
}

// matchAt reports whether src matches lines beginning exactly at index start
// and, if so, returns the index one past the last line consumed by the match.
// In fuzzy mode src must already be trimmed and free of blank lines.
func matchAt(lines, src [][]byte, start int, fuzzy bool) (end int, ok bool) {
	li, si := start, 0
	for li < len(lines) && si < len(src) {
		l := lines[li]
		if fuzzy {
			l = bytes.TrimSpace(l)
			if len(l) == 0 {
				if li == start {
					// A match must begin on a non-blank line, otherwise blank
					// lines preceding the snippet would be replaced as well.
					return 0, false
				}
				li++
				continue
			}
		}
		if !bytes.Equal(l, src[si]) {
			return 0, false
		}
		li++
		si++
	}
	return li, si == len(src)
}

pkg/aflow/tool/codeeditor/codeeditor_test.go

Lines changed: 170 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,8 @@
44
package codeeditor
55

66
import (
7+
"fmt"
8+
"os"
79
"path/filepath"
810
"testing"
911

@@ -15,7 +17,7 @@ import (
1517
func TestCodeeditorEscapingPath(t *testing.T) {
1618
aflow.TestTool(t, Tool,
1719
state{
18-
KernelScratchSrc: "whatever",
20+
KernelScratchSrc: t.TempDir(),
1921
},
2022
args{
2123
SourceFile: "../../passwd",
@@ -38,25 +40,187 @@ func TestCodeeditorMissingPath(t *testing.T) {
3840
)
3941
}
4042

43+
func TestCodeeditorNonSourceFile(t *testing.T) {
44+
dir := writeTestFile(t, "src", "data")
45+
aflow.TestTool(t, Tool,
46+
state{
47+
KernelScratchSrc: dir,
48+
},
49+
args{
50+
SourceFile: "src",
51+
},
52+
struct{}{},
53+
`SourceFile "src" does not exist`,
54+
)
55+
}
56+
4157
func TestCodeeditorEmptyCurrentCode(t *testing.T) {
42-
dir := writeTestFile(t, "foo", "data")
58+
dir := writeTestFile(t, "src.c", "data")
4359
aflow.TestTool(t, Tool,
4460
state{
4561
KernelScratchSrc: dir,
4662
},
4763
args{
48-
SourceFile: "foo",
64+
SourceFile: "src.c",
4965
},
5066
struct{}{},
5167
`CurrentCode snippet is empty`,
5268
)
5369
}
5470

71+
func TestCodeeditorNoMatches(t *testing.T) {
72+
dir := writeTestFile(t, "src.c", "foo")
73+
aflow.TestTool(t, Tool,
74+
state{
75+
KernelScratchSrc: dir,
76+
},
77+
args{
78+
SourceFile: "src.c",
79+
CurrentCode: "foobar",
80+
},
81+
struct{}{},
82+
`CurrentCode snippet does not match anything in the source file, provide more precise CurrentCode snippet`,
83+
)
84+
}
85+
86+
func TestCodeeditorMultipleMatches(t *testing.T) {
87+
dir := writeTestFile(t, "src.c", `
88+
linefoo
89+
bar
90+
foo
91+
bar
92+
foo
93+
fooline
94+
foo`)
95+
aflow.TestTool(t, Tool,
96+
state{
97+
KernelScratchSrc: dir,
98+
},
99+
args{
100+
SourceFile: "src.c",
101+
CurrentCode: "foo",
102+
},
103+
struct{}{},
104+
`CurrentCode snippet matched 3 places, increase context in CurrentCode to avoid ambiguity`,
105+
)
106+
}
107+
108+
func TestCodeeditorReplacement(t *testing.T) {
109+
type Test struct {
110+
curFile string
111+
curCode string
112+
newCode string
113+
newFile string
114+
}
115+
tests := []Test{
116+
{
117+
curFile: `
118+
line0
119+
line1
120+
lineee2
121+
lin3
122+
last line
123+
`,
124+
curCode: `line1
125+
lineee2
126+
lin3`,
127+
newCode: `replaced line`,
128+
newFile: `
129+
line0
130+
replaced line
131+
last line
132+
`,
133+
},
134+
{
135+
curFile: `
136+
line0
137+
line1
138+
last line
139+
`,
140+
curCode: `line1
141+
`,
142+
newCode: `replaced line 1
143+
replaced line 2
144+
replaced line 3`,
145+
newFile: `
146+
line0
147+
replaced line 1
148+
replaced line 2
149+
replaced line 3
150+
last line
151+
`,
152+
},
153+
{
154+
curFile: `
155+
line0
156+
line1
157+
line2
158+
`,
159+
curCode: `line2
160+
`,
161+
newCode: ``,
162+
newFile: `
163+
line0
164+
line1
165+
`,
166+
},
167+
{
168+
curFile: `that's it`,
169+
curCode: `that's it`,
170+
newCode: `that's that`,
171+
newFile: `that's that
172+
`,
173+
},
174+
{
175+
curFile: `
176+
line0
177+
line1
178+
179+
line2
180+
line3
181+
182+
line4
183+
`,
184+
curCode: `
185+
line1
186+
line2
187+
188+
189+
line3 `,
190+
newCode: ` replacement`,
191+
newFile: `
192+
line0
193+
replacement
194+
195+
line4
196+
`,
197+
},
198+
}
199+
for i, test := range tests {
200+
t.Run(fmt.Sprint(i), func(t *testing.T) {
201+
const filename = "src.c"
202+
dir := writeTestFile(t, filename, test.curFile)
203+
aflow.TestTool(t, Tool,
204+
state{
205+
KernelScratchSrc: dir,
206+
},
207+
args{
208+
SourceFile: filename,
209+
CurrentCode: test.curCode,
210+
NewCode: test.newCode,
211+
},
212+
struct{}{},
213+
"")
214+
data, err := os.ReadFile(filepath.Join(dir, filename))
215+
require.NoError(t, err)
216+
require.Equal(t, test.newFile, string(data))
217+
})
218+
}
219+
}
220+
55221
func writeTestFile(t *testing.T, filename, data string) string {
56222
dir := t.TempDir()
57-
if err := osutil.WriteFile(filepath.Join(dir, filename), []byte(data)); err != nil {
58-
t.Fatal(err)
59-
}
223+
require.NoError(t, osutil.WriteFile(filepath.Join(dir, filename), []byte(data)))
60224
return dir
61225
}
62226

0 commit comments

Comments
 (0)