diff --git a/Makefile b/Makefile index b10bfb6a1351..5cef2c92baf4 100644 --- a/Makefile +++ b/Makefile @@ -104,7 +104,7 @@ ifeq ("$(TARGETOS)", "trusty") endif .PHONY: all clean host target \ - manager executor kfuzztest ci hub \ + manager executor kfuzztest ci hub agent \ execprog mutate prog2c trace2syz repro upgrade db \ usbgen symbolize cover kconf syz-build crush \ bin/syz-extract bin/syz-fmt \ @@ -172,6 +172,9 @@ ci: descriptions hub: descriptions GOOS=$(HOSTOS) GOARCH=$(HOSTARCH) $(HOSTGO) build $(GOHOSTFLAGS) -o ./bin/syz-hub github.com/google/syzkaller/syz-hub +agent: descriptions + GOOS=$(HOSTOS) GOARCH=$(HOSTARCH) $(HOSTGO) build $(GOHOSTFLAGS) -o ./bin/syz-agent github.com/google/syzkaller/syz-agent + repro: descriptions GOOS=$(HOSTOS) GOARCH=$(HOSTARCH) $(HOSTGO) build $(GOHOSTFLAGS) -o ./bin/syz-repro github.com/google/syzkaller/tools/syz-repro diff --git a/go.mod b/go.mod index 040f9b2a7318..c4fd00536e41 100644 --- a/go.mod +++ b/go.mod @@ -10,7 +10,7 @@ require ( cloud.google.com/go/logging v1.13.1 cloud.google.com/go/profiler v0.4.3 cloud.google.com/go/secretmanager v1.16.0 - cloud.google.com/go/spanner v1.82.0 + cloud.google.com/go/spanner v1.86.0 cloud.google.com/go/storage v1.57.1 github.com/VividCortex/gohistogram v1.0.0 github.com/argoproj/argo-workflows/v3 v3.7.3 @@ -18,6 +18,7 @@ require ( github.com/google/flatbuffers v25.9.23+incompatible github.com/google/generative-ai-go v0.20.1 github.com/google/go-cmp v0.7.0 + github.com/google/jsonschema-go v0.3.0 github.com/google/uuid v1.6.0 github.com/gorilla/handlers v1.5.2 github.com/ianlancetaylor/demangle v0.0.0-20251118225945-96ee0021ea0f @@ -34,7 +35,8 @@ require ( golang.org/x/tools v0.38.0 google.golang.org/api v0.257.0 google.golang.org/appengine/v2 v2.0.6 - google.golang.org/genproto v0.0.0-20250603155806-513f23925822 + google.golang.org/genai v1.40.0 + google.golang.org/genproto v0.0.0-20251014184007-4626949a642f google.golang.org/grpc v1.78.0 google.golang.org/protobuf v1.36.10 
gopkg.in/yaml.v3 v3.0.1 @@ -172,6 +174,7 @@ require ( github.com/googleapis/enterprise-certificate-proxy v0.3.7 // indirect github.com/googleapis/gax-go/v2 v2.15.0 // indirect github.com/gordonklaus/ineffassign v0.1.0 // indirect + github.com/gorilla/websocket v1.5.4-0.20250319132907-e064f32e3674 // indirect github.com/gostaticanalysis/analysisutil v0.7.1 // indirect github.com/gostaticanalysis/comment v1.5.0 // indirect github.com/gostaticanalysis/forcetypeassert v0.2.0 // indirect @@ -268,7 +271,7 @@ require ( github.com/spf13/afero v1.14.0 // indirect github.com/spf13/cast v1.9.2 // indirect github.com/spf13/cobra v1.9.1 // indirect - github.com/spf13/pflag v1.0.7 // indirect + github.com/spf13/pflag v1.0.10 // indirect github.com/spf13/viper v1.20.1 // indirect github.com/spiffe/go-spiffe/v2 v2.6.0 // indirect github.com/ssgreg/nlreturn/v2 v2.2.1 // indirect diff --git a/go.sum b/go.sum index 58e66a813a83..1a44df23c1ea 100644 --- a/go.sum +++ b/go.sum @@ -216,8 +216,8 @@ cloud.google.com/go/datacatalog v1.8.0/go.mod h1:KYuoVOv9BM8EYz/4eMFxrr4DUKhGIOX cloud.google.com/go/datacatalog v1.8.1/go.mod h1:RJ58z4rMp3gvETA465Vg+ag8BGgBdnRPEMMSTr5Uv+M= cloud.google.com/go/datacatalog v1.12.0/go.mod h1:CWae8rFkfp6LzLumKOnmVh4+Zle4A3NXLzVJ1d1mRm0= cloud.google.com/go/datacatalog v1.13.0/go.mod h1:E4Rj9a5ZtAxcQJlEBTLgMTphfP11/lNaAshpoBgemX8= -cloud.google.com/go/datacatalog v1.26.0 h1:eFgygb3DTufTWWUB8ARk+dSuXz+aefNJXTlkWlQcWwE= -cloud.google.com/go/datacatalog v1.26.0/go.mod h1:bLN2HLBAwB3kLTFT5ZKLHVPj/weNz6bR0c7nYp0LE14= +cloud.google.com/go/datacatalog v1.26.1 h1:bCRKA8uSQN8wGW3Tw0gwko4E9a64GRmbW1nCblhgC2k= +cloud.google.com/go/datacatalog v1.26.1/go.mod h1:2Qcq8vsHNxMDgjgadRFmFG47Y+uuIVsyEGUrlrKEdrg= cloud.google.com/go/dataflow v0.6.0/go.mod h1:9QwV89cGoxjjSR9/r7eFDqqjtvbKxAK2BaYU6PVk9UM= cloud.google.com/go/dataflow v0.7.0/go.mod h1:PX526vb4ijFMesO1o202EaUmouZKBpjHsTlCtB4parQ= cloud.google.com/go/dataflow v0.8.0/go.mod h1:Rcf5YgTKPtQyYz8bLYhFoIV/vP39eL7fWNcSOyFfLJE= 
@@ -544,8 +544,8 @@ cloud.google.com/go/shell v1.6.0/go.mod h1:oHO8QACS90luWgxP3N9iZVuEiSF84zNyLytb+ cloud.google.com/go/spanner v1.41.0/go.mod h1:MLYDBJR/dY4Wt7ZaMIQ7rXOTLjYrmxLE/5ve9vFfWos= cloud.google.com/go/spanner v1.44.0/go.mod h1:G8XIgYdOK+Fbcpbs7p2fiprDw4CaZX63whnSMLVBxjk= cloud.google.com/go/spanner v1.45.0/go.mod h1:FIws5LowYz8YAE1J8fOS7DJup8ff7xJeetWEo5REA2M= -cloud.google.com/go/spanner v1.82.0 h1:w9uO8RqEoBooBLX4nqV1RtgudyU2ZX780KTLRgeVg60= -cloud.google.com/go/spanner v1.82.0/go.mod h1:BzybQHFQ/NqGxvE/M+/iU29xgutJf7Q85/4U9RWMto0= +cloud.google.com/go/spanner v1.86.0 h1:jlNWusBol1Jxa9PmYGknUBzLwvD1cebuEenzqebZ9xs= +cloud.google.com/go/spanner v1.86.0/go.mod h1:bbwCXbM+zljwSPLZ44wZOdzcdmy89hbUGmM/r9sD0ws= cloud.google.com/go/speech v1.6.0/go.mod h1:79tcr4FHCimOp56lwC01xnt/WPJZc4v3gzyT7FoBkCM= cloud.google.com/go/speech v1.7.0/go.mod h1:KptqL+BAQIhMsj1kOP2la5DSEEerPDuOP/2mmkhHhZQ= cloud.google.com/go/speech v1.8.0/go.mod h1:9bYIl1/tjsAnMgKGHKmBZzXKEkGgtU+MpdDPTE9f7y0= @@ -584,8 +584,8 @@ cloud.google.com/go/trace v1.3.0/go.mod h1:FFUE83d9Ca57C+K8rDl/Ih8LwOzWIV1krKgxg cloud.google.com/go/trace v1.4.0/go.mod h1:UG0v8UBqzusp+z63o7FK74SdFE+AXpCLdFb1rshXG+Y= cloud.google.com/go/trace v1.8.0/go.mod h1:zH7vcsbAhklH8hWFig58HvxcxyQbaIqMarMg9hn5ECA= cloud.google.com/go/trace v1.9.0/go.mod h1:lOQqpE5IaWY0Ixg7/r2SjixMuc6lfTFeO4QGM4dQWOk= -cloud.google.com/go/trace v1.11.6 h1:2O2zjPzqPYAHrn3OKl029qlqG6W8ZdYaOWRyr8NgMT4= -cloud.google.com/go/trace v1.11.6/go.mod h1:GA855OeDEBiBMzcckLPE2kDunIpC72N+Pq8WFieFjnI= +cloud.google.com/go/trace v1.11.7 h1:kDNDX8JkaAG3R2nq1lIdkb7FCSi1rCmsEtKVsty7p+U= +cloud.google.com/go/trace v1.11.7/go.mod h1:TNn9d5V3fQVf6s4SCveVMIBS2LJUqo73GACmq/Tky0s= cloud.google.com/go/translate v1.3.0/go.mod h1:gzMUwRjvOqj5i69y/LYLd8RrNQk+hOmIXTi9+nb3Djs= cloud.google.com/go/translate v1.4.0/go.mod h1:06Dn/ppvLD6WvA5Rhdp029IX2Mi3Mn7fpMRLPvXT5Wg= cloud.google.com/go/translate v1.5.0/go.mod h1:29YDSYveqqpA1CQFD7NQuP49xymq17RXNaUDdc0mNu0= @@ -1033,6 
+1033,8 @@ github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeN github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8= github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU= github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= +github.com/google/jsonschema-go v0.3.0 h1:6AH2TxVNtk3IlvkkhjrtbUc4S8AvO0Xii0DxIygDg+Q= +github.com/google/jsonschema-go v0.3.0/go.mod h1:r5quNTdLOYEz95Ru18zA0ydNbBuYoo9tgaYcxEYhJVE= github.com/google/keep-sorted v0.6.1 h1:LNEdDKYxoXOrn4ZXC+FdUfJCVbUjhb2QPIBs5XISXCI= github.com/google/keep-sorted v0.6.1/go.mod h1:JYy9vljs7P8b3QdPOQkywA+4u36FUHwsNITZIpJyPkE= github.com/google/martian v2.1.0+incompatible h1:/CP5g8u/VJHijgedC/Legn3BAbAaWPgecwXBIDzw5no= @@ -1095,6 +1097,8 @@ github.com/gordonklaus/ineffassign v0.1.0 h1:y2Gd/9I7MdY1oEIt+n+rowjBNDcLQq3RsH5 github.com/gordonklaus/ineffassign v0.1.0/go.mod h1:Qcp2HIAYhR7mNUVSIxZww3Guk4it82ghYcEXIAk+QT0= github.com/gorilla/handlers v1.5.2 h1:cLTUSsNkgcwhgRqvCNmdbRWG0A3N4F+M2nWKdScwyEE= github.com/gorilla/handlers v1.5.2/go.mod h1:dX+xVpaxdSw+q0Qek8SSsl3dfMk3jNddUkMzo0GtH0w= +github.com/gorilla/websocket v1.5.4-0.20250319132907-e064f32e3674 h1:JeSE6pjso5THxAzdVpqr6/geYxZytqFMBCOtn/ujyeo= +github.com/gorilla/websocket v1.5.4-0.20250319132907-e064f32e3674/go.mod h1:r4w70xmWCQKmi1ONH4KIaBptdivuRPyosB9RmPlGEwA= github.com/gostaticanalysis/analysisutil v0.7.1 h1:ZMCjoue3DtDWQ5WyU16YbjbQEQ3VuzwxALrpYd+HeKk= github.com/gostaticanalysis/analysisutil v0.7.1/go.mod h1:v21E3hY37WKMGSnbsw2S/ojApNWb6C1//mXO48CXbVc= github.com/gostaticanalysis/comment v1.4.1/go.mod h1:ih6ZxzTHLdadaiSnF5WY3dxUoXfXAlTaRzuaNDlSado= @@ -1135,8 +1139,6 @@ github.com/huandu/xstrings v1.5.0/go.mod h1:y5/lhBue+AyNmUVz9RLU9xbLR0o4KIIExikq github.com/iancoleman/strcase v0.2.0/go.mod h1:iwCmte+B7n89clKwxIoIXy/HfoL7AsD47ZCWhYzw7ho= github.com/ianlancetaylor/demangle v0.0.0-20181102032728-5e5cf60278f6/go.mod 
h1:aSSvb/t6k1mPoxDqO4vJh6VOCGPwU4O0C2/Eqndh1Sc= github.com/ianlancetaylor/demangle v0.0.0-20200824232613-28f6c0f3b639/go.mod h1:aSSvb/t6k1mPoxDqO4vJh6VOCGPwU4O0C2/Eqndh1Sc= -github.com/ianlancetaylor/demangle v0.0.0-20250628045327-2d64ad6b7ec5 h1:QCtizt3VTaANvnsd8TtD/eonx7JLIVdEKW1//ZNPZ9A= -github.com/ianlancetaylor/demangle v0.0.0-20250628045327-2d64ad6b7ec5/go.mod h1:gx7rwoVhcfuVKG5uya9Hs3Sxj7EIvldVofAWIUtGouw= github.com/ianlancetaylor/demangle v0.0.0-20251118225945-96ee0021ea0f h1:Fnl4pzx8SR7k7JuzyW8lEtSFH6EQ8xgcypgIn8pcGIE= github.com/ianlancetaylor/demangle v0.0.0-20251118225945-96ee0021ea0f/go.mod h1:gx7rwoVhcfuVKG5uya9Hs3Sxj7EIvldVofAWIUtGouw= github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8= @@ -1398,8 +1400,8 @@ github.com/spf13/cobra v1.9.1 h1:CXSaggrXdbHK9CF+8ywj8Amf7PBRmPCOJugH954Nnlo= github.com/spf13/cobra v1.9.1/go.mod h1:nDyEzZ8ogv936Cinf6g1RU9MRY64Ir93oCnqb9wxYW0= github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= github.com/spf13/pflag v1.0.6/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= -github.com/spf13/pflag v1.0.7 h1:vN6T9TfwStFPFM5XzjsvmzZkLuaLX+HS+0SeFLRgU6M= -github.com/spf13/pflag v1.0.7/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= +github.com/spf13/pflag v1.0.10 h1:4EBh2KAYBwaONj6b2Ye1GiHfwjqyROoF4RwYO+vPwFk= +github.com/spf13/pflag v1.0.10/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= github.com/spf13/viper v1.20.1 h1:ZMi+z/lvLyPSCoNtFCpqjy0S4kPbirhpTMwl8BkW9X4= github.com/spf13/viper v1.20.1/go.mod h1:P9Mdzt1zoHIG8m2eZQinpiBjo6kCmZSKBClNNqjJvu4= github.com/spiffe/go-spiffe/v2 v2.6.0 h1:l+DolpxNWYgruGQVV0xsfeya3CsC7m8iBzDnMpsbLuo= @@ -2036,6 +2038,8 @@ google.golang.org/appengine v1.6.6/go.mod h1:8WjMMxjGQR8xUklV/ARdw2HLXBOI7O7uCID google.golang.org/appengine v1.6.7/go.mod h1:8WjMMxjGQR8xUklV/ARdw2HLXBOI7O7uCIDZVag1xfc= google.golang.org/appengine/v2 v2.0.6 h1:LvPZLGuchSBslPBp+LAhihBeGSiRh1myRoYK4NtuBIw= 
google.golang.org/appengine/v2 v2.0.6/go.mod h1:WoEXGoXNfa0mLvaH5sV3ZSGXwVmy8yf7Z1JKf3J3wLI= +google.golang.org/genai v1.40.0 h1:kYxyQSH+vsib8dvsgyLJzsVEIv5k3ZmHJyVqdvGncmc= +google.golang.org/genai v1.40.0/go.mod h1:A3kkl0nyBjyFlNjgxIwKq70julKbIxpSxqKO5gw/gmk= google.golang.org/genproto v0.0.0-20180817151627-c66870c02cf8/go.mod h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc= google.golang.org/genproto v0.0.0-20190307195333-5fe7a883aa19/go.mod h1:VzzqZJRnGkLBvHegQrXjBqPurQTc5/KpmUdxsrq26oE= google.golang.org/genproto v0.0.0-20190418145605-e7d98fc518a7/go.mod h1:VzzqZJRnGkLBvHegQrXjBqPurQTc5/KpmUdxsrq26oE= @@ -2168,8 +2172,8 @@ google.golang.org/genproto v0.0.0-20230323212658-478b75c54725/go.mod h1:UUQDJDOl google.golang.org/genproto v0.0.0-20230330154414-c0448cd141ea/go.mod h1:UUQDJDOlWu4KYeJZffbWgBkS1YFobzKbLVfK69pe0Ak= google.golang.org/genproto v0.0.0-20230331144136-dcfb400f0633/go.mod h1:UUQDJDOlWu4KYeJZffbWgBkS1YFobzKbLVfK69pe0Ak= google.golang.org/genproto v0.0.0-20230410155749-daa745c078e1/go.mod h1:nKE/iIaLqn2bQwXBg8f1g2Ylh6r5MN5CmZvuzZCgsCU= -google.golang.org/genproto v0.0.0-20250603155806-513f23925822 h1:rHWScKit0gvAPuOnu87KpaYtjK5zBMLcULh7gxkCXu4= -google.golang.org/genproto v0.0.0-20250603155806-513f23925822/go.mod h1:HubltRL7rMh0LfnQPkMH4NPDFEWp0jw3vixw7jEM53s= +google.golang.org/genproto v0.0.0-20251014184007-4626949a642f h1:vLd1CJuJOUgV6qijD7KT5Y2ZtC97ll4dxjTUappMnbo= +google.golang.org/genproto v0.0.0-20251014184007-4626949a642f/go.mod h1:PI3KrSadr00yqfv6UDvgZGFsmLqeRIwt8x4p5Oo7CdM= google.golang.org/genproto/googleapis/api v0.0.0-20251029180050-ab9386a59fda h1:+2XxjfsAu6vqFxwGBRcHiMaDCuZiqXGDUDVWVtrFAnE= google.golang.org/genproto/googleapis/api v0.0.0-20251029180050-ab9386a59fda/go.mod h1:fDMmzKV90WSg1NbozdqrE64fkuTv6mlq2zxo9ad+3yo= google.golang.org/genproto/googleapis/rpc v0.0.0-20251124214823-79d6a2a48846 h1:Wgl1rcDNThT+Zn47YyCXOXyX/COgMTIdhJ717F0l4xk= diff --git a/pkg/aflow/action.go b/pkg/aflow/action.go new file mode 100644 index 
000000000000..cd09466a4427 --- /dev/null +++ b/pkg/aflow/action.go @@ -0,0 +1,37 @@ +// Copyright 2025 syzkaller project authors. All rights reserved. +// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. + +package aflow + +type Action interface { + verify(*verifyContext) + execute(*Context) error +} + +type Pipeline struct { + // These actions are invoked sequentially, + // but dataflow across actions is specified by their use + // of variables in args/instructions/prompts. + Actions []Action +} + +func NewPipeline(actions ...Action) *Pipeline { + return &Pipeline{ + Actions: actions, + } +} + +func (p *Pipeline) execute(ctx *Context) error { + for _, sub := range p.Actions { + if err := sub.execute(ctx); err != nil { + return err + } + } + return nil +} + +func (p *Pipeline) verify(ctx *verifyContext) { + for _, a := range p.Actions { + a.verify(ctx) + } +} diff --git a/pkg/aflow/action/crash/reproduce.go b/pkg/aflow/action/crash/reproduce.go new file mode 100644 index 000000000000..33be02b27dee --- /dev/null +++ b/pkg/aflow/action/crash/reproduce.go @@ -0,0 +1,115 @@ +// Copyright 2025 syzkaller project authors. All rights reserved. +// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. + +package crash + +import ( + "encoding/json" + "errors" + "fmt" + "os" + "path/filepath" + + "github.com/google/syzkaller/pkg/aflow" + "github.com/google/syzkaller/pkg/build" + "github.com/google/syzkaller/pkg/hash" + "github.com/google/syzkaller/pkg/instance" + "github.com/google/syzkaller/pkg/mgrconfig" + "github.com/google/syzkaller/pkg/osutil" + "github.com/google/syzkaller/sys/targets" +) + +// Reproduce action tries to reproduce a crash with the given reproducer, +// and outputs the resulting crash report. +// If the reproducer does not trigger a crash, action fails. 
+var Reproduce = aflow.NewFuncAction("crash-reproducer", reproduce) + +type reproduceArgs struct { + Syzkaller string + Image string + Type string + VM json.RawMessage + ReproOpts string + ReproSyz string + ReproC string + SyzkallerCommit string + KernelSrc string + KernelObj string + KernelCommit string + KernelConfig string +} + +type reproduceResult struct { + CrashReport string +} + +func reproduce(ctx *aflow.Context, args reproduceArgs) (reproduceResult, error) { + if args.Type != "qemu" { + // Since we use injected kernel boot, and don't build full disk image. + return reproduceResult{}, errors.New("only qemu VM type is supported") + } + imageData, err := os.ReadFile(args.Image) + if err != nil { + return reproduceResult{}, err + } + desc := fmt.Sprintf("kernel commit %v, kernel config hash %v, image hash %v,"+ + " vm %v, vm config hash %v, C repro hash %v", + args.KernelCommit, hash.String(args.KernelConfig), hash.String(imageData), + args.Type, hash.String(args.VM), hash.String(args.ReproC)) + dir, err := ctx.Cache("repro", desc, func(dir string) error { + var vmConfig map[string]any + if err := json.Unmarshal(args.VM, &vmConfig); err != nil { + return fmt.Errorf("failed to parse VM config: %w", err) + } + vmConfig["kernel"] = filepath.Join(args.KernelObj, filepath.FromSlash(build.LinuxKernelImage(targets.AMD64))) + vmCfg, err := json.Marshal(vmConfig) + if err != nil { + return fmt.Errorf("failed to serialize VM config: %w", err) + } + cfg := mgrconfig.DefaultValues() + cfg.RawTarget = "linux/amd64" + cfg.Workdir = filepath.Join(dir, "workdir") + cfg.Syzkaller = args.Syzkaller + cfg.KernelObj = args.KernelObj + cfg.KernelSrc = args.KernelSrc + cfg.Image = args.Image + cfg.Type = args.Type + cfg.VM = vmCfg + if err := mgrconfig.SetTargets(cfg); err != nil { + return err + } + if err := mgrconfig.Complete(cfg); err != nil { + return err + } + env, err := instance.NewEnv(cfg, nil, nil) + if err != nil { + return err + } + results, err := env.Test(1, nil, nil, 
[]byte(args.ReproC)) + if err != nil { + return err + } + os.RemoveAll(cfg.Workdir) + if results[0].Error == nil { + results[0].Error = errors.New("reproducer did not crash") + } + file, data := "", []byte(nil) + var crashErr *instance.CrashError + if errors.As(results[0].Error, &crashErr) { + file, data = "report", crashErr.Report.Report + } else { + file, data = "error", []byte(results[0].Error.Error()) + } + return osutil.WriteFile(filepath.Join(dir, file), data) + }) + if err != nil { + return reproduceResult{}, err + } + if data, err := os.ReadFile(filepath.Join(dir, "error")); err == nil { + return reproduceResult{}, errors.New(string(data)) + } + data, err := os.ReadFile(filepath.Join(dir, "report")) + return reproduceResult{ + CrashReport: string(data), + }, err +} diff --git a/pkg/aflow/action/kernel/build.go b/pkg/aflow/action/kernel/build.go new file mode 100644 index 000000000000..17e9fb2422ad --- /dev/null +++ b/pkg/aflow/action/kernel/build.go @@ -0,0 +1,63 @@ +// Copyright 2025 syzkaller project authors. All rights reserved. +// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. + +package kernel + +import ( + "fmt" + "io/fs" + "os" + "path" + "path/filepath" + "runtime" + "time" + + "github.com/google/syzkaller/pkg/aflow" + "github.com/google/syzkaller/pkg/build" + "github.com/google/syzkaller/pkg/hash" + "github.com/google/syzkaller/pkg/osutil" + "github.com/google/syzkaller/sys/targets" +) + +// Build action builds the Linux kernel from the given sources, +// outputs directory with build artifacts. +var Build = aflow.NewFuncAction("kernel-builder", buildKernel) + +type buildArgs struct { + KernelSrc string + KernelCommit string + KernelConfig string +} + +type buildResult struct { + KernelObj string // Directory with build artifacts. 
+} + +func buildKernel(ctx *aflow.Context, args buildArgs) (buildResult, error) { + desc := fmt.Sprintf("kernel commit %v, kernel config hash %v", + args.KernelCommit, hash.String(args.KernelConfig)) + dir, err := ctx.Cache("build", desc, func(dir string) error { + if err := osutil.WriteFile(filepath.Join(dir, ".config"), []byte(args.KernelConfig)); err != nil { + return err + } + target := targets.List[targets.Linux][targets.AMD64] + image := filepath.FromSlash(build.LinuxKernelImage(targets.AMD64)) + makeArgs := build.LinuxMakeArgs(target, targets.DefaultLLVMCompiler, targets.DefaultLLVMLinker, + "ccache", dir, runtime.NumCPU()) + makeArgs = append(makeArgs, path.Base(image), "compile_commands.json") + if _, err := osutil.RunCmd(time.Hour, args.KernelSrc, "make", makeArgs...); err != nil { + return err + } + // Remove main intermediate build files, we don't need them anymore + // and they take lots of space. Keep generated source files. + keepExt := map[string]bool{"h": true, "c": true, "s": true, "S": true} + keepFiles := map[string]bool{image: true} + return filepath.WalkDir(dir, func(path string, d fs.DirEntry, err error) error { + if err != nil || d.IsDir() || keepFiles[path] || keepExt[filepath.Ext(d.Name())] { + return err + } + return os.Remove(path) + }) + }) + return buildResult{KernelObj: dir}, err +} diff --git a/pkg/aflow/action/kernel/checkout.go b/pkg/aflow/action/kernel/checkout.go new file mode 100644 index 000000000000..397412393558 --- /dev/null +++ b/pkg/aflow/action/kernel/checkout.go @@ -0,0 +1,90 @@ +// Copyright 2025 syzkaller project authors. All rights reserved. +// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. 
+ +package kernel + +import ( + "path/filepath" + "sync" + "time" + + "github.com/google/syzkaller/pkg/aflow" + "github.com/google/syzkaller/pkg/osutil" + "github.com/google/syzkaller/pkg/vcs" + "github.com/google/syzkaller/sys/targets" +) + +// Checkout action checks out the Linux kernel on the given commit, +// outputs the source directory with the checkout. +var Checkout = aflow.NewFuncAction("kernel-checkouter", checkout) + +type checkoutArgs struct { + KernelRepo string + KernelCommit string +} + +type checkoutResult struct { + // Directory with the checked out sources. + KernelSrc string +} + +func checkout(ctx *aflow.Context, args checkoutArgs) (checkoutResult, error) { + var res checkoutResult + err := UseLinuxRepo(ctx, func(kernelRepoDir string, repo vcs.Repo) error { + dir, err := ctx.Cache("src", args.KernelCommit, func(dir string) error { + if _, err := repo.SwitchCommit(args.KernelCommit); err != nil { + if _, err := repo.CheckoutCommit(args.KernelRepo, args.KernelCommit); err != nil { + return err + } + } + // The following commit breaks compile_commands.json by adding bogus commands that fail. + // There is no easy way to filter out these bogus commands, so we revert the commit. 
+ const ( + // scripts/clang-tools: Handle included .c files in gen_compile_commands + badCommit = "9362d34acf91a706c543d919ade3e651b9bd2d6f" + // Revert "scripts/clang-tools: Handle included .c files in gen_compile_commands" + revertCommit = "07fe35b766a6fcd4ec8214e5066b7b0056b6ec6a" + ) + + if ok, err := repo.Contains(revertCommit); err != nil { + return err + } else if !ok { + if ok, err := repo.Contains(badCommit); err != nil { + return err + } else if ok { + if _, err = osutil.RunCmd(time.Hour, kernelRepoDir, + "git", "revert", "--no-edit", badCommit); err != nil { + return err + } + } + } + if _, err := osutil.RunCmd(time.Hour, dir, "git", "init"); err != nil { + return err + } + if _, err := osutil.RunCmd(time.Hour, dir, "git", "remote", "add", "origin", kernelRepoDir); err != nil { + return err + } + if _, err := osutil.RunCmd(time.Hour, dir, "git", "pull", "origin", "HEAD", "--depth=1", + "--allow-unrelated-histories"); err != nil { + return err + } + return nil + }) + res.KernelSrc = dir + return err + }) + return res, err +} + +var repoMu sync.Mutex + +func UseLinuxRepo(ctx *aflow.Context, fn func(string, vcs.Repo) error) error { + repoMu.Lock() + defer repoMu.Unlock() + kernelRepoDir := filepath.Join(ctx.Workdir, "repo", targets.Linux) + repo, err := vcs.NewRepo(targets.Linux, "", kernelRepoDir) + if err != nil { + return err + } + return fn(kernelRepoDir, repo) +} diff --git a/pkg/aflow/cache.go b/pkg/aflow/cache.go new file mode 100644 index 000000000000..fe60e5358ecd --- /dev/null +++ b/pkg/aflow/cache.go @@ -0,0 +1,201 @@ +// Copyright 2025 syzkaller project authors. All rights reserved. +// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. 
+ +package aflow + +import ( + "fmt" + "maps" + "os" + "path/filepath" + "slices" + "sync" + "testing" + "time" + + "github.com/google/syzkaller/pkg/hash" + "github.com/google/syzkaller/pkg/osutil" +) + +// Cache maintains on-disk cache with directories with arbitrary contents (kernel checkouts, builds, etc). +// Create method is used to either create a new directory, if it's not cached yet, or returns a previously +// cached directory. Old unused directories are incrementally removed if the total disk usage grows +// over the specified limit. +type Cache struct { + dir string + maxSize uint64 + timeNow func() time.Time + t *testing.T + mu sync.Mutex + currentSize uint64 + entries map[string]*cacheEntry +} + +type cacheEntry struct { + dir string + size uint64 + usageCount int + lastUsed time.Time +} + +func NewCache(dir string, maxSize uint64) (*Cache, error) { + return newTestCache(nil, dir, maxSize, time.Now) +} + +func newTestCache(t *testing.T, dir string, maxSize uint64, timeNow func() time.Time) (*Cache, error) { + if dir == "" { + return nil, fmt.Errorf("cache workdir is empty") + } + c := &Cache{ + dir: osutil.Abs(dir), + maxSize: maxSize, + timeNow: timeNow, + t: t, + entries: make(map[string]*cacheEntry), + } + if err := c.init(); err != nil { + return nil, err + } + return c, nil +} + +// Create creates/returns a cached directory with contents created by the populate callback. +// The populate callback receives a dir it needs to populate with cached files. +// The typ must be a short descriptive name of the contents (e.g. "build", "source", etc). +// The desc is used to identify cached entries and must fully describe the cached contents +// (the second invocation with the same typ+desc will return dir created by the first +// invocation with the same typ+desc). 
+func (c *Cache) Create(typ, desc string, populate func(string) error) (string, error) { + c.mu.Lock() + defer c.mu.Unlock() + // Note: we don't populate a temp dir and then atomically rename it to the final destination, + // because at least kernel builds encode the current path in debug info/compile commands, + // so moving the dir later would break all that. Instead we rely on the presence of the meta file + // to denote valid cache entries. Modification time of the file says when it was last used. + id := hash.String(desc) + dir := filepath.Join(c.dir, typ, id) + metaFile := filepath.Join(dir, cacheMetaFile) + if c.entries[dir] == nil { + os.RemoveAll(dir) + if err := osutil.MkdirAll(dir); err != nil { + return "", err + } + if err := populate(dir); err != nil { + os.RemoveAll(dir) + return "", err + } + size, err := osutil.DiskUsage(dir) + if err != nil { + return "", err + } + if err := osutil.WriteFile(metaFile, []byte(desc)); err != nil { + os.RemoveAll(dir) + return "", err + } + c.entries[dir] = &cacheEntry{ + dir: dir, + size: size, + } + c.currentSize += size + c.logf("created entry %v, size %v, current size %v", dir, size, c.currentSize) + } + // Note the entry was used now. + now := c.timeNow() + if err := os.Chtimes(metaFile, now, now); err != nil { + return "", err + } + entry := c.entries[dir] + entry.usageCount++ + entry.lastUsed = now + c.logf("using entry %v, usage count %v", dir, entry.usageCount) + if err := c.purge(); err != nil { + entry.usageCount-- + return "", err + } + return dir, nil +} + +// Release must be called for every directory returned by Create method when the directory is not used anymore. 
+func (c *Cache) Release(dir string) { + c.mu.Lock() + defer c.mu.Unlock() + entry := c.entries[dir] + entry.usageCount-- + c.logf("release entry %v, usage count %v", dir, entry.usageCount) + if entry.usageCount < 0 { + panic("negative usageCount") + } +} + +// init reads the cached dirs (disk usage, last use time) from disk when the cache is created. +func (c *Cache) init() error { + dirs, err := filepath.Glob(filepath.Join(c.dir, "*", "*")) + if err != nil { + return err + } + for _, dir := range dirs { + metaFile := filepath.Join(dir, cacheMetaFile) + if !osutil.IsExist(metaFile) { + if err := osutil.RemoveAll(dir); err != nil { + return err + } + continue + } + stat, err := os.Stat(metaFile) + if err != nil { + return err + } + size, err := osutil.DiskUsage(dir) + if err != nil { + return err + } + c.entries[dir] = &cacheEntry{ + dir: dir, + size: size, + lastUsed: stat.ModTime(), + } + c.currentSize += size + } + c.mu.Lock() + defer c.mu.Unlock() + return c.purge() +} + +// purge removes oldest unused directories if the cache is over maxSize. +func (c *Cache) purge() error { + if c.mu.TryLock() { + panic("c.mu is not locked") + } + if c.currentSize < c.maxSize { + return nil + } + list := slices.Collect(maps.Values(c.entries)) + slices.SortFunc(list, func(a, b *cacheEntry) int { + if a.usageCount != b.usageCount { + return a.usageCount - b.usageCount + } + return a.lastUsed.Compare(b.lastUsed) + }) + for _, entry := range list { + if entry.usageCount != 0 || c.currentSize < c.maxSize { + break + } + if err := os.RemoveAll(entry.dir); err != nil { + return err + } + delete(c.entries, entry.dir) + if c.currentSize < entry.size { + panic(fmt.Sprintf("negative currentSize: %v %v", c.currentSize, entry.size)) + } + c.currentSize -= entry.size + } + return nil +} + +func (c *Cache) logf(msg string, args ...any) { + if c.t != nil { + c.t.Logf("cache: "+msg, args...) 
+ } +} + +const cacheMetaFile = "aflow-meta" diff --git a/pkg/aflow/cache_test.go b/pkg/aflow/cache_test.go new file mode 100644 index 000000000000..244defdd374b --- /dev/null +++ b/pkg/aflow/cache_test.go @@ -0,0 +1,144 @@ +// Copyright 2026 syzkaller project authors. All rights reserved. +// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. + +package aflow + +import ( + "bytes" + "fmt" + "os" + "path/filepath" + "testing" + "time" + + "github.com/google/syzkaller/pkg/osutil" + "github.com/stretchr/testify/require" +) + +func TestCache(t *testing.T) { + var mockedTime time.Time + timeNow := func() time.Time { + return mockedTime + } + tempDir := t.TempDir() + c, err := newTestCache(t, tempDir, 1<<40, timeNow) + require.NoError(t, err) + dir1, err := c.Create("foo", "1", func(dir string) error { + return osutil.WriteFile(filepath.Join(dir, "bar"), []byte("abc")) + }) + require.NoError(t, err) + data, err := os.ReadFile(filepath.Join(dir1, "bar")) + require.NoError(t, err) + require.Equal(t, data, []byte("abc")) + c.Release(dir1) + + dir2, err := c.Create("foo", "1", func(dir string) error { + t.Fatal("must not be called") + return nil + }) + require.NoError(t, err) + require.Equal(t, dir2, dir1) + data, err = os.ReadFile(filepath.Join(dir2, "bar")) + require.NoError(t, err) + require.Equal(t, data, []byte("abc")) + c.Release(dir2) + + dir3, err := c.Create("foo", "2", func(dir string) error { + return osutil.WriteFile(filepath.Join(dir, "baz"), []byte("def")) + }) + require.NoError(t, err) + require.NotEqual(t, dir3, dir1) + data, err = os.ReadFile(filepath.Join(dir3, "baz")) + require.NoError(t, err) + require.Equal(t, data, []byte("def")) + c.Release(dir3) + + failedDir := "" + dir4, err := c.Create("foo", "3", func(dir string) error { + failedDir = dir + return fmt.Errorf("failed") + }) + require.Error(t, err) + require.Empty(t, dir4) + require.False(t, osutil.IsExist(failedDir)) + + // Create a new cache, it should 
pick up the state from disk. + c, err = newTestCache(t, tempDir, 1<<40, timeNow) + require.NoError(t, err) + + dir5, err := c.Create("foo", "1", func(dir string) error { + t.Fatal("must not be called") + return nil + }) + require.NoError(t, err) + require.Equal(t, dir5, dir1) + data, err = os.ReadFile(filepath.Join(dir5, "bar")) + require.NoError(t, err) + require.Equal(t, data, []byte("abc")) + c.Release(dir5) + + // Model an incomplete dir without metadata, it should be removed. + strayDir := filepath.Join(tempDir, "a", "b") + require.NoError(t, osutil.MkdirAll(strayDir)) + require.NoError(t, osutil.WriteFile(filepath.Join(strayDir, "foo"), []byte("foo"))) + + // With 0 max size everything unused should be purged. + _, err = newTestCache(t, tempDir, 0, timeNow) + require.NoError(t, err) + require.False(t, osutil.IsExist(dir1)) + require.False(t, osutil.IsExist(dir3)) + require.False(t, osutil.IsExist(strayDir)) + + // Test incremental purging of files. + c, err = newTestCache(t, tempDir, 100<<10, timeNow) + require.NoError(t, err) + + mockedTime = mockedTime.Add(time.Minute) + dir6, err := c.Create("foo", "1", func(dir string) error { + return osutil.WriteFile(filepath.Join(dir, "bar"), bytes.Repeat([]byte{'a'}, 5<<10)) + }) + require.NoError(t, err) + c.Release(dir6) + + mockedTime = mockedTime.Add(time.Minute) + dir7, err := c.Create("foo", "2", func(dir string) error { + return osutil.WriteFile(filepath.Join(dir, "bar"), bytes.Repeat([]byte{'a'}, 5<<10)) + }) + require.NoError(t, err) + c.Release(dir7) + + mockedTime = mockedTime.Add(time.Minute) + dir8, err := c.Create("foo", "3", func(dir string) error { + return osutil.WriteFile(filepath.Join(dir, "bar"), bytes.Repeat([]byte{'a'}, 60<<10)) + }) + require.NoError(t, err) + c.Release(dir8) + + // Force update of the last access time for the first dir. 
+ mockedTime = mockedTime.Add(time.Minute) + dir9, err := c.Create("foo", "1", func(dir string) error { + t.Fatal("must not be called") + return nil + }) + require.NoError(t, err) + require.Equal(t, dir6, dir9) + c.Release(dir9) + + // Both dirs should exist since they should fit into cache size. + require.True(t, osutil.IsExist(dir6)) + require.True(t, osutil.IsExist(dir7)) + require.True(t, osutil.IsExist(dir8)) + + mockedTime = mockedTime.Add(time.Minute) + dir10, err := c.Create("foo", "4", func(dir string) error { + return osutil.WriteFile(filepath.Join(dir, "bar"), bytes.Repeat([]byte{'a'}, 60<<10)) + }) + require.NoError(t, err) + c.Release(dir10) + + // Two oldest dirs should be purged. + require.True(t, osutil.IsExist(dir6)) + require.False(t, osutil.IsExist(dir7)) + require.False(t, osutil.IsExist(dir8)) + require.True(t, osutil.IsExist(dir10)) +} diff --git a/pkg/aflow/execute.go b/pkg/aflow/execute.go new file mode 100644 index 000000000000..6e724988e17d --- /dev/null +++ b/pkg/aflow/execute.go @@ -0,0 +1,186 @@ +// Copyright 2025 syzkaller project authors. All rights reserved. +// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. 
+ +package aflow + +import ( + "context" + "fmt" + "maps" + "os" + "slices" + "strings" + "sync" + "time" + + "github.com/google/syzkaller/pkg/aflow/trajectory" + "github.com/google/syzkaller/pkg/osutil" + "google.golang.org/genai" +) + +// https://ai.google.dev/gemini-api/docs/models +const DefaultModel = "gemini-3-pro-preview" + +func (flow *Flow) Execute(c context.Context, model, workdir string, inputs map[string]any, + cache *Cache, onEvent onEvent) (map[string]any, error) { + if err := flow.checkInputs(inputs); err != nil { + return nil, fmt.Errorf("flow inputs are missing: %w", err) + } + ctx := &Context{ + Context: c, + Workdir: osutil.Abs(workdir), + cache: cache, + state: maps.Clone(inputs), + onEvent: onEvent, + } + defer ctx.close() + if s := c.Value(stubContextKey); s != nil { + ctx.stubContext = *s.(*stubContext) + } + if ctx.timeNow == nil { + ctx.timeNow = time.Now + } + if ctx.generateContent == nil { + var err error + ctx.generateContent, err = contentGenerator(c, model) + if err != nil { + return nil, err + } + } + span := &trajectory.Span{ + Type: trajectory.SpanFlow, + Name: flow.Name, + } + if err := ctx.startSpan(span); err != nil { + return nil, err + } + flowErr := flow.Root.execute(ctx) + if flowErr == nil { + span.Results = flow.extractOutputs(ctx.state) + } + if err := ctx.finishSpan(span, flowErr); err != nil { + return nil, err + } + if ctx.spanNesting != 0 { + // Since we finish all spans, even on errors, we should end up at 0. 
+ panic(fmt.Sprintf("unbalanced spans (%v)", ctx.spanNesting)) + } + return span.Results, nil +} + +type ( + onEvent func(*trajectory.Span) error + generateContentFunc func(*genai.GenerateContentConfig, []*genai.Content) ( + *genai.GenerateContentResponse, error) + contextKeyType int +) + +var ( + createClientOnce sync.Once + createClientErr error + client *genai.Client + modelList = make(map[string]bool) + stubContextKey = contextKeyType(1) +) + +func contentGenerator(ctx context.Context, model string) (generateContentFunc, error) { + const modelPrefix = "models/" + createClientOnce.Do(func() { + if os.Getenv("GOOGLE_API_KEY") == "" { + createClientErr = fmt.Errorf("set GOOGLE_API_KEY env var to use with Gemini" + + " (see https://ai.google.dev/gemini-api/docs/api-key)") + return + } + client, createClientErr = genai.NewClient(ctx, nil) + if createClientErr != nil { + return + } + for m, err := range client.Models.All(ctx) { + if err != nil { + createClientErr = err + return + } + modelList[strings.TrimPrefix(m.Name, modelPrefix)] = m.Thinking + } + }) + if createClientErr != nil { + return nil, createClientErr + } + thinking, ok := modelList[model] + if !ok { + models := slices.Collect(maps.Keys(modelList)) + slices.Sort(models) + return nil, fmt.Errorf("model %q does not exist (models: %v)", model, models) + } + return func(cfg *genai.GenerateContentConfig, req []*genai.Content) (*genai.GenerateContentResponse, error) { + if thinking { + cfg.ThinkingConfig = &genai.ThinkingConfig{ + // We capture them in the trajectory for analysis. + IncludeThoughts: true, + // Enable "dynamic thinking" ("the model will adjust the budget based on the complexity of the request"). + // See https://ai.google.dev/gemini-api/docs/thinking#set-budget + // However, thoughts output also consumes total output token budget. + // We may consider adjusting ThinkingLevel parameter. 
+ ThinkingBudget: genai.Ptr[int32](-1), + } + } + return client.Models.GenerateContent(ctx, modelPrefix+model, req, cfg) + }, nil +} + +type Context struct { + Context context.Context + Workdir string + cache *Cache + cachedDirs []string + state map[string]any + onEvent onEvent + spanSeq int + spanNesting int + stubContext +} + +type stubContext struct { + timeNow func() time.Time + generateContent generateContentFunc +} + +func (ctx *Context) Cache(typ, desc string, populate func(string) error) (string, error) { + dir, err := ctx.cache.Create(typ, desc, populate) + if err != nil { + return "", err + } + ctx.cachedDirs = append(ctx.cachedDirs, dir) + return dir, nil +} + +func (ctx *Context) close() { + for _, dir := range ctx.cachedDirs { + ctx.cache.Release(dir) + } +} + +func (ctx *Context) startSpan(span *trajectory.Span) error { + span.Seq = ctx.spanSeq + ctx.spanSeq++ + span.Nesting = ctx.spanNesting + ctx.spanNesting++ + span.Started = ctx.timeNow() + return ctx.onEvent(span) +} + +func (ctx *Context) finishSpan(span *trajectory.Span, spanErr error) error { + ctx.spanNesting-- + if ctx.spanNesting < 0 { + panic("unbalanced spans") + } + span.Finished = ctx.timeNow() + if spanErr != nil { + span.Error = spanErr.Error() + } + err := ctx.onEvent(span) + if spanErr != nil { + err = spanErr + } + return err +} diff --git a/pkg/aflow/flow.go b/pkg/aflow/flow.go new file mode 100644 index 000000000000..6325b2fd26e3 --- /dev/null +++ b/pkg/aflow/flow.go @@ -0,0 +1,99 @@ +// Copyright 2025 syzkaller project authors. All rights reserved. +// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. + +package aflow + +import ( + "fmt" + + "github.com/google/syzkaller/pkg/aflow/ai" +) + +// Flow describes a single agentic workflow. +// A workflow takes some inputs, and produces some outputs in the end +// (specified as fields of the Inputs/Outputs struct types, correspondingly). 
+// A workflow consists of one or more actions that do the actual computation +// and produce the outputs. Actions can be based on an arbitrary Go function +// (FuncAction), or an LLM agent invocation (LLMAgent). Actions can produce +// final output fields, and/or intermediate inputs for subsequent actions. +// LLMAgent can also use tools that can accept workflow inputs, or outputs +// or preceding actions. +// A workflow is executed sequentially, but it can be thought of as a dataflow graph. +// Actions are nodes of the graph, and they consume/produce some named values +// (input/output fields, and intermediate values consumed by other actions). +type Flow struct { + Name string // Empty for the main workflow for the workflow type. + Root Action + + *FlowType +} + +type FlowType struct { + Type ai.WorkflowType + Description string + checkInputs func(map[string]any) error + extractOutputs func(map[string]any) map[string]any +} + +var Flows = make(map[string]*Flow) + +// Register a workflow type (characterized by Inputs and Outputs), +// and one or more implementations of the workflow type (actual workflows). +// All workflows for the same type consume the same inputs and produce the same outputs. +// There should be the "main" implementation for the workflow type with an empty name, +// and zero or more secondary implementations with non-empty names. +func Register[Inputs, Outputs any](typ ai.WorkflowType, description string, flows ...*Flow) { + if err := register[Inputs, Outputs](typ, description, Flows, flows); err != nil { + panic(err) + } +} + +func register[Inputs, Outputs any](typ ai.WorkflowType, description string, + all map[string]*Flow, flows []*Flow) error { + t := &FlowType{ + Type: typ, + Description: description, + checkInputs: func(inputs map[string]any) error { + _, err := convertFromMap[Inputs](inputs, false) + return err + }, + extractOutputs: func(state map[string]any) map[string]any { + // Ensure that we actually have all outputs. 
+ tmp, err := convertFromMap[Outputs](state, false) + if err != nil { + panic(err) + } + return convertToMap(tmp) + }, + } + for _, flow := range flows { + if flow.Name == "" { + flow.Name = string(typ) + } else { + flow.Name = string(typ) + "-" + flow.Name + } + flow.FlowType = t + if err := registerOne[Inputs, Outputs](all, flow); err != nil { + return err + } + } + return nil +} + +func registerOne[Inputs, Outputs any](all map[string]*Flow, flow *Flow) error { + if all[flow.Name] != nil { + return fmt.Errorf("flow %v is already registered", flow.Name) + } + ctx := &verifyContext{ + actions: make(map[string]bool), + state: make(map[string]*varState), + } + provideOutputs[Inputs](ctx, "flow inputs") + flow.Root.verify(ctx) + requireInputs[Outputs](ctx, "flow outputs") + if err := ctx.finalize(); err != nil { + return fmt.Errorf("flow %v: %w", flow.Name, err) + } + all[flow.Name] = flow + return nil +} diff --git a/pkg/aflow/flow/assessment/assessment.go b/pkg/aflow/flow/assessment/assessment.go new file mode 100644 index 000000000000..f0e3dadb7152 --- /dev/null +++ b/pkg/aflow/flow/assessment/assessment.go @@ -0,0 +1,13 @@ +// Copyright 2025 syzkaller project authors. All rights reserved. +// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. + +package assessmenet + +// Common inputs for bug assessment when we don't have a reproducer. +type Inputs struct { + CrashReport string + KernelRepo string + KernelCommit string + KernelConfig string + CodesearchToolBin string +} diff --git a/pkg/aflow/flow/assessment/kcsan.go b/pkg/aflow/flow/assessment/kcsan.go new file mode 100644 index 000000000000..755113a47977 --- /dev/null +++ b/pkg/aflow/flow/assessment/kcsan.go @@ -0,0 +1,76 @@ +// Copyright 2025 syzkaller project authors. All rights reserved. +// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. 
+ +package assessmenet + +import ( + "github.com/google/syzkaller/pkg/aflow" + "github.com/google/syzkaller/pkg/aflow/action/kernel" + "github.com/google/syzkaller/pkg/aflow/ai" + "github.com/google/syzkaller/pkg/aflow/tool/codesearcher" +) + +type KCSANOutputs struct { + Benign bool + Explanation string +} + +func init() { + aflow.Register[Inputs, KCSANOutputs]( + ai.WorkflowAssessmentKCSAN, + "assess if a KCSAN report is about a benign race that only needs annotations or not", + &aflow.Flow{ + Root: &aflow.Pipeline{ + Actions: []aflow.Action{ + kernel.Checkout, + kernel.Build, + codesearcher.PrepareIndex, + &aflow.LLMAgent{ + Name: "expert", + Reply: "Explanation", + Outputs: aflow.LLMOutputs[struct { + Benign bool `jsonschema:"If the data race is benign or not."` + }](), + Temperature: 1, + Instruction: instruction, + Prompt: prompt, + Tools: codesearcher.Tools, + }, + }, + }, + }, + ) +} + +const instruction = ` +You are an experienced Linux kernel developer tasked with determining if the given kernel bug +report is actionable or not. Actionable means that it contains enough info to root cause +the underlying bug, and that the report is self-consistent and makes sense, rather than +a one-off nonsensical crash induced by a previous memory corruption. + +Use the provided tools to confirm any assumptions, what variables/fields being accessed, etc. +In particular, don't make assumptions about the kernel source code, +use codesearch tools to read the actual source code. + +The bug report is a data race report from KCSAN tool. +It contains 2 stack traces of the memory accesses that constitute a data race. +The report would be inconsistent, if the stacks point to different subsystems, +or if they access different fields. +The report would be non-actionable, if the underlysing data race is "benign". +That is, the race is on a simple int/bool or similar field, and the accesses +are not supposed to be protected by any mutual exclusion primitives. 
+Common examples of such "benign" data races are accesses to various flags fields, +statistics counters, and similar. +An actionable race is "harmful", that is can lead to corruption/crash even with +a conservative compiler that compiles memory accesses to primitive types +effectively as atomic. A common example of a "harmful" data races is race on +a complex container (list/hashmap/etc), where accesses are supposed to be protected +by a mutual exclusion primitive. +In the final reply explain why you think the report is consistent and the data race is harmful. +` + +const prompt = ` +The bug report is: + +{{.CrashReport}} +` diff --git a/pkg/aflow/flow/flows.go b/pkg/aflow/flow/flows.go new file mode 100644 index 000000000000..ef2d4697e08f --- /dev/null +++ b/pkg/aflow/flow/flows.go @@ -0,0 +1,9 @@ +// Copyright 2025 syzkaller project authors. All rights reserved. +// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. + +package flow + +import ( + _ "github.com/google/syzkaller/pkg/aflow/flow/assessment" + _ "github.com/google/syzkaller/pkg/aflow/flow/patching" +) diff --git a/pkg/aflow/flow/flows_test.go b/pkg/aflow/flow/flows_test.go new file mode 100644 index 000000000000..3c01596b5471 --- /dev/null +++ b/pkg/aflow/flow/flows_test.go @@ -0,0 +1,6 @@ +// Copyright 2025 syzkaller project authors. All rights reserved. +// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. + +package flow + +// An empty test that runs registration and verification of all registered workflows via init functions. diff --git a/pkg/aflow/flow/patching/base_commit.go b/pkg/aflow/flow/patching/base_commit.go new file mode 100644 index 000000000000..f49aba3d6ab0 --- /dev/null +++ b/pkg/aflow/flow/patching/base_commit.go @@ -0,0 +1,57 @@ +// Copyright 2025 syzkaller project authors. All rights reserved. +// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. 
+ +package patching + +import ( + "github.com/google/syzkaller/pkg/aflow" + "github.com/google/syzkaller/pkg/aflow/action/kernel" + "github.com/google/syzkaller/pkg/vcs" +) + +var baseCommitPicker = aflow.NewFuncAction("base-commit-picker", pickBaseCommit) + +type baseCommitArgs struct { + // Can be used to override the selected base commit (for manual testing). + FixedBaseCommit string +} + +type baseCommitResult struct { + KernelRepo string + KernelCommit string +} + +func pickBaseCommit(ctx *aflow.Context, args baseCommitArgs) (baseCommitResult, error) { + // Currently we use the latest RC of the mainline tree as the base. + // This is a reasonable choice overall in lots of cases, and it enables good caching + // of all artifacts (we need to rebuild them only approx every week). + // Potentially we can use subsystem trees for few important, well-maintained subsystems + // (mm, net, etc). However, it will work poorly for all subsystems. First, there is no + // machine-usable mapping of subsystems to repo/branch; second, lots of them are poorly + // maintained (can be much older than latest RC); third, it will make artifact caching + // much worse. + // In the future we ought to support automated rebasing of patches to requested trees/commits. + // We need it anyway, but it will also alleviate imperfect base commit picking. 
+ const ( + baseRepo = "git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git" + baseBranch = "master" + ) + + res := baseCommitResult{ + KernelRepo: baseRepo, + KernelCommit: args.FixedBaseCommit, + } + if args.FixedBaseCommit != "" { + return res, nil + } + + err := kernel.UseLinuxRepo(ctx, func(_ string, repo vcs.Repo) error { + head, err := repo.Poll(baseRepo, baseBranch) + if err != nil { + return err + } + res.KernelCommit, err = repo.ReleaseTag(head.Hash) + return err + }) + return res, err +} diff --git a/pkg/aflow/flow/patching/patching.go b/pkg/aflow/flow/patching/patching.go new file mode 100644 index 000000000000..ef10f1a2edba --- /dev/null +++ b/pkg/aflow/flow/patching/patching.go @@ -0,0 +1,138 @@ +// Copyright 2025 syzkaller project authors. All rights reserved. +// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. + +package patching + +import ( + "encoding/json" + + "github.com/google/syzkaller/pkg/aflow" + "github.com/google/syzkaller/pkg/aflow/action/crash" + "github.com/google/syzkaller/pkg/aflow/action/kernel" + "github.com/google/syzkaller/pkg/aflow/ai" + "github.com/google/syzkaller/pkg/aflow/tool/codesearcher" +) + +type Inputs struct { + ReproOpts string + ReproSyz string + ReproC string + KernelConfig string + SyzkallerCommit string + CodesearchToolBin string + + // Same as in the manager config. + Syzkaller string + Image string + Type string + VM json.RawMessage + + // Use this fixed based kernel commit (for testing/local running). + FixedBaseCommit string +} + +type Outputs struct { + PatchDescription string + PatchDiff string +} + +func init() { + tools := codesearcher.Tools + + aflow.Register[Inputs, Outputs]( + ai.WorkflowPatching, + "generate a kernel patch fixing a provided bug reproducer", + &aflow.Flow{ + Root: &aflow.Pipeline{ + Actions: []aflow.Action{ + baseCommitPicker, + kernel.Checkout, + kernel.Build, + // Ensure we can reproduce the crash (and the build boots). 
+ crash.Reproduce, + codesearcher.PrepareIndex, + &aflow.LLMAgent{ + Name: "debugger", + Reply: "BugExplanation", + Temperature: 1, + Instruction: debuggingInstruction, + Prompt: debuggingPrompt, + Tools: tools, + }, + &aflow.LLMAgent{ + Name: "diff-generator", + Reply: "PatchDiff", + Temperature: 1, + Instruction: diffInstruction, + Prompt: diffPrompt, + Tools: tools, + }, + &aflow.LLMAgent{ + Name: "description-generator", + Reply: "PatchDescription", + Temperature: 1, + Instruction: descriptionInstruction, + Prompt: descriptionPrompt, + }, + }, + }, + }, + ) +} + +// TODO: mention not doing assumptions about the source code, and instead querying code using tools. +// TODO: mention to extensively use provided tools to confirm everything. +// TODO: use cause bisection info, if available. + +const debuggingInstruction = ` +You are an experienced Linux kernel developer tasked with debugging a kernel crash root cause. +You need to provide a detailed explanation of the root cause for another developer to be +able to write a fix for the bug based on your explanation. +Your final reply must contain only the explanation. + +Call some codesearch tools first. +` + +const debuggingPrompt = ` +The crash is: + +{{.CrashReport}} +` + +const diffInstruction = ` +You are an experienced Linux kernel developer tasked with creating a patch for a kernel bug. +Your final reply should contain only the code diff in patch format. +` + +const diffPrompt = ` +The crash that corresponds to the bug is: + +{{.CrashReport}} + +The explanation of the root cause of the bug is: + +{{.BugExplanation}} +` + +const descriptionInstruction = ` +You are an experienced Linux kernel developer tasked with writing a commit description for +a kernel bug fixing commit. The description should start with a one-line summary, +and then include description of the bug being fixed, and how it's fixed by the provided patch. +Your final reply should contain only the text of the commit description. 
+Phrase the one-line summary so that it is not longer than 72 characters. +The rest of the description must be word-wrapped at 72 characters. +` + +const descriptionPrompt = ` +The crash that corresponds to the bug is: + +{{.CrashReport}} + +The explanation of the root cause of the bug is: + +{{.BugExplanation}} + +The diff of the bug fix is: + +{{.PatchDiff}} +` diff --git a/pkg/aflow/flow_test.go b/pkg/aflow/flow_test.go new file mode 100644 index 000000000000..8ab8016f3437 --- /dev/null +++ b/pkg/aflow/flow_test.go @@ -0,0 +1,554 @@ +// Copyright 2025 syzkaller project authors. All rights reserved. +// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. + +package aflow + +import ( + "context" + "path/filepath" + "testing" + "time" + + "github.com/google/syzkaller/pkg/aflow/trajectory" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "google.golang.org/genai" +) + +func TestWorkflow(t *testing.T) { + type flowInputs struct { + InFoo int + InBar string + InBaz string + } + type flowOutputs struct { + OutFoo string + OutBar int + OutBaz string + AgentFoo int + } + type firstFuncInputs struct { + InFoo int + InBar string + } + type firstFuncOutputs struct { + TmpFuncOutput string + OutBar int + } + type secondFuncInputs struct { + AgentBar string + TmpFuncOutput string + InFoo int + } + type secondFuncOutputs struct { + OutBaz string + } + type agentOutputs struct { + AgentFoo int `jsonschema:"foo"` + AgentBar string `jsonschema:"bar"` + } + type tool1State struct { + InFoo int + TmpFuncOutput string + } + type tool1Args struct { + ArgFoo string `jsonschema:"foo"` + ArgBar int `jsonschema:"bar"` + } + type tool1Results struct { + ResFoo int `jsonschema:"foo"` + ResString string `jsonschema:"string"` + } + type tool2State struct { + InFoo int + } + type tool2Args struct { + ArgBaz int `jsonschema:"baz"` + } + type tool2Results struct { + ResBaz int `jsonschema:"baz"` + } + inputs := 
map[string]any{ + "InFoo": 10, + "InBar": "bar", + "InBaz": "baz", + } + flows := make(map[string]*Flow) + err := register[flowInputs, flowOutputs]("test", "description", flows, []*Flow{ + { + Name: "flow", + Root: NewPipeline( + NewFuncAction("func-action", + func(ctx *Context, args firstFuncInputs) (firstFuncOutputs, error) { + assert.Equal(t, args.InFoo, 10) + assert.Equal(t, args.InBar, "bar") + return firstFuncOutputs{ + TmpFuncOutput: "func-output", + OutBar: 142, + }, nil + }), + &LLMAgent{ + Name: "smarty", + Reply: "OutFoo", + Outputs: LLMOutputs[agentOutputs](), + Temperature: 0, + Instruction: "You are smarty. {{.InBaz}}", + Prompt: "Prompt: {{.InBaz}} {{.TmpFuncOutput}}", + Tools: []Tool{ + NewFuncTool("tool1", func(ctx *Context, state tool1State, args tool1Args) (tool1Results, error) { + assert.Equal(t, state.InFoo, 10) + assert.Equal(t, state.TmpFuncOutput, "func-output") + assert.Equal(t, args.ArgFoo, "arg-foo") + assert.Equal(t, args.ArgBar, 100) + return tool1Results{ + ResFoo: 200, + ResString: "res-string", + }, nil + }, "tool 1 description"), + NewFuncTool("tool2", func(ctx *Context, state tool2State, args tool2Args) (tool2Results, error) { + assert.Equal(t, state.InFoo, 10) + assert.Equal(t, args.ArgBaz, 101) + return tool2Results{ + ResBaz: 300, + }, nil + }, "tool 2 description"), + }, + }, + NewFuncAction("another-action", + func(ctx *Context, args secondFuncInputs) (secondFuncOutputs, error) { + assert.Equal(t, args.AgentBar, "agent-bar") + assert.Equal(t, args.TmpFuncOutput, "func-output") + assert.Equal(t, args.InFoo, 10) + return secondFuncOutputs{ + OutBaz: "baz", + }, nil + }), + ), + }, + }) + require.NoError(t, err) + var startTime time.Time + stubTime := startTime + replySeq := 0 + stub := &stubContext{ + timeNow: func() time.Time { + stubTime = stubTime.Add(time.Second) + return stubTime + }, + generateContent: func(cfg *genai.GenerateContentConfig, req []*genai.Content) ( + *genai.GenerateContentResponse, error) { + 
assert.Equal(t, cfg.SystemInstruction, genai.NewContentFromText(`You are smarty. baz + +Use set-results tool to provide results of the analysis. +It must be called exactly once before the final reply. +Ignore results of this tool. +`, genai.RoleUser)) + assert.Equal(t, cfg.Temperature, genai.Ptr[float32](0)) + assert.Equal(t, len(cfg.Tools), 3) + assert.Equal(t, cfg.Tools[0].FunctionDeclarations[0].Name, "tool1") + assert.Equal(t, cfg.Tools[0].FunctionDeclarations[0].Description, "tool 1 description") + assert.Equal(t, cfg.Tools[1].FunctionDeclarations[0].Name, "tool2") + assert.Equal(t, cfg.Tools[1].FunctionDeclarations[0].Description, "tool 2 description") + assert.Equal(t, cfg.Tools[2].FunctionDeclarations[0].Name, "set-results") + + reply1 := &genai.Content{ + Role: string(genai.RoleModel), + Parts: []*genai.Part{ + { + FunctionCall: &genai.FunctionCall{ + ID: "id0", + Name: "tool1", + Args: map[string]any{ + "ArgFoo": "arg-foo", + "ArgBar": 100, + }, + }, + }, + { + FunctionCall: &genai.FunctionCall{ + ID: "id1", + Name: "tool2", + Args: map[string]any{ + "ArgBaz": 101, + }, + }, + }, + { + Text: "I am thinking I need to call some tools", + Thought: true, + }, + }} + resp1 := &genai.Content{ + Role: string(genai.RoleUser), + Parts: []*genai.Part{ + { + FunctionResponse: &genai.FunctionResponse{ + ID: "id0", + Name: "tool1", + Response: map[string]any{ + "ResFoo": 200, + "ResString": "res-string", + }, + }, + }, + { + FunctionResponse: &genai.FunctionResponse{ + ID: "id1", + Name: "tool2", + Response: map[string]any{ + "ResBaz": 300, + }, + }, + }, + }} + reply2 := &genai.Content{ + Role: string(genai.RoleModel), + Parts: []*genai.Part{ + { + FunctionCall: &genai.FunctionCall{ + ID: "id2", + Name: "set-results", + Args: map[string]any{ + "AgentFoo": 42, + "AgentBar": "agent-bar", + }, + }, + }, + { + Text: "Completly blank.", + Thought: true, + }, + { + Text: "Whatever.", + Thought: true, + }, + }} + resp2 := &genai.Content{ + Role: string(genai.RoleUser), + 
Parts: []*genai.Part{ + { + FunctionResponse: &genai.FunctionResponse{ + ID: "id2", + Name: "set-results", + Response: map[string]any{ + "AgentFoo": 42, + "AgentBar": "agent-bar", + }, + }, + }, + }} + + replySeq++ + switch replySeq { + case 1: + assert.Equal(t, req, []*genai.Content{ + genai.NewContentFromText("Prompt: baz func-output", genai.RoleUser), + }) + return &genai.GenerateContentResponse{ + Candidates: []*genai.Candidate{{Content: reply1}}}, nil + case 2: + assert.Equal(t, req, []*genai.Content{ + genai.NewContentFromText("Prompt: baz func-output", genai.RoleUser), + reply1, + resp1, + }) + return &genai.GenerateContentResponse{ + Candidates: []*genai.Candidate{{Content: reply2}}}, nil + case 3: + assert.Equal(t, req, []*genai.Content{ + genai.NewContentFromText("Prompt: baz func-output", genai.RoleUser), + reply1, + resp1, + reply2, + resp2, + }) + return &genai.GenerateContentResponse{ + Candidates: []*genai.Candidate{ + {Content: &genai.Content{ + Role: string(genai.RoleUser), + Parts: []*genai.Part{ + genai.NewPartFromText("hello, world!")}, + }}}}, nil + + default: + t.Fatal("unexpected LLM calls") + return nil, nil + } + }, + } + ctx := context.WithValue(context.Background(), stubContextKey, stub) + workdir := t.TempDir() + cache, err := newTestCache(t, filepath.Join(workdir, "cache"), 0, stub.timeNow) + require.NoError(t, err) + // nolint: dupl + expected := []*trajectory.Span{ + { + Seq: 0, + Nesting: 0, + Type: trajectory.SpanFlow, + Name: "test-flow", + Started: startTime.Add(1 * time.Second), + }, + { + Seq: 1, + Nesting: 1, + Type: trajectory.SpanAction, + Name: "func-action", + Started: startTime.Add(2 * time.Second), + }, + { + Seq: 1, + Nesting: 1, + Type: trajectory.SpanAction, + Name: "func-action", + Started: startTime.Add(2 * time.Second), + Finished: startTime.Add(3 * time.Second), + Results: map[string]any{ + "TmpFuncOutput": "func-output", + "OutBar": 142, + }, + }, + { + Seq: 2, + Nesting: 1, + Type: trajectory.SpanAgent, + Name: 
"smarty", + Started: startTime.Add(4 * time.Second), + Instruction: `You are smarty. baz + +Use set-results tool to provide results of the analysis. +It must be called exactly once before the final reply. +Ignore results of this tool. +`, + Prompt: "Prompt: baz func-output", + }, + { + Seq: 3, + Nesting: 2, + Type: trajectory.SpanLLM, + Name: "smarty", + Started: startTime.Add(5 * time.Second), + }, + { + Seq: 3, + Nesting: 2, + Type: trajectory.SpanLLM, + Name: "smarty", + Started: startTime.Add(5 * time.Second), + Finished: startTime.Add(6 * time.Second), + Thoughts: "I am thinking I need to call some tools", + }, + { + Seq: 4, + Nesting: 2, + Type: trajectory.SpanTool, + Name: "tool1", + Started: startTime.Add(7 * time.Second), + Args: map[string]any{ + "ArgBar": 100, + "ArgFoo": "arg-foo", + }, + }, + { + Seq: 4, + Nesting: 2, + Type: trajectory.SpanTool, + Name: "tool1", + Started: startTime.Add(7 * time.Second), + Finished: startTime.Add(8 * time.Second), + Args: map[string]any{ + "ArgBar": 100, + "ArgFoo": "arg-foo", + }, + Results: map[string]any{ + "ResFoo": 200, + "ResString": "res-string", + }, + }, + { + Seq: 5, + Nesting: 2, + Type: trajectory.SpanTool, + Name: "tool2", + Started: startTime.Add(9 * time.Second), + Args: map[string]any{ + "ArgBaz": 101, + }, + }, + { + Seq: 5, + Nesting: 2, + Type: trajectory.SpanTool, + Name: "tool2", + Started: startTime.Add(9 * time.Second), + Finished: startTime.Add(10 * time.Second), + Args: map[string]any{ + "ArgBaz": 101, + }, + Results: map[string]any{ + "ResBaz": 300, + }, + }, + { + Seq: 6, + Nesting: 2, + Type: trajectory.SpanLLM, + Name: "smarty", + Started: startTime.Add(11 * time.Second), + }, + { + Seq: 6, + Nesting: 2, + Type: trajectory.SpanLLM, + Name: "smarty", + Started: startTime.Add(11 * time.Second), + Finished: startTime.Add(12 * time.Second), + Thoughts: "Completly blank.Whatever.", + }, + { + Seq: 7, + Nesting: 2, + Type: trajectory.SpanTool, + Name: "set-results", + Started: startTime.Add(13 * 
time.Second), + Args: map[string]any{ + "AgentBar": "agent-bar", + "AgentFoo": 42, + }, + }, + { + Seq: 7, + Nesting: 2, + Type: trajectory.SpanTool, + Name: "set-results", + Started: startTime.Add(13 * time.Second), + Finished: startTime.Add(14 * time.Second), + Args: map[string]any{ + "AgentBar": "agent-bar", + "AgentFoo": 42, + }, + Results: map[string]any{ + "AgentBar": "agent-bar", + "AgentFoo": 42, + }, + }, + { + Seq: 8, + Nesting: 2, + Type: trajectory.SpanLLM, + Name: "smarty", + Started: startTime.Add(15 * time.Second), + }, + { + Seq: 8, + Nesting: 2, + Type: trajectory.SpanLLM, + Name: "smarty", + Started: startTime.Add(15 * time.Second), + Finished: startTime.Add(16 * time.Second), + }, + { + Seq: 2, + Nesting: 1, + Type: trajectory.SpanAgent, + Name: "smarty", + Started: startTime.Add(4 * time.Second), + Finished: startTime.Add(17 * time.Second), + Instruction: `You are smarty. baz + +Use set-results tool to provide results of the analysis. +It must be called exactly once before the final reply. +Ignore results of this tool. 
+`, + Prompt: "Prompt: baz func-output", + Reply: "hello, world!", + Results: map[string]any{ + "AgentBar": "agent-bar", + "AgentFoo": 42, + }, + }, + { + Seq: 9, + Nesting: 1, + Type: trajectory.SpanAction, + Name: "another-action", + Started: startTime.Add(18 * time.Second), + }, + { + Seq: 9, + Nesting: 1, + Type: trajectory.SpanAction, + Name: "another-action", + Started: startTime.Add(18 * time.Second), + Finished: startTime.Add(19 * time.Second), + Results: map[string]any{ + "OutBaz": "baz", + }, + }, + { + Seq: 0, + Nesting: 0, + Type: trajectory.SpanFlow, + Name: "test-flow", + Started: startTime.Add(1 * time.Second), + Finished: startTime.Add(20 * time.Second), + Results: map[string]any{ + "AgentFoo": 42, + "OutBar": 142, + "OutBaz": "baz", + "OutFoo": "hello, world!", + }, + }, + } + onEvent := func(span *trajectory.Span) error { + require.NotEmpty(t, expected) + require.Equal(t, span, expected[0]) + expected = expected[1:] + return nil + } + res, err := flows["test-flow"].Execute(ctx, "model", workdir, inputs, cache, onEvent) + require.NoError(t, err) + require.Equal(t, res, map[string]any{ + "OutFoo": "hello, world!", + "OutBar": 142, + "OutBaz": "baz", + "AgentFoo": 42, + }) + require.Empty(t, expected) +} + +func TestNoInputs(t *testing.T) { + type flowInputs struct { + InFoo int + InBar string + } + type flowOutputs struct { + } + inputs := map[string]any{ + "InFoo": 10, + } + flows := make(map[string]*Flow) + err := register[flowInputs, flowOutputs]("test", "description", flows, []*Flow{ + { + Root: NewFuncAction("func-action", + func(ctx *Context, args flowInputs) (flowOutputs, error) { + return flowOutputs{}, nil + }), + }, + }) + require.NoError(t, err) + stub := &stubContext{ + generateContent: func(cfg *genai.GenerateContentConfig, req []*genai.Content) ( + *genai.GenerateContentResponse, error) { + return nil, nil + }, + } + ctx := context.WithValue(context.Background(), stubContextKey, stub) + workdir := t.TempDir() + cache, err := 
newTestCache(t, filepath.Join(workdir, "cache"), 0, stub.timeNow) + require.NoError(t, err) + onEvent := func(span *trajectory.Span) error { return nil } + _, err = flows["test"].Execute(ctx, "model", workdir, inputs, cache, onEvent) + require.Equal(t, err.Error(), "flow inputs are missing:"+ + " field InBar is not present when converting map to aflow.flowInputs") +} diff --git a/pkg/aflow/func_action.go b/pkg/aflow/func_action.go new file mode 100644 index 000000000000..a545793204d8 --- /dev/null +++ b/pkg/aflow/func_action.go @@ -0,0 +1,46 @@ +// Copyright 2025 syzkaller project authors. All rights reserved. +// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. + +package aflow + +import ( + "maps" + + "github.com/google/syzkaller/pkg/aflow/trajectory" +) + +func NewFuncAction[Args, Results any](name string, fn func(*Context, Args) (Results, error)) Action { + return &funcAction[Args, Results]{ + name: name, + fn: fn, + } +} + +type funcAction[Args, Results any] struct { + name string + fn func(*Context, Args) (Results, error) +} + +func (a *funcAction[Args, Results]) execute(ctx *Context) error { + args, err := convertFromMap[Args](ctx.state, false) + if err != nil { + return err + } + span := &trajectory.Span{ + Type: trajectory.SpanAction, + Name: a.name, + } + if err := ctx.startSpan(span); err != nil { + return err + } + res, fnErr := a.fn(ctx, args) + span.Results = convertToMap(res) + maps.Insert(ctx.state, maps.All(span.Results)) + return ctx.finishSpan(span, fnErr) +} + +func (a *funcAction[Args, Results]) verify(vctx *verifyContext) { + vctx.requireNotEmpty(a.name, "Name", a.name) + requireInputs[Args](vctx, a.name) + provideOutputs[Results](vctx, a.name) +} diff --git a/pkg/aflow/func_tool.go b/pkg/aflow/func_tool.go new file mode 100644 index 000000000000..cd069db840dd --- /dev/null +++ b/pkg/aflow/func_tool.go @@ -0,0 +1,71 @@ +// Copyright 2025 syzkaller project authors. All rights reserved. 
+// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. + +package aflow + +import ( + "github.com/google/syzkaller/pkg/aflow/trajectory" + "google.golang.org/genai" +) + +// NewFuncTool creates a new tool based on a custom function that an LLM agent can use. +// Name and description are important since they are passed to an LLM agent. +// Args and Results must be structs with fields commented with aflow tag, +// comments are also important since they are passed to the LLM agent. +// Args are accepted from the LLM agent on the tool invocation, Results are returned +// to the LLM agent. State fields are taken from the current execution state +// (they are not exposed to the LLM agent). +func NewFuncTool[State, Args, Results any](name string, fn func(*Context, State, Args) (Results, error), + description string) Tool { + return &funcTool[State, Args, Results]{ + Name: name, + Description: description, + Func: fn, + } +} + +type funcTool[State, Args, Results any] struct { + Name string + Description string + Func func(*Context, State, Args) (Results, error) +} + +func (t *funcTool[State, Args, Results]) declaration() *genai.FunctionDeclaration { + return &genai.FunctionDeclaration{ + Name: t.Name, + Description: t.Description, + ParametersJsonSchema: mustSchemaFor[Args](), + ResponseJsonSchema: mustSchemaFor[Results](), + } +} + +func (t *funcTool[State, Args, Results]) execute(ctx *Context, args map[string]any) (map[string]any, error) { + state, err := convertFromMap[State](ctx.state, false) + if err != nil { + return nil, err + } + a, err := convertFromMap[Args](args, true) + if err != nil { + return nil, err + } + span := &trajectory.Span{ + Type: trajectory.SpanTool, + Name: t.Name, + Args: args, + } + if err := ctx.startSpan(span); err != nil { + return nil, err + } + res, err := t.Func(ctx, state, a) + span.Results = convertToMap(res) + err = ctx.finishSpan(span, err) + return span.Results, err +} + +func (t 
*funcTool[State, Args, Results]) verify(ctx *verifyContext) { + ctx.requireNotEmpty(t.Name, "Name", t.Name) + ctx.requireNotEmpty(t.Name, "Description", t.Description) + requireSchema[Args](ctx, t.Name, "Args") + requireSchema[Results](ctx, t.Name, "Results") + requireInputs[State](ctx, t.Name) +} diff --git a/pkg/aflow/llm_agent.go b/pkg/aflow/llm_agent.go new file mode 100644 index 000000000000..76661add6f88 --- /dev/null +++ b/pkg/aflow/llm_agent.go @@ -0,0 +1,254 @@ +// Copyright 2025 syzkaller project authors. All rights reserved. +// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. + +package aflow + +import ( + "fmt" + "maps" + "reflect" + + "github.com/google/syzkaller/pkg/aflow/trajectory" + "google.golang.org/genai" +) + +type LLMAgent struct { + // For logging/debugging. + Name string + // Name of the state variable to store the final reply of the agent. + // These names can be used in subsequent action instructions/prompts, + // and as final workflow outputs. + Reply string + // Optional additional structured outputs besides the final text reply. + // Use LLMOutputs function to create it. + Outputs *llmOutputs + // Value that controls the degree of randomness in token selection. + // Lower temperatures are good for prompts that require a less open-ended or creative response, + // while higher temperatures can lead to more diverse or creative results. + // Must be assigned a float32 value in the range [0, 2]. + Temperature any + // Instructions for the agent. + // Formatted as text/template, can use "{{.Variable}}" as placeholders for dynamic content. + // Variables can come from the workflow inputs, or from preceding actions outputs. + Instruction string + // Prompt for the agent. The same format as Instruction. + Prompt string + // Set of tools for the agent to use. + Tools []Tool +} + +// Tool represents a custom tool an LLMAgent can invoke. +// Use NewFuncTool to create function-based tools. 
+type Tool interface { + verify(*verifyContext) + declaration() *genai.FunctionDeclaration + execute(*Context, map[string]any) (map[string]any, error) +} + +// LLMOutputs creates a special tool that can be used by LLM to provide structured outputs. +func LLMOutputs[Args any]() *llmOutputs { + return &llmOutputs{ + tool: NewFuncTool("set-results", func(ctx *Context, state struct{}, args Args) (Args, error) { + return args, nil + }, "Use this tool to provide results of the analysis."), + provideOutputs: func(ctx *verifyContext, who string) { + provideOutputs[Args](ctx, who) + }, + instruction: ` + +Use set-results tool to provide results of the analysis. +It must be called exactly once before the final reply. +Ignore results of this tool. +`, + } +} + +type llmOutputs struct { + tool Tool + provideOutputs func(*verifyContext, string) + instruction string +} + +func (a *LLMAgent) execute(ctx *Context) error { + cfg, instruction, tools := a.config(ctx) + span := &trajectory.Span{ + Type: trajectory.SpanAgent, + Name: a.Name, + Instruction: instruction, + Prompt: formatTemplate(a.Prompt, ctx.state), + } + if err := ctx.startSpan(span); err != nil { + return err + } + reply, outputs, err := a.chat(ctx, cfg, tools, span.Prompt) + span.Reply = reply + span.Results = outputs + return ctx.finishSpan(span, err) +} + +func (a *LLMAgent) chat(ctx *Context, cfg *genai.GenerateContentConfig, tools map[string]Tool, prompt string) ( + string, map[string]any, error) { + var outputs map[string]any + req := []*genai.Content{genai.NewContentFromText(prompt, genai.RoleUser)} + for { + reqSpan := &trajectory.Span{ + Type: trajectory.SpanLLM, + Name: a.Name, + } + if err := ctx.startSpan(reqSpan); err != nil { + return "", nil, err + } + resp, err := ctx.generateContent(cfg, req) + if err != nil { + return "", nil, ctx.finishSpan(reqSpan, err) + } + reply, thoughts, calls, respErr := a.parseResponse(resp) + reqSpan.Thoughts = thoughts + if err := ctx.finishSpan(reqSpan, respErr); err != 
nil { + return "", nil, err + } + if len(calls) == 0 { + // This is the final reply. + if a.Outputs != nil && outputs == nil { + return "", nil, fmt.Errorf("LLM did not call tool to set outputs") + } + ctx.state[a.Reply] = reply + maps.Insert(ctx.state, maps.All(outputs)) + return reply, outputs, nil + } + // This is not the final reply, LLM asked to execute some tools. + // Append the current reply, and tool responses to the next request. + responses, outputs1, err := a.callTools(ctx, tools, calls) + if err != nil { + return "", nil, err + } + if outputs != nil && outputs1 != nil { + return "", nil, fmt.Errorf("LLM called outputs tool twice") + } + outputs = outputs1 + req = append(req, resp.Candidates[0].Content, responses) + } +} + +func (a *LLMAgent) config(ctx *Context) (*genai.GenerateContentConfig, string, map[string]Tool) { + instruction := formatTemplate(a.Instruction, ctx.state) + toolList := a.Tools + if a.Outputs != nil { + instruction += a.Outputs.instruction + toolList = append(toolList, a.Outputs.tool) + } + toolMap := make(map[string]Tool) + var tools []*genai.Tool + for _, tool := range toolList { + decl := tool.declaration() + toolMap[decl.Name] = tool + tools = append(tools, &genai.Tool{ + FunctionDeclarations: []*genai.FunctionDeclaration{decl}}) + } + return &genai.GenerateContentConfig{ + ResponseModalities: []string{"TEXT"}, + Temperature: genai.Ptr(a.Temperature.(float32)), + SystemInstruction: genai.NewContentFromText(instruction, genai.RoleUser), + Tools: tools, + }, instruction, toolMap +} + +func (a *LLMAgent) callTools(ctx *Context, tools map[string]Tool, calls []*genai.FunctionCall) ( + *genai.Content, map[string]any, error) { + responses := &genai.Content{ + Role: string(genai.RoleUser), + } + var outputs map[string]any + for _, call := range calls { + tool := tools[call.Name] + if tool == nil { + return nil, nil, fmt.Errorf("no tool %q", call.Name) + } + results, err := tool.execute(ctx, call.Args) + if err != nil { + return nil, 
nil, err + } + responses.Parts = append(responses.Parts, genai.NewPartFromFunctionResponse(call.Name, results)) + responses.Parts[len(responses.Parts)-1].FunctionResponse.ID = call.ID + if a.Outputs != nil && tool == a.Outputs.tool { + outputs = results + } + } + return responses, outputs, nil +} + +func (a *LLMAgent) parseResponse(resp *genai.GenerateContentResponse) ( + reply, thoughts string, calls []*genai.FunctionCall, err error) { + if len(resp.Candidates) == 0 || resp.Candidates[0] == nil { + err = fmt.Errorf("empty model response") + if resp.PromptFeedback != nil { + err = fmt.Errorf("request blocked: %v", resp.PromptFeedback.BlockReasonMessage) + } + return + } + candidate := resp.Candidates[0] + if candidate.Content == nil || len(candidate.Content.Parts) == 0 { + err = fmt.Errorf("%v (%v)", candidate.FinishMessage, candidate.FinishReason) + return + } + // We don't expect to receive these now. + if candidate.GroundingMetadata != nil || candidate.CitationMetadata != nil || + candidate.LogprobsResult != nil { + err = fmt.Errorf("unexpected reply fields (%+v)", *candidate) + return + } + for _, part := range candidate.Content.Parts { + // We don't expect to receive these now. 
+ if part.VideoMetadata != nil || part.InlineData != nil || + part.FileData != nil || part.FunctionResponse != nil || + part.CodeExecutionResult != nil || part.ExecutableCode != nil { + err = fmt.Errorf("unexpected reply part (%+v)", *part) + return + } + if part.FunctionCall != nil { + calls = append(calls, part.FunctionCall) + } else if part.Thought { + thoughts += part.Text + } else { + reply += part.Text + } + } + return +} + +func (a *LLMAgent) verify(vctx *verifyContext) { + vctx.requireNotEmpty(a.Name, "Name", a.Name) + vctx.requireNotEmpty(a.Name, "Reply", a.Reply) + if temp, ok := a.Temperature.(int); ok { + a.Temperature = float32(temp) + } + if temp, ok := a.Temperature.(float32); !ok || temp < 0 || temp > 2 { + vctx.errorf(a.Name, "Temperature must have a float32 value in the range [0, 2]") + } + // Verify dataflow. All dynamic variables must be provided by inputs, + // or preceding actions. + a.verifyTemplate(vctx, "Instruction", a.Instruction) + a.verifyTemplate(vctx, "Prompt", a.Prompt) + for _, tool := range a.Tools { + tool.verify(vctx) + } + vctx.provideOutput(a.Name, a.Reply, reflect.TypeFor[string](), true) + if a.Outputs != nil { + a.Outputs.provideOutputs(vctx, a.Name) + } +} + +func (a *LLMAgent) verifyTemplate(vctx *verifyContext, what, text string) { + vctx.requireNotEmpty(a.Name, what, text) + vars := make(map[string]reflect.Type) + for name, state := range vctx.state { + vars[name] = state.typ + } + used, err := verifyTemplate(text, vars) + if err != nil { + vctx.errorf(a.Name, "%v: %v", what, err) + } + for name := range used { + vctx.state[name].used = true + } +} diff --git a/pkg/aflow/schema.go b/pkg/aflow/schema.go new file mode 100644 index 000000000000..e34d465ead0c --- /dev/null +++ b/pkg/aflow/schema.go @@ -0,0 +1,102 @@ +// Copyright 2025 syzkaller project authors. All rights reserved. +// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. 
+ +package aflow + +import ( + "encoding/json" + "fmt" + "iter" + "maps" + "reflect" + + "github.com/google/jsonschema-go/jsonschema" +) + +func schemaFor[T any]() (*jsonschema.Schema, error) { + typ := reflect.TypeFor[T]() + if typ.Kind() != reflect.Struct { + return nil, fmt.Errorf("%v is not a struct", typ.Name()) + } + for _, field := range reflect.VisibleFields(typ) { + if field.Tag.Get("jsonschema") == "" { + return nil, fmt.Errorf("%v.%v does not have a jsonschema tag with description", + typ.Name(), field.Name) + } + } + schema, err := jsonschema.For[T](nil) + if err != nil { + return nil, err + } + resolved, err := schema.Resolve(nil) + if err != nil { + return nil, err + } + return resolved.Schema(), nil +} + +func mustSchemaFor[T any]() *jsonschema.Schema { + schema, err := schemaFor[T]() + if err != nil { + panic(err) + } + return schema +} + +func convertToMap[T any](val T) map[string]any { + res := make(map[string]any) + for name, val := range foreachField(&val) { + res[name] = val.Interface() + } + return res +} + +// convertFromMap converts an untyped map to a struct. +// It always ensures that all struct fields are present in the map. +// In the strict mode it also checks that the map does not contain any other unused elements. 
+func convertFromMap[T any](m map[string]any, strict bool) (T, error) { + m = maps.Clone(m) + var val T + for name, field := range foreachField(&val) { + f, ok := m[name] + if !ok { + return val, fmt.Errorf("field %v is not present when converting map to %T", name, val) + } + delete(m, name) + if mm, ok := f.(map[string]any); ok && field.Type() == reflect.TypeFor[json.RawMessage]() { + raw, err := json.Marshal(mm) + if err != nil { + return val, err + } + field.Set(reflect.ValueOf(json.RawMessage(raw))) + } else { + field.Set(reflect.ValueOf(f)) + } + } + if strict && len(m) != 0 { + return val, fmt.Errorf("unused fields when converting map to %T: %v", val, m) + } + return val, nil +} + +// foreachField iterates over all public fields of the struct provided in data. +func foreachField(data any) iter.Seq2[string, reflect.Value] { + return func(yield func(string, reflect.Value) bool) { + v := reflect.ValueOf(data).Elem() + for _, field := range reflect.VisibleFields(v.Type()) { + if !yield(field.Name, v.FieldByIndex(field.Index)) { + break + } + } + } +} + +func foreachFieldOf[T any]() iter.Seq2[string, reflect.Type] { + return func(yield func(string, reflect.Type) bool) { + for name, val := range foreachField(new(T)) { + if !yield(name, val.Type()) { + break + } + } + } +} diff --git a/pkg/aflow/schema_test.go b/pkg/aflow/schema_test.go new file mode 100644 index 000000000000..ac441f7d62da --- /dev/null +++ b/pkg/aflow/schema_test.go @@ -0,0 +1,50 @@ +// Copyright 2025 syzkaller project authors. All rights reserved. +// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. 
+ +package aflow + +import ( + "fmt" + "testing" + + "github.com/google/jsonschema-go/jsonschema" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestSchema(t *testing.T) { + type Test struct { + fn func() (*jsonschema.Schema, error) + err string + } + type structWithNoTags struct { + A int + } + type structWithTags struct { + A int `jsonschema:"aaa"` + B string `jsonschema:"bbb"` + } + tests := []Test{ + { + fn: schemaFor[int], + err: "int is not a struct", + }, + { + fn: schemaFor[structWithNoTags], + err: "structWithNoTags.A does not have a jsonschema tag with description", + }, + { + fn: schemaFor[structWithTags], + }, + } + for i, test := range tests { + t.Run(fmt.Sprint(i), func(t *testing.T) { + _, err := test.fn() + if err != nil { + assert.Equal(t, err.Error(), test.err) + return + } + require.Empty(t, test.err) + }) + } +} diff --git a/pkg/aflow/template.go b/pkg/aflow/template.go new file mode 100644 index 000000000000..7b0efd194471 --- /dev/null +++ b/pkg/aflow/template.go @@ -0,0 +1,100 @@ +// Copyright 2025 syzkaller project authors. All rights reserved. +// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. + +package aflow + +import ( + "bytes" + "fmt" + "io" + "reflect" + "strings" + "text/template" + "text/template/parse" +) + +// formatTemplate formats template 'text' using the standard text/template logic. +// Panics on any errors, but these panics shouldn't happen if verifyTemplate +// was called for the template before. +func formatTemplate(text string, state map[string]any) string { + templ, err := parseTemplate(text) + if err != nil { + panic(err) + } + w := new(bytes.Buffer) + if err := templ.Execute(w, state); err != nil { + panic(err) + } + return w.String() +} + +// verifyTemplate checks that the template 'text' can be executed with the variables 'vars'. +// Returns the set of variables that are actually used in the template. 
+func verifyTemplate(text string, vars map[string]reflect.Type) (map[string]bool, error) { + templ, err := parseTemplate(text) + if err != nil { + return nil, err + } + used := make(map[string]bool) + walkTemplate(templ.Root, used, &err) + if err != nil { + return nil, err + } + vals := make(map[string]any) + for name := range used { + typ, ok := vars[name] + if !ok { + return nil, fmt.Errorf("input %v is not provided", name) + } + vals[name] = reflect.Zero(typ).Interface() + } + // Execute once just to make sure. + if err := templ.Execute(io.Discard, vals); err != nil { + return nil, err + } + return used, nil +} + +// walkTemplate recursively walks template nodes collecting used variables. +// It does not handle all node types, but enough to support reasonably simple templates. +func walkTemplate(n parse.Node, used map[string]bool, errp *error) { + if reflect.ValueOf(n).IsNil() { + return + } + switch n := n.(type) { + case *parse.ListNode: + for _, c := range n.Nodes { + walkTemplate(c, used, errp) + } + case *parse.IfNode: + walkTemplate(n.Pipe, used, errp) + walkTemplate(n.List, used, errp) + walkTemplate(n.ElseList, used, errp) + case *parse.ActionNode: + walkTemplate(n.Pipe, used, errp) + case *parse.PipeNode: + for _, c := range n.Decl { + walkTemplate(c, used, errp) + } + for _, c := range n.Cmds { + walkTemplate(c, used, errp) + } + case *parse.CommandNode: + for _, c := range n.Args { + walkTemplate(c, used, errp) + } + case *parse.FieldNode: + if len(n.Ident) != 1 { + noteError(errp, "compound values are not supported: .%v", strings.Join(n.Ident, ".")) + } + used[n.Ident[0]] = true + case *parse.VariableNode: + case *parse.TextNode: + default: + noteError(errp, "unhandled node type %T", n) + } +} + +func parseTemplate(prompt string) (*template.Template, error) { + return template.New("").Option("missingkey=error").Parse(prompt) +} diff --git a/pkg/aflow/template_test.go b/pkg/aflow/template_test.go new file mode 100644 index 000000000000..e42ddd2c3275 
--- /dev/null +++ b/pkg/aflow/template_test.go @@ -0,0 +1,68 @@ +// Copyright 2025 syzkaller project authors. All rights reserved. +// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. + +package aflow + +import ( + "fmt" + "maps" + "reflect" + "slices" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestTemplate(t *testing.T) { + type Test struct { + template string + vars map[string]reflect.Type + used []string + err string + } + tests := []Test{ + { + template: `just text`, + }, + { + template: ` + {{if .bar}} + {{.foo}} + {{end}} + + {{if $local := .bar}} + {{$local}} + {{end}} + `, + vars: map[string]reflect.Type{ + "bar": reflect.TypeFor[bool](), + "foo": reflect.TypeFor[int](), + "baz": reflect.TypeFor[int](), + }, + used: []string{"bar", "foo"}, + }, + { + template: ` + {{if .bar}} + {{.foo}} + {{end}} + `, + vars: map[string]reflect.Type{ + "bar": reflect.TypeFor[bool](), + }, + err: "input foo is not provided", + }, + } + for i, test := range tests { + t.Run(fmt.Sprint(i), func(t *testing.T) { + used, err := verifyTemplate(test.template, test.vars) + if err != nil { + assert.Equal(t, err.Error(), test.err) + return + } + require.Empty(t, test.err) + assert.ElementsMatch(t, slices.Collect(maps.Keys(used)), test.used) + }) + } +} diff --git a/pkg/aflow/tool/codesearcher/codesearcher.go b/pkg/aflow/tool/codesearcher/codesearcher.go new file mode 100644 index 000000000000..79827d26941d --- /dev/null +++ b/pkg/aflow/tool/codesearcher/codesearcher.go @@ -0,0 +1,171 @@ +// Copyright 2025 syzkaller project authors. All rights reserved. +// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. 
+ +package codesearcher + +import ( + "fmt" + "path/filepath" + + "github.com/google/syzkaller/pkg/aflow" + "github.com/google/syzkaller/pkg/clangtool" + "github.com/google/syzkaller/pkg/codesearch" + "github.com/google/syzkaller/pkg/hash" +) + +var Tools = []aflow.Tool{ + aflow.NewFuncTool("codesearch-file-index", fileIndex, ` +Tool provides list of entities defined in the given source file. +Entity can be function, struct, or global variable. +Use it to understand what other things of interest exist in a file. +For example, to locate some initialization function that sets up invariants, +or to find a group of similar functions to later assess similarities/differences +in their implementations. +`), + aflow.NewFuncTool("codesearch-definition-comment", definitionComment, ` +Tool provides source code comment for an entity with the given name. +Entity can be function, struct, or global variable. +Use it to understand how an entity is supposed to be used. +For example, what a function does, or if it may be invoked with NULL pointer argument or not. +But an entity may not have a comment, in which case an empty comment is returned. +In such case, you may consider using codesearch-definition-source tool to look +at the full source code of the entity. +`), + aflow.NewFuncTool("codesearch-definition-source", definitionSource, ` +Tool provides full source code for an entity with the given name. +Entity can be function, struct, or global variable. +Use it to understand implementation details of an entity. +For example, how a function works, what precondition error checks it has, etc. +`), +} + +// This action needs to run before any agents that use codesearch tools. 
+var PrepareIndex = aflow.NewFuncAction("codesearch-prepare", prepare) + +type prepareArgs struct { + KernelCommit string + KernelConfig string + KernelSrc string + KernelObj string + CodesearchToolBin string +} + +type prepareResult struct { + Index index +} + +type fileIndexArgs struct { + SourceFile string `jsonschema:"Source file path."` +} + +type fileIndexResult struct { + Missing bool `jsonschema:"Set to true if the file with the given name does not exist."` + Entities []indexEntity `jsonschema:"List of entities defined in the file."` +} + +type indexEntity struct { + Kind string `jsonschema:"Kind of the entity: function, struct, variable."` + Name string `jsonschema:"Name of the entity."` +} + +// nolint: lll +type defCommentArgs struct { + SourceFile string `jsonschema:"Source file path that references the entity. It helps to restrict scope of the search, if there are different definitions with the same name in different source files."` + Name string `jsonschema:"Name of the entity of interest."` +} + +type defCommentResult struct { + Missing bool `jsonschema:"Set to true if the entity with the given name does not exist."` + Kind string `jsonschema:"Kind of the entity: function, struct, variable."` + Comment string `jsonschema:"Source comment for the entity."` +} + +// nolint: lll +type defSourceArgs struct { + SourceFile string `jsonschema:"Source file path that references the entity. It helps to restrict scope of the search, if there are different definitions with the same name in different source files."` + Name string `jsonschema:"Name of the entity of interest."` + IncludeLines bool `jsonschema:"Whether to include line numbers in the output or not.
Line numbers may distract you, so ask for them only if you need to match lines elsewhere with the source code."` +} + +// nolint: lll +type defSourceResult struct { + Missing bool `jsonschema:"Set to true if the entity with the given name does not exist."` + SourceFile string `jsonschema:"Source file path where the entity is defined."` + SourceCode string `jsonschema:"Source code of the entity definition. It is prefixed with line numbers, so that they can be referenced in other tool invocations."` +} + +// index prevents full JSON marshalling of the index contexts, +// so that they do not appear in logs/journal, and also ensures +// that the index does not pass JSON marshalling round-trip. +type index struct { + *codesearch.Index +} + +func (index) MarshalJSON() ([]byte, error) { + return []byte(`"codesearch-index"`), nil +} + +func (index) UnmarshalJSON([]byte) error { + return fmt.Errorf("codesearch-index cannot be unmarshalled") +} + +func prepare(ctx *aflow.Context, args prepareArgs) (prepareResult, error) { + desc := fmt.Sprintf("kernel commit %v, config hash %v", + args.KernelCommit, hash.String(args.KernelConfig)) + dir, err := ctx.Cache("codesearch", desc, func(dir string) error { + cfg := &clangtool.Config{ + ToolBin: args.CodesearchToolBin, + KernelSrc: args.KernelSrc, + KernelObj: args.KernelObj, + CacheFile: filepath.Join(dir, "index.json"), + } + _, err := clangtool.Run[codesearch.Database](cfg) + return err + }) + if err != nil { + return prepareResult{}, err + } + srcDirs := []string{args.KernelSrc, args.KernelObj} + csIndex, err := codesearch.NewIndex(filepath.Join(dir, "index.json"), srcDirs) + return prepareResult{index{csIndex}}, err +} + +func fileIndex(ctx *aflow.Context, state prepareResult, args fileIndexArgs) (fileIndexResult, error) { + ok, entities, err := state.Index.FileIndex(args.SourceFile) + res := fileIndexResult{ + Missing: !ok, + } + for _, ent := range entities { + res.Entities = append(res.Entities, indexEntity{ + Kind: ent.Kind, 
+ Name: ent.Name, + }) + } + return res, err +} + +func definitionComment(ctx *aflow.Context, state prepareResult, args defCommentArgs) (defCommentResult, error) { + info, err := state.Index.DefinitionComment(args.SourceFile, args.Name) + if err != nil || info == nil { + return defCommentResult{ + Missing: info == nil, + }, err + } + return defCommentResult{ + Kind: info.Kind, + Comment: info.Body, + }, nil +} + +func definitionSource(ctx *aflow.Context, state prepareResult, args defSourceArgs) (defSourceResult, error) { + info, err := state.Index.DefinitionSource(args.SourceFile, args.Name, args.IncludeLines) + if err != nil || info == nil { + return defSourceResult{ + Missing: info == nil, + }, err + } + return defSourceResult{ + SourceFile: info.File, + SourceCode: info.Body, + }, nil +} diff --git a/pkg/aflow/verify.go b/pkg/aflow/verify.go new file mode 100644 index 000000000000..d7ccbd124e27 --- /dev/null +++ b/pkg/aflow/verify.go @@ -0,0 +1,98 @@ +// Copyright 2025 syzkaller project authors. All rights reserved. +// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. + +package aflow + +import ( + "fmt" + "maps" + "reflect" + "slices" +) + +type verifyContext struct { + actions map[string]bool + state map[string]*varState + err error +} + +type varState struct { + action string + typ reflect.Type + used bool +} + +func (ctx *verifyContext) errorf(who, msg string, args ...any) { + noteError(&ctx.err, fmt.Sprintf("action %v: %v", who, msg), args...) 
+} + +func (ctx *verifyContext) requireNotEmpty(who, name, value string) { + if value == "" { + ctx.errorf(who, "%v must not be empty", name) + } +} + +func (ctx *verifyContext) requireInput(who, name string, typ reflect.Type) { + state := ctx.state[name] + if state == nil { + ctx.errorf(who, "no input %v, available inputs: %v", + name, slices.Collect(maps.Keys(ctx.state))) + return + } + if typ != state.typ { + ctx.errorf(who, "input %v has wrong type: want %v, has %v", + name, typ, state.typ) + } + state.used = true +} + +func (ctx *verifyContext) provideOutput(who, name string, typ reflect.Type, unique bool) { + state := ctx.state[name] + if state != nil { + if unique { + ctx.errorf(who, "output %v is already set", name) + } else if typ != state.typ { + ctx.errorf(who, "output %v changes type: %v -> %v", + name, state.typ, typ) + } else if !state.used { + ctx.errorf(state.action, "output %v is unused", name) + } + } + ctx.state[name] = &varState{ + action: who, + typ: typ, + } +} + +func (ctx *verifyContext) finalize() error { + for name, state := range ctx.state { + if !state.used { + ctx.errorf(state.action, "output %v is unused", name) + } + } + return ctx.err +} + +func noteError(errp *error, msg string, args ...any) { + if *errp == nil { + *errp = fmt.Errorf(msg, args...) 
+ } +} + +func requireInputs[T any](ctx *verifyContext, who string) { + for name, typ := range foreachFieldOf[T]() { + ctx.requireInput(who, name, typ) + } +} + +func provideOutputs[T any](ctx *verifyContext, who string) { + for name, typ := range foreachFieldOf[T]() { + ctx.provideOutput(who, name, typ, true) + } +} + +func requireSchema[T any](ctx *verifyContext, who, what string) { + if _, err := schemaFor[T](); err != nil { + ctx.errorf(who, "%v: %v", what, err) + } +} diff --git a/pkg/osutil/osutil.go b/pkg/osutil/osutil.go index 936465ee7ea6..fa963a3fbbae 100644 --- a/pkg/osutil/osutil.go +++ b/pkg/osutil/osutil.go @@ -11,6 +11,7 @@ import ( "errors" "fmt" "io" + "io/fs" "os" "os/exec" "path/filepath" @@ -366,3 +367,20 @@ func CreationTime(fi os.FileInfo) time.Time { // //go:linkname MonotonicNano runtime.nanotime func MonotonicNano() time.Duration + +// DiskUsage returns total recursive disk usage of the dir (similar to du -s). +func DiskUsage(dir string) (uint64, error) { + var total uint64 + err := filepath.WalkDir(dir, func(path string, d fs.DirEntry, err error) error { + if err != nil { + return err + } + info, err := d.Info() + if err != nil { + return err + } + total += sysDiskUsage(info) + return nil + }) + return total, err +} diff --git a/pkg/osutil/osutil_darwin.go b/pkg/osutil/osutil_darwin.go deleted file mode 100644 index 0b1f30711251..000000000000 --- a/pkg/osutil/osutil_darwin.go +++ /dev/null @@ -1,39 +0,0 @@ -// Copyright 2017 syzkaller project authors. All rights reserved. -// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. 
- -package osutil - -import ( - "os" - "os/exec" - "time" -) - -func creationTime(fi os.FileInfo) time.Time { - return time.Time{} -} - -func RemoveAll(dir string) error { - return os.RemoveAll(dir) -} - -func SystemMemorySize() uint64 { - return 0 -} - -func prolongPipe(r, w *os.File) { -} - -func Sandbox(cmd *exec.Cmd, user, net bool) error { - return nil -} - -func SandboxChown(file string) error { - return nil -} - -func setPdeathsig(cmd *exec.Cmd, hardKill bool) { -} - -func killPgroup(cmd *exec.Cmd) { -} diff --git a/pkg/osutil/osutil_fuchsia.go b/pkg/osutil/osutil_fuchsia.go index fce4e38ba303..a4a8c22b2473 100644 --- a/pkg/osutil/osutil_fuchsia.go +++ b/pkg/osutil/osutil_fuchsia.go @@ -7,43 +7,12 @@ package osutil import ( "os" - "os/exec" - "time" ) -func creationTime(fi os.FileInfo) time.Time { - return time.Time{} -} - func HandleInterrupts(shutdown chan struct{}) { } -func RemoveAll(dir string) error { - return os.RemoveAll(dir) -} - -func SystemMemorySize() uint64 { - return 0 -} - func ProcessExitStatus(ps *os.ProcessState) int { // TODO: can be extracted from ExitStatus string. 
return 0 } - -func prolongPipe(r, w *os.File) { -} - -func Sandbox(cmd *exec.Cmd, user, net bool) error { - return nil -} - -func SandboxChown(file string) error { - return nil -} - -func setPdeathsig(cmd *exec.Cmd, hardKill bool) { -} - -func killPgroup(cmd *exec.Cmd) { -} diff --git a/pkg/osutil/osutil_linux.go b/pkg/osutil/osutil_linux.go index 63743a5f2a6b..50be9b047282 100644 --- a/pkg/osutil/osutil_linux.go +++ b/pkg/osutil/osutil_linux.go @@ -5,6 +5,7 @@ package osutil import ( "fmt" + "io/fs" "os" "os/exec" "path/filepath" @@ -150,3 +151,8 @@ func prolongPipe(r, w *os.File) { syscall.Syscall(syscall.SYS_FCNTL, w.Fd(), syscall.F_SETPIPE_SZ, uintptr(sz)) } } + +func sysDiskUsage(info fs.FileInfo) uint64 { + stat := info.Sys().(*syscall.Stat_t) + return uint64(max(0, stat.Size, stat.Blocks*512)) +} diff --git a/pkg/osutil/osutil_bsd.go b/pkg/osutil/osutil_nonlinux.go similarity index 76% rename from pkg/osutil/osutil_bsd.go rename to pkg/osutil/osutil_nonlinux.go index b26c91ff7d03..1d3ee8b82f03 100644 --- a/pkg/osutil/osutil_bsd.go +++ b/pkg/osutil/osutil_nonlinux.go @@ -1,11 +1,12 @@ -// Copyright 2017 syzkaller project authors. All rights reserved. +// Copyright 2026 syzkaller project authors. All rights reserved. // Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. 
-//go:build freebsd || netbsd || openbsd +//go:build !linux package osutil import ( + "io/fs" "os" "os/exec" "time" @@ -39,3 +40,7 @@ func setPdeathsig(cmd *exec.Cmd, hardKill bool) { func killPgroup(cmd *exec.Cmd) { } + +func sysDiskUsage(info fs.FileInfo) uint64 { + return uint64(max(0, info.Size())) +} diff --git a/pkg/osutil/osutil_test.go b/pkg/osutil/osutil_test.go index 4d9bf4cf81b3..460cc85ca898 100644 --- a/pkg/osutil/osutil_test.go +++ b/pkg/osutil/osutil_test.go @@ -4,6 +4,7 @@ package osutil import ( + "bytes" "fmt" "os" "path/filepath" @@ -150,3 +151,44 @@ func TestReadWriteJSON(t *testing.T) { t.Fatal(diff) } } + +func TestDiskUsage(t *testing.T) { + dir := t.TempDir() + var currentUsage uint64 + expectUsage := func(minIncrease, maxIncrease uint64) { + usage, err := DiskUsage(dir) + if err != nil { + t.Fatal(err) + } + expectMin := currentUsage + minIncrease + expectMax := currentUsage + maxIncrease + t.Logf("got usage %v when expected (%v, %v)", usage, expectMin, expectMax) + if usage <= expectMin || usage >= expectMax { + t.Fatalf("bad usage %v, expect (%v, %v)", usage, expectMin, expectMax) + } + currentUsage = usage + } + expectUsage(1, 5<<10) + if err := MkdirAll(filepath.Join(dir, "nested")); err != nil { + t.Fatal(err) + } + expectUsage(1, 5<<10) + if err := WriteFile(filepath.Join(dir, "nested", "foo"), bytes.Repeat([]byte{'a'}, 1<<10)); err != nil { + t.Fatal(err) + } + expectUsage(1<<10, 5<<10) + if err := WriteFile(filepath.Join(dir, "nested", "bar"), bytes.Repeat([]byte{'a'}, 10<<10)); err != nil { + t.Fatal(err) + } + expectUsage(10<<10, 14<<10) + // Symlinks must not be counted twice. 
+ if err := os.Symlink(filepath.Join(dir, "nested"), filepath.Join(dir, "dirlink")); err != nil { + t.Fatal(err) + } + expectUsage(1, 1<<10) + + if err := os.Symlink(filepath.Join(dir, "nested", "bar"), filepath.Join(dir, "filelink")); err != nil { + t.Fatal(err) + } + expectUsage(1, 1<<10) +} diff --git a/pkg/osutil/osutil_windows.go b/pkg/osutil/osutil_windows.go index 03e729e836f8..db9f56efa301 100644 --- a/pkg/osutil/osutil_windows.go +++ b/pkg/osutil/osutil_windows.go @@ -5,43 +5,12 @@ package osutil import ( "os" - "os/exec" "syscall" - "time" ) -func creationTime(fi os.FileInfo) time.Time { - return time.Time{} -} - func HandleInterrupts(shutdown chan struct{}) { } -func RemoveAll(dir string) error { - return os.RemoveAll(dir) -} - -func SystemMemorySize() uint64 { - return 0 -} - -func prolongPipe(r, w *os.File) { -} - func ProcessExitStatus(ps *os.ProcessState) int { return ps.Sys().(syscall.WaitStatus).ExitStatus() } - -func Sandbox(cmd *exec.Cmd, user, net bool) error { - return nil -} - -func SandboxChown(file string) error { - return nil -} - -func setPdeathsig(cmd *exec.Cmd, hardKill bool) { -} - -func killPgroup(cmd *exec.Cmd) { -} diff --git a/pkg/updater/updater.go b/pkg/updater/updater.go index bf7b816ca6f2..ed49386092f2 100644 --- a/pkg/updater/updater.go +++ b/pkg/updater/updater.go @@ -88,6 +88,7 @@ func New(cfg *Config) (*Updater, error) { "tag": true, // contains syzkaller repo git hash "bin/syz-ci": true, // these are just copied from syzkaller dir "bin/syz-manager": true, + "bin/syz-agent": true, "sys/*/test/*": true, } for target := range cfg.Targets { @@ -273,7 +274,7 @@ func (upd *Updater) build(commit *vcs.Commit) error { } } // This will also generate descriptions and should go before the 'go test' below. - cmd := osutil.Command(instance.MakeBin, "host", "ci") + cmd := osutil.Command(instance.MakeBin, "host", "ci", "agent") cmd.Dir = upd.syzkallerDir cmd.Env = append([]string{"GOPATH=" + upd.gopathDir}, os.Environ()...) 
if _, err := osutil.Run(time.Hour, cmd); err != nil { diff --git a/syz-agent/agent.go b/syz-agent/agent.go new file mode 100644 index 000000000000..a1d11445ca3b --- /dev/null +++ b/syz-agent/agent.go @@ -0,0 +1,224 @@ +// Copyright 2025 syzkaller project authors. All rights reserved. +// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. + +package main + +import ( + "context" + "encoding/json" + "flag" + "fmt" + "maps" + _ "net/http/pprof" + "path/filepath" + "sync" + "time" + + "github.com/google/syzkaller/dashboard/dashapi" + "github.com/google/syzkaller/pkg/aflow" + _ "github.com/google/syzkaller/pkg/aflow/flow" + "github.com/google/syzkaller/pkg/aflow/trajectory" + "github.com/google/syzkaller/pkg/config" + "github.com/google/syzkaller/pkg/log" + "github.com/google/syzkaller/pkg/mgrconfig" + "github.com/google/syzkaller/pkg/osutil" + "github.com/google/syzkaller/pkg/tool" + "github.com/google/syzkaller/pkg/updater" + "github.com/google/syzkaller/prog" +) + +type Config struct { + // Currently serves only net/http/pprof handlers. + HTTP string `json:"http"` + DashboardAddr string `json:"dashboard_addr"` + DashboardClient string `json:"dashboard_client"` // Global non-namespace client. + DashboardKey string `json:"dashboard_key"` + SyzkallerRepo string `json:"syzkaller_repo"` + SyzkallerBranch string `json:"syzkaller_branch"` + // Pre-built tools/clang/codesearch clang tool. + CodesearchToolBin string `json:"codesearch_tool_bin"` + KernelConfig string `json:"kernel_config"` + Target string `json:"target"` + Image string `json:"image"` + Type string `json:"type"` + VM json.RawMessage `json:"vm"` + // Use fixed base commit for patching jobs (for testing). + FixedBaseCommit string `json:"fixed_base_commit"` + // Use this LLM model (for testing, if empty use a default model). 
+ Model string `json:"model"` +} + +func main() { + var ( + flagConfig = flag.String("config", "", "config file") + flagExitOnUpgrade = flag.Bool("exit-on-upgrade", false, + "exit after a syz-ci upgrade is applied; otherwise syz-ci restarts") + flagAutoUpdate = flag.Bool("autoupdate", true, "auto-update the binary (for testing)") + ) + defer tool.Init()() + log.SetName("syz-agent") + if err := run(*flagConfig, *flagExitOnUpgrade, *flagAutoUpdate); err != nil { + log.Fatal(err) + } +} + +func run(configFile string, exitOnUpgrade, autoUpdate bool) error { + cfg := &Config{ + SyzkallerRepo: "https://github.com/google/syzkaller.git", + SyzkallerBranch: "master", + Model: aflow.DefaultModel, + } + if err := config.LoadFile(configFile, cfg); err != nil { + return fmt.Errorf("failed to load config: %w", err) + } + tool.ServeHTTP(cfg.HTTP) + os, vmarch, arch, _, _, err := mgrconfig.SplitTarget(cfg.Target) + if err != nil { + return err + } + dash, err := dashapi.New(cfg.DashboardClient, cfg.DashboardAddr, cfg.DashboardKey) + if err != nil { + return err + } + buildSem := osutil.NewSemaphore(1) + updater, err := updater.New(&updater.Config{ + ExitOnUpdate: exitOnUpgrade, + BuildSem: buildSem, + SyzkallerRepo: cfg.SyzkallerRepo, + SyzkallerBranch: cfg.SyzkallerBranch, + Targets: map[updater.Target]bool{ + { + OS: os, + VMArch: vmarch, + Arch: arch, + }: true, + }, + }) + if err != nil { + return err + } + updatePending := make(chan struct{}) + shutdownPending := make(chan struct{}) + osutil.HandleInterrupts(shutdownPending) + updater.UpdateOnStart(autoUpdate, updatePending, shutdownPending) + + const workdir = "workdir" + const cacheSize = 1 << 40 // 1TB should be enough for everyone! 
+ cache, err := aflow.NewCache(filepath.Join(workdir, "cache"), cacheSize) + if err != nil { + return err + } + + s := &Server{ + cfg: cfg, + dash: dash, + cache: cache, + workdir: workdir, + } + + ctx, stop := context.WithCancel(context.Background()) + var wg sync.WaitGroup + wg.Add(1) + go func() { + defer wg.Done() + for { + ok, err := s.poll(ctx) + if err != nil { + log.Error(err) + dash.LogError("syz-agent", "%v", err) + } + var delay time.Duration + if !ok { + // Don't poll dashboard too often, if there are no jobs, + // or errors are happenning. + delay = 10 * time.Second + } + select { + case <-ctx.Done(): + return + case <-time.After(delay): + } + } + }() + + select { + case <-shutdownPending: + case <-updatePending: + } + stop() + wg.Wait() + + select { + case <-shutdownPending: + default: + updater.UpdateAndRestart() + } + return nil +} + +type Server struct { + cfg *Config + dash *dashapi.Dashboard + cache *aflow.Cache + workdir string +} + +func (s *Server) poll(ctx context.Context) ( + bool, error) { + req := &dashapi.AIJobPollReq{ + LLMModel: s.cfg.Model, + CodeRevision: prog.GitRevision, + } + for _, flow := range aflow.Flows { + req.Workflows = append(req.Workflows, dashapi.AIWorkflow{ + Type: flow.Type, + Name: flow.Name, + }) + } + resp, err := s.dash.AIJobPoll(req) + if err != nil { + return false, err + } + if resp.ID == "" { + return false, nil + } + doneReq := &dashapi.AIJobDoneReq{ + ID: resp.ID, + } + results, jobErr := s.executeJob(ctx, resp) + doneReq.Results = results + if jobErr != nil { + doneReq.Error = jobErr.Error() + } + if err := s.dash.AIJobDone(doneReq); err != nil { + return false, err + } + if jobErr != nil { + return false, jobErr + } + return true, nil +} + +func (s *Server) executeJob(ctx context.Context, req *dashapi.AIJobPollResp) (map[string]any, error) { + flow := aflow.Flows[req.Workflow] + if flow == nil { + return nil, fmt.Errorf("unsupported flow %q", req.Workflow) + } + inputs := map[string]any{ + "Syzkaller": 
osutil.Abs(filepath.FromSlash("syzkaller/current")), + "CodesearchToolBin": s.cfg.CodesearchToolBin, + "Image": s.cfg.Image, + "Type": s.cfg.Type, + "VM": s.cfg.VM, + "FixedBaseCommit": s.cfg.FixedBaseCommit, + } + maps.Insert(inputs, maps.All(req.Args)) + onEvent := func(span *trajectory.Span) error { + log.Logf(0, "%v", span) + return s.dash.AITrajectoryLog(&dashapi.AITrajectoryReq{ + JobID: req.ID, + Span: span, + }) + } + return flow.Execute(ctx, s.cfg.Model, s.workdir, inputs, s.cache, onEvent) +} diff --git a/tools/clang/codesearch/README.md b/tools/clang/codesearch/README.md new file mode 100644 index 000000000000..3f444f61de8a --- /dev/null +++ b/tools/clang/codesearch/README.md @@ -0,0 +1,8 @@ +# syz-codesearch + +Clang-based tool that indexes kernel source code to power +[pkg/aflow/tool/codesearcher](/pkg/aflow/tool/codesearcher/codesearcher.go) +agentic tool. + +The tool can be built following the procedure described for +[syz-declextract tool](/tools/syz-declextract/README.md). diff --git a/tools/syz-aflow/aflow.go b/tools/syz-aflow/aflow.go new file mode 100644 index 000000000000..ea127b164aea --- /dev/null +++ b/tools/syz-aflow/aflow.go @@ -0,0 +1,132 @@ +// Copyright 2025 syzkaller project authors. All rights reserved. +// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. + +// syz-aflow tool can be used to invoke any agentic workflow registered with pkg/aflow. 
+// For example, to run the patching workflow use: +// +// go run ./tools/syz-aflow -input=input.json -download-bug=d8fd35fa6177afa8c92b +// go run ./tools/syz-aflow -input=input.json -workflow=patching -workdir=workdir +package main + +import ( + "context" + "encoding/json" + "flag" + "fmt" + "io" + "log" + "net/http" + "os" + "path/filepath" + + "github.com/google/syzkaller/pkg/aflow" + _ "github.com/google/syzkaller/pkg/aflow/flow" + "github.com/google/syzkaller/pkg/aflow/trajectory" + "github.com/google/syzkaller/pkg/osutil" + "github.com/google/syzkaller/pkg/tool" +) + +func main() { + var ( + flagFlow = flag.String("workflow", "", "workflow to execute") + flagInput = flag.String("input", "", "input json file with workflow arguments") + flagWorkdir = flag.String("workdir", "", "directory for kernel checkout, kernel builds, etc") + flagModel = flag.String("model", aflow.DefaultModel, "use this LLM model") + flagDownloadBug = flag.String("download-bug", "", "extid of a bug to download from the dashboard"+ + " and save into -input file") + ) + defer tool.Init()() + if *flagDownloadBug != "" { + if err := downloadBug(*flagDownloadBug, *flagInput); err != nil { + tool.Fail(err) + } + return + } + if *flagFlow == "" { + fmt.Fprintf(os.Stderr, "syz-aflow usage:\n") + flag.PrintDefaults() + fmt.Fprintf(os.Stderr, "available workflows:\n") + for _, flow := range aflow.Flows { + fmt.Fprintf(os.Stderr, "\t%v: %v\n", flow.Name, flow.Description) + } + return + } + if err := run(context.Background(), *flagModel, *flagFlow, *flagInput, *flagWorkdir); err != nil { + tool.Fail(err) + } +} + +func run(ctx context.Context, model, flowName, inputFile, workdir string) error { + flow := aflow.Flows[flowName] + if flow == nil { + return fmt.Errorf("workflow %q is not found", flowName) + } + inputData, err := os.ReadFile(inputFile) + if err != nil { + return fmt.Errorf("failed to open -input file: %w", err) + } + var inputs map[string]any + if err := json.Unmarshal(inputData, 
&inputs); err != nil { + return err + } + cache, err := aflow.NewCache(filepath.Join(workdir, "cache"), 0) + if err != nil { + return err + } + _, err = flow.Execute(ctx, model, workdir, inputs, cache, onEvent) + return err +} + +func onEvent(span *trajectory.Span) error { + log.Printf("%v", span) + return nil +} + +func downloadBug(extID, inputFile string) error { + if inputFile == "" { + return fmt.Errorf("-download-bug requires -input flag") + } + resp, err := get(fmt.Sprintf("/bug?extid=%v&json=1", extID)) + if err != nil { + return err + } + var info map[string]any + if err := json.Unmarshal([]byte(resp), &info); err != nil { + return err + } + crash := info["crashes"].([]any)[0].(map[string]any) + inputs := map[string]any{ + "SyzkallerCommit": crash["syzkaller-commit"], + } + inputs["ReproSyz"], err = get(crash["syz-reproducer"].(string)) + if err != nil { + return err + } + inputs["ReproC"], err = get(crash["c-reproducer"].(string)) + if err != nil { + return err + } + inputs["KernelConfig"], err = get(crash["kernel-config"].(string)) + if err != nil { + return err + } + data, err := json.MarshalIndent(inputs, "", "\t") + if err != nil { + return err + } + return osutil.WriteFile(inputFile, data) +} + +func get(path string) (string, error) { + if path == "" { + return "", nil + } + const host = "https://syzbot.org" + resp, err := http.Get(fmt.Sprintf("%v%v", host, path)) + if err != nil { + return "", err + } + defer resp.Body.Close() + body, err := io.ReadAll(resp.Body) + return string(body), err +}