Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions cmd/eval-dev-quality/cmd/evaluate.go
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,8 @@ type Evaluate struct {
Configuration string `long:"configuration" description:"Configuration file to set up an evaluation run."`
// ExecutionTimeout holds the timeout for an execution.
ExecutionTimeout uint `long:"execution-timeout" description:"Execution timeout for compilation and tests in minutes." default:"5"`
// RunIDStartsAt holds the starting index for the run ID used in creating the result folders.
RunIDStartsAt uint `long:"run-id-starts-at" description:"Sets the starting index for the run ID." default:"1"`
// Runs holds the number of runs to perform.
Runs uint `long:"runs" description:"Number of runs to perform." default:"1"`
// RunsSequential indicates that interleaved runs are disabled and runs are performed sequentially.
Expand Down Expand Up @@ -175,6 +177,8 @@ func (command *Evaluate) Initialize(args []string) (evaluationContext *evaluate.
language.DefaultExecutionTimeout = time.Duration(command.ExecutionTimeout) * time.Minute
}

evaluationContext.RunIDStartsAt = command.RunIDStartsAt

if command.Runs == 0 {
command.logger.Panicf("number of configured runs must be greater than zero")
}
Expand Down
8 changes: 6 additions & 2 deletions evaluate/evaluate.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,8 @@ type Context struct {
// TestdataPath determines the testdata path where all repositories reside grouped by languages.
TestdataPath string

// RunIDStartsAt holds the starting index of the run IDs assigned when running an evaluation multiple times.
RunIDStartsAt uint
// Runs holds the number of runs to perform.
Runs uint
// RunsSequential indicates that interleaved runs are disabled and runs are performed sequentially.
Expand Down Expand Up @@ -148,6 +150,7 @@ func Evaluate(ctx *Context) {
} else {
runCount = rl + 1
}
runID := ctx.RunIDStartsAt + runCount - 1

if err := temporaryRepository.Reset(logger); err != nil {
logger.Panicf("ERROR: unable to reset temporary repository path: %s", err)
Expand Down Expand Up @@ -177,7 +180,7 @@ func Evaluate(ctx *Context) {
}

// Write the task assessment to the evaluation CSV file.
if err := evaluationFile.WriteEvaluationRecord(model, language, temporaryRepository.Name(), runCount, assessment); err != nil {
if err := evaluationFile.WriteEvaluationRecord(model, language, temporaryRepository.Name(), runID, assessment); err != nil {
logger.Panicf("ERROR: cannot write evaluation record: %s", err)
}
}
Expand Down Expand Up @@ -271,6 +274,7 @@ func Evaluate(ctx *Context) {
} else {
runCount = rl + 1
}
runID := ctx.RunIDStartsAt + runCount - 1

if err := temporaryRepository.Reset(logger); err != nil {
logger.Panicf("ERROR: unable to reset temporary repository path: %s", err)
Expand All @@ -292,7 +296,7 @@ func Evaluate(ctx *Context) {
}

// Write the task assessment to the evaluation CSV file.
if err := evaluationFile.WriteEvaluationRecord(model, language, temporaryRepository.Name(), runCount, assessment); err != nil {
if err := evaluationFile.WriteEvaluationRecord(model, language, temporaryRepository.Name(), runID, assessment); err != nil {
logger.Panicf("ERROR: cannot write evaluation record: %s", err)
}
}
Expand Down
22 changes: 20 additions & 2 deletions evaluate/evaluate_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,8 @@ import (
modeltesting "github.com/symflower/eval-dev-quality/model/testing"
"github.com/symflower/eval-dev-quality/provider"
providertesting "github.com/symflower/eval-dev-quality/provider/testing"
"github.com/symflower/gota/dataframe"
"github.com/symflower/gota/series"
)

var (
Expand Down Expand Up @@ -1235,6 +1237,7 @@ func TestEvaluate(t *testing.T) {
repositoryPath,
},

RunIDStartsAt: 11,
Runs: 3,
RunsSequential: false,
},
Expand Down Expand Up @@ -1395,7 +1398,14 @@ func TestEvaluate(t *testing.T) {
assert.Equal(t, 1, strings.Count(data, "creating temporary repository"), "create only one temporary repository")
},
filepath.Join(string(evaluatetask.IdentifierWriteTests), log.CleanModelNameForFileSystem(mockedModelID), "golang", "golang", "plain", "evaluation.log"): nil,
"evaluation.csv": nil,
"evaluation.csv": func(t *testing.T, filePath string, data string) {
dataFrame := dataframe.ReadCSV(strings.NewReader(data))
assert.NoError(t, dataFrame.Err)

expectedColumnRun := series.New([]int{11, 11, 11, 11, 12, 12, 12, 12, 13, 13, 13, 13}, series.Int, "run")
actualColumnRun := dataFrame.Col("run")
assert.Equal(t, expectedColumnRun, actualColumnRun)
},
},
})
}
Expand Down Expand Up @@ -1425,6 +1435,7 @@ func TestEvaluate(t *testing.T) {
repositoryPath,
},

RunIDStartsAt: 21,
Runs: 3,
RunsSequential: true,
},
Expand Down Expand Up @@ -1585,7 +1596,14 @@ func TestEvaluate(t *testing.T) {
assert.Contains(t, data, "\"msg\":\"starting run\",\"count\":3,\"total\":3,")
assert.NotRegexp(t, `\\\"msg\\\":\\\"starting run\\\",\\\"count\\\":\d+,\\\"total\\\":\d+\}`, data)
},
"evaluation.csv": nil,
"evaluation.csv": func(t *testing.T, filePath string, data string) {
dataFrame := dataframe.ReadCSV(strings.NewReader(data))
assert.NoError(t, dataFrame.Err)

expectedColumnRun := series.New([]int{21, 21, 21, 21, 22, 22, 22, 22, 23, 23, 23, 23}, series.Int, "run")
actualColumnRun := dataFrame.Col("run")
assert.Equal(t, expectedColumnRun, actualColumnRun)
},
},
})
}
Expand Down
2 changes: 2 additions & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ require (
github.com/pkg/errors v0.9.1
github.com/sashabaranov/go-openai v1.38.0
github.com/stretchr/testify v1.10.0
github.com/symflower/gota v0.0.0-20250312083757-2306c53d6db1
github.com/symflower/lockfile v0.0.0-20240419143922-aa3b60940c84
github.com/zimmski/osutil v1.7.1
golang.org/x/exp v0.0.0-20250305212735-054e65f0b394
Expand Down Expand Up @@ -68,6 +69,7 @@ require (
github.com/yuin/goldmark v1.7.8 // indirect
go.uber.org/multierr v1.11.0 // indirect
golang.org/x/exp/typeparams v0.0.0-20250305212735-054e65f0b394 // indirect
golang.org/x/net v0.37.0 // indirect
golang.org/x/sync v0.12.0 // indirect
golang.org/x/sys v0.31.0 // indirect
golang.org/x/telemetry v0.0.0-20250310203348-fdfaad844314 // indirect
Expand Down
4 changes: 4 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,8 @@ github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOf
github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
github.com/subosito/gotenv v1.6.0 h1:9NlTDc1FTs4qu0DDq7AEtTPNw6SVm7uBMsUCUjABIf8=
github.com/subosito/gotenv v1.6.0/go.mod h1:Dk4QP5c2W3ibzajGcXpNraDfq2IrhjMIvMSWPKKo0FU=
github.com/symflower/gota v0.0.0-20250312083757-2306c53d6db1 h1:I+amS+Ns1Jqe1pPeC+QzshwKqFT7K79SUkKEOwWdYX0=
github.com/symflower/gota v0.0.0-20250312083757-2306c53d6db1/go.mod h1:UltkFgAA4b+vETI4lB6yS2FmZG9SUSJRxJuPwmCLytg=
github.com/symflower/lockfile v0.0.0-20240419143922-aa3b60940c84 h1:yhPz6r3LLBDjoV0rIDUlyuvWQg9L4MTfdksLVX6/q0s=
github.com/symflower/lockfile v0.0.0-20240419143922-aa3b60940c84/go.mod h1:W/87GmsQmvlsvcXeuAlTGjIpTHrTTHDEIVH936LjnqI=
github.com/symflower/pretty v1.0.0 h1:wYSv0CBazyyzHNiGTwjkLzcmUQUFjRafEyWf3A7LJCk=
Expand All @@ -150,6 +152,8 @@ golang.org/x/exp/typeparams v0.0.0-20250305212735-054e65f0b394 h1:VI4qDpTkfFaCXE
golang.org/x/exp/typeparams v0.0.0-20250305212735-054e65f0b394/go.mod h1:LKZHyeOpPuZcMgxeHjJp4p5yvxrCX1xDvH10zYHhjjQ=
golang.org/x/mod v0.24.0 h1:ZfthKaKaT4NrhGVZHO1/WDTwGES4De8KtWO0SIbNJMU=
golang.org/x/mod v0.24.0/go.mod h1:IXM97Txy2VM4PJ3gI61r1YEk/gAj6zAHN3AdZt6S9Ww=
golang.org/x/net v0.37.0 h1:1zLorHbz+LYj7MQlSf1+2tPIIgibq2eL5xkrGk6f+2c=
golang.org/x/net v0.37.0/go.mod h1:ivrbrMbzFq5J41QOQh0siUuly180yBYtLp+CKbEaFx8=
golang.org/x/sync v0.12.0 h1:MHc5BpPuC30uJk597Ri8TV3CNZcTLu6B6z4lJy+g6Jw=
golang.org/x/sync v0.12.0/go.mod h1:1dzgHSNfp02xaA81J2MS99Qcpr2w7fw1gpm99rleRqA=
golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
Expand Down
Loading