Skip to content

Commit d321c8b

Browse files
Implement CEL runtime cost budget to prevent resource exhaustion
Added cost tracking and limiting for CEL expressions to prevent resource exhaustion from complex expressions. Implemented both per-call limits and a total runtime budget for ResourceGraphDefinition reconciliation.

- Added cost tracking constants and helper functions in pkg/cel/cost.go
- Implemented cost tracking in runtime.evaluateExpression
- Added proper error handling in controller_reconcile.go for budget-exceeded errors
- Updated dryRunExpression to use cost tracking options

Fixes #191
1 parent bf29b58 commit d321c8b

File tree

7 files changed

+529
-15
lines changed

7 files changed

+529
-15
lines changed

Diff for: pkg/cel/cost.go

+58
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
package cel
2+
3+
import (
4+
"errors"
5+
"fmt"
6+
"strings"
7+
8+
"github.com/google/cel-go/cel"
9+
)
10+
11+
const (
	// PerCallLimit is the cost limit applied to each individual CEL expression
	// evaluation. It allows roughly 0.1 second of execution time per
	// evaluation.
	PerCallLimit = 1000000

	// RuntimeCELCostBudget is the total cost budget allowed during a single
	// ResourceGraphDefinition reconciliation cycle. It covers all expression
	// evaluations across all resources, and allows roughly 1 second of total
	// execution time, i.e. ten times PerCallLimit. The total budget must not be
	// smaller than PerCallLimit, otherwise the per-call limit could never be
	// reached before the overall budget is exhausted.
	RuntimeCELCostBudget = 10000000

	// CheckFrequency configures the number of iterations within a
	// comprehension to evaluate before checking whether the function
	// evaluation has been interrupted.
	CheckFrequency = 100
)
26+
27+
// ErrCELBudgetExceeded is the sentinel error for CEL evaluations that exceed
// their cost budget. WrapCostLimitExceeded uses its text as the prefix of the
// errors it returns, and the instance controller checks for it with errors.Is.
var ErrCELBudgetExceeded = errors.New("CEL Cost budget exceeded")
28+
29+
// IsCostLimitExceeded reports whether err is a CEL cost limit error. The
// check is textual: it looks for the phrase "cost limit exceeded" in the
// error message. A nil error is never a cost limit error.
func IsCostLimitExceeded(err error) bool {
	return err != nil && strings.Contains(err.Error(), "cost limit exceeded")
}
36+
37+
// WrapCostLimitExceeded wraps a CEL cost limit error with our standard ErrCELBudgetExceeded
38+
// If the error is not a cost limit error, it returns the original error
39+
func WrapCostLimitExceeded(err error, totalCost int64) error {
40+
if err == nil {
41+
return nil
42+
}
43+
44+
if IsCostLimitExceeded(err) {
45+
return errors.New(ErrCELBudgetExceeded.Error() + ": total CEL cost " +
46+
fmt.Sprintf("%d", totalCost) + " exceeded budget of " +
47+
fmt.Sprintf("%d", RuntimeCELCostBudget))
48+
}
49+
50+
return err
51+
}
52+
53+
func WithCostTracking(costLimit int64) []cel.ProgramOption {
54+
return []cel.ProgramOption{
55+
cel.CostLimit(uint64(costLimit)),
56+
cel.EvalOptions(cel.OptTrackCost),
57+
}
58+
}

Diff for: pkg/cel/cost_test.go

+184
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,184 @@
1+
// Copyright 2025 The Kube Resource Orchestrator Authors.
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License"). You may
4+
// not use this file except in compliance with the License. A copy of the
5+
// License is located at
6+
//
7+
// http://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// or in the "license" file accompanying this file. This file is distributed
10+
// on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
11+
// express or implied. See the License for the specific language governing
12+
// permissions and limitations under the License.
13+
14+
package cel
15+
16+
import (
17+
"errors"
18+
"fmt"
19+
"testing"
20+
21+
"github.com/google/cel-go/cel"
22+
"github.com/stretchr/testify/assert"
23+
"github.com/stretchr/testify/require"
24+
)
25+
26+
// Budget values used only by tests. They are far above the production limits
// so that cost limits never cause spurious test failures.

// TestOnlyRuntimeCELCostBudget is the total runtime budget for tests
// (100 billion).
var TestOnlyRuntimeCELCostBudget int64 = 100000000000

// TestOnlyPerCallLimit is the per-evaluation cost limit for tests
// (10 million), high enough for any test expression.
var TestOnlyPerCallLimit int64 = 10000000
32+
33+
// WithTestBudget returns program options suitable for testing,
34+
// with much higher limits than production
35+
func WithTestBudget() []cel.ProgramOption {
36+
return []cel.ProgramOption{
37+
cel.CostLimit(uint64(TestOnlyPerCallLimit)),
38+
cel.EvalOptions(cel.OptTrackCost),
39+
}
40+
}
41+
42+
func TestWithCostTracking(t *testing.T) {
43+
// WithCostTracking returns the expected number of options
44+
opts := WithCostTracking(10000)
45+
assert.Equal(t, 2, len(opts), "Expected 2 options to be returned")
46+
}
47+
48+
func TestCELBudgetExceeded(t *testing.T) {
49+
env, err := cel.NewEnv()
50+
require.NoError(t, err)
51+
52+
ast, iss := env.Compile(`"test"`)
53+
require.NoError(t, iss.Err())
54+
55+
// Testing with unlimited budget (should succeed)
56+
opts := WithCostTracking(10000)
57+
program, err := env.Program(ast, opts...)
58+
require.NoError(t, err)
59+
60+
// Evaluating with sufficient budget
61+
_, details, err := program.Eval(map[string]interface{}{})
62+
require.NoError(t, err)
63+
assert.NotNil(t, details)
64+
assert.NotNil(t, details.ActualCost())
65+
66+
// Testing with minimal budget (still should pass for this trivial expression)
67+
opts = WithCostTracking(1)
68+
program, err = env.Program(ast, opts...)
69+
require.NoError(t, err)
70+
71+
// Evaluating with minimal budget
72+
_, details, err = program.Eval(map[string]interface{}{})
73+
require.NoError(t, err)
74+
assert.NotNil(t, details)
75+
assert.NotNil(t, details.ActualCost())
76+
77+
// Testing with a more complex expression that will certainly exceed a 0 budget
78+
ast, iss = env.Compile(`[1, 2, 3, 4, 5, 6, 7, 8, 9, 10].map(x, [1, 2, 3, 4, 5, 6, 7, 8, 9, 10].map(y, x * y)).filter(arr, arr.exists(e, e > 50))`)
79+
require.NoError(t, iss.Err())
80+
81+
// Setting up a large budget that should allow the expression to complete
82+
opts = WithCostTracking(1000000)
83+
program, err = env.Program(ast, opts...)
84+
require.NoError(t, err)
85+
86+
// Evaluating with large budget - should succeed
87+
val, details, err := program.Eval(map[string]interface{}{})
88+
if err == nil {
89+
t.Logf("Success! Expression evaluated with result: %v, cost: %v", val, *details.ActualCost())
90+
} else {
91+
t.Logf("Actual error: %v", err)
92+
require.Error(t, err)
93+
errMsg := err.Error()
94+
assert.Contains(t, errMsg, "cost limit exceeded", "Error should indicate cost limit exceeded")
95+
}
96+
97+
// A tiny budget to ensure the test passes
98+
opts = WithCostTracking(0)
99+
program, err = env.Program(ast, opts...)
100+
require.NoError(t, err)
101+
102+
// Evaluating with zero budget - should fail with budget exceeded
103+
_, _, err = program.Eval(map[string]interface{}{})
104+
require.Error(t, err)
105+
t.Logf("Actual error: %v", err)
106+
107+
errMsg := err.Error()
108+
assert.Contains(t, errMsg, "cost limit exceeded", "Error should indicate cost limit exceeded")
109+
}
110+
111+
func TestIsCostLimitExceeded(t *testing.T) {
112+
tests := []struct {
113+
name string
114+
err error
115+
expected bool
116+
}{
117+
{
118+
name: "nil error",
119+
err: nil,
120+
expected: false,
121+
},
122+
{
123+
name: "non-cost error",
124+
err: errors.New("some other error"),
125+
expected: false,
126+
},
127+
{
128+
name: "cost limit error",
129+
err: errors.New("operation cancelled: actual cost limit exceeded"),
130+
expected: true,
131+
},
132+
}
133+
134+
for _, tt := range tests {
135+
t.Run(tt.name, func(t *testing.T) {
136+
result := IsCostLimitExceeded(tt.err)
137+
assert.Equal(t, tt.expected, result)
138+
})
139+
}
140+
}
141+
142+
func TestWrapCostLimitExceeded(t *testing.T) {
143+
tests := []struct {
144+
name string
145+
err error
146+
totalCost int64
147+
want string
148+
wantErr bool
149+
}{
150+
{
151+
name: "nil error",
152+
err: nil,
153+
totalCost: 0,
154+
want: "",
155+
wantErr: false,
156+
},
157+
{
158+
name: "non-cost error",
159+
err: errors.New("some other error"),
160+
totalCost: 5000,
161+
want: "some other error",
162+
wantErr: true,
163+
},
164+
{
165+
name: "cost limit error",
166+
err: errors.New("operation cancelled: actual cost limit exceeded"),
167+
totalCost: 15000,
168+
want: fmt.Sprintf("CEL Cost budget exceeded: total CEL cost 15000 exceeded budget of %d", RuntimeCELCostBudget),
169+
wantErr: true,
170+
},
171+
}
172+
173+
for _, tt := range tests {
174+
t.Run(tt.name, func(t *testing.T) {
175+
err := WrapCostLimitExceeded(tt.err, tt.totalCost)
176+
if tt.wantErr {
177+
assert.Error(t, err)
178+
assert.Equal(t, tt.want, err.Error())
179+
} else {
180+
assert.NoError(t, err)
181+
}
182+
})
183+
}
184+
}

Diff for: pkg/controller/instance/controller_reconcile.go

+10
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ package instance
1515

1616
import (
1717
"context"
18+
"errors"
1819
"fmt"
1920

2021
"github.com/go-logr/logr"
@@ -25,6 +26,7 @@ import (
2526
"k8s.io/apimachinery/pkg/types"
2627
"k8s.io/client-go/dynamic"
2728

29+
krocel "github.com/kro-run/kro/pkg/cel"
2830
"github.com/kro-run/kro/pkg/controller/instance/delta"
2931
"github.com/kro-run/kro/pkg/metadata"
3032
"github.com/kro-run/kro/pkg/requeue"
@@ -116,6 +118,14 @@ func (igr *instanceGraphReconciler) reconcileInstance(ctx context.Context) error
116118

117119
// Synchronize runtime state after each resource
118120
if _, err := igr.runtime.Synchronize(); err != nil {
121+
if errors.Is(err, krocel.ErrCELBudgetExceeded) {
122+
resourceState := igr.state.ResourceStates[resourceID]
123+
resourceState.State = "ERROR"
124+
resourceState.Err = err
125+
igr.state.State = "CELBudgetExceeded"
126+
igr.state.ReconcileErr = err
127+
return err
128+
}
119129
return fmt.Errorf("failed to synchronize reconciling resource %s: %w", resourceID, err)
120130
}
121131
}

Diff for: pkg/graph/builder.go

+4-1
Original file line numberDiff line numberDiff line change
@@ -625,7 +625,10 @@ func dryRunExpression(env *cel.Env, expression string, resources map[string]*Res
625625
}
626626

627627
// TODO(a-hilaly): thinking about a creating a library to hide this...
628-
program, err := env.Program(ast)
628+
629+
//adding cost tracking options
630+
programOpts := krocel.WithCostTracking(krocel.PerCallLimit)
631+
program, err := env.Program(ast, programOpts...)
629632
if err != nil {
630633
return nil, fmt.Errorf("failed to create program: %w", err)
631634
}

0 commit comments

Comments
 (0)