Skip to content

Commit ede4b05

Browse files
feat: bound the KCL native (off-heap) memory growth in-process
function-kcl recompiles the whole KCL module on every reconcile. The native KCL runtime (loaded via dlopen/cgo) accumulates off-heap memory per compile that Go's GC, GOGC and GOMEMLIMIT cannot bound, so long-lived pods climb to their memory limit and get OOMKilled mid-render, dropping reconciles (connection reset / DeadlineExceeded). A true compile cache (BuildProgram/ExecArtifact) is not reachable through the native C-ABI this function uses, so this bounds the growth at a process boundary instead, with two complementary, opt-in mechanisms (both nil-safe and default-off): * recycle.go — a watchdog samples process RSS (incl. native off-heap memory) plus optional reconcile-count / lifetime limits; on threshold it stops accepting new reconciles, drains in-flight ones, and exits 0 so the orchestrator restarts the pod cleanly between renders instead of OOMKilling it during one. Defaults to 85% of the detected cgroup limit. * rendercache.go — memoises the KCL pipeline output keyed on the exact serialized input. A composition function is deterministic over its input, so byte-identical reconciles return the cached output without invoking the KCL runtime, skipping the recompile and a leak increment. Opt-in via FUNCTION_KCL_RENDER_CACHE_SIZE; bounded LRU with optional TTL. grpc is promoted to a direct dependency for codes/status. Signed-off-by: Callum MacDonald <callum@stakater.com>
1 parent 69432e9 commit ede4b05

7 files changed

Lines changed: 923 additions & 12 deletions

File tree

fn.go

Lines changed: 33 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,8 @@ import (
1111

1212
"github.com/crossplane/crossplane-runtime/v2/pkg/errors"
1313
"github.com/crossplane/crossplane-runtime/v2/pkg/logging"
14+
"google.golang.org/grpc/codes"
15+
"google.golang.org/grpc/status"
1416
"google.golang.org/protobuf/types/known/structpb"
1517
"k8s.io/apimachinery/pkg/runtime"
1618
"kcl-lang.io/krm-kcl/pkg/api"
@@ -42,10 +44,20 @@ type Function struct {
4244

4345
log logging.Logger
4446
dependencies string
47+
recycler *recycler
48+
cache *renderCache
4549
}
4650

4751
// RunFunction runs the Function.
4852
func (f *Function) RunFunction(_ context.Context, req *fnv1.RunFunctionRequest) (*fnv1.RunFunctionResponse, error) {
53+
// Reject new work while the process is draining for a memory recycle so
54+
// Crossplane retries this reconcile elsewhere instead of having it cut off
55+
// mid-render. begin() also bounds the recycle drain to in-flight calls.
56+
if !f.recycler.begin() {
57+
return nil, status.Error(codes.Unavailable, "function-kcl is recycling to release native memory; retry")
58+
}
59+
defer f.recycler.end()
60+
4961
log := f.log.WithValues("tag", req.GetMeta().GetTag())
5062
log.Debug("Running Function")
5163

@@ -189,7 +201,6 @@ func (f *Function) RunFunction(_ context.Context, req *fnv1.RunFunctionRequest)
189201
response.Fatal(rsp, err)
190202
return rsp, nil
191203
}
192-
inputBytes, outputBytes := bytes.NewBuffer([]byte{}), bytes.NewBuffer([]byte{})
193204
// Convert the function-kcl KCLInput to the KRM-KCL spec and run function pipelines.
194205
// Input Example: https://github.com/kcl-lang/krm-kcl/blob/main/examples/mutation/set-annotations/suite/good.yaml
195206
in.APIVersion = v1alpha1.KCLRunAPIVersion
@@ -200,16 +211,28 @@ func (f *Function) RunFunction(_ context.Context, req *fnv1.RunFunctionRequest)
200211
response.Fatal(rsp, errors.Wrap(err, "cannot marshal input to yaml"))
201212
return rsp, nil
202213
}
203-
inputBytes.Write(kclRunBytes)
204-
// Run pipeline to get the result mutated or validated by the KCL source.
205-
pipeline := kio.NewPipeline(inputBytes, outputBytes, false)
206-
207-
if err := pipeline.Execute(); err != nil {
208-
response.Fatal(rsp, errors.Wrap(err, "failed to run kcl function pipelines"))
209-
return rsp, nil
214+
// The KCL render is deterministic over kclRunBytes (source + dependencies +
215+
// all params + config). Reuse the previous output for byte-identical inputs
216+
// to skip recompiling the module — this avoids both the CPU cost and a native
217+
// memory-leak increment on no-op re-syncs. See rendercache.go.
218+
var outputData []byte
219+
if cached, ok := f.cache.lookup(kclRunBytes); ok {
220+
outputData = cached
221+
hits, misses := f.cache.stats()
222+
log.Debug("render cache hit", "hits", hits, "misses", misses)
223+
} else {
224+
inputBytes, outputBytes := bytes.NewBuffer(kclRunBytes), bytes.NewBuffer([]byte{})
225+
// Run pipeline to get the result mutated or validated by the KCL source.
226+
pipeline := kio.NewPipeline(inputBytes, outputBytes, false)
227+
if err := pipeline.Execute(); err != nil {
228+
response.Fatal(rsp, errors.Wrap(err, "failed to run kcl function pipelines"))
229+
return rsp, nil
230+
}
231+
outputData = outputBytes.Bytes()
232+
f.cache.store(kclRunBytes, outputData)
210233
}
211-
log.Debug(fmt.Sprintf("Pipeline output: %v", outputBytes.String()))
212-
data, err := pkgresource.DataResourcesFromYaml(outputBytes.Bytes())
234+
log.Debug(fmt.Sprintf("Pipeline output: %v", string(outputData)))
235+
data, err := pkgresource.DataResourcesFromYaml(outputData)
213236
if err != nil {
214237
response.Fatal(rsp, errors.Wrapf(err, "cannot parse data resources from the pipeline output in %T", rsp))
215238
return rsp, nil

go.mod

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ require (
1212
github.com/go-logr/logr v1.4.3
1313
github.com/google/go-cmp v0.7.0
1414
github.com/pkg/errors v0.9.1
15+
google.golang.org/grpc v1.78.0
1516
google.golang.org/protobuf v1.36.11
1617
gopkg.in/yaml.v2 v2.4.0
1718
k8s.io/apimachinery v0.35.4
@@ -244,7 +245,6 @@ require (
244245
google.golang.org/genproto v0.0.0-20260128011058-8636f8732409 // indirect
245246
google.golang.org/genproto/googleapis/api v0.0.0-20260203192932-546029d2fa20 // indirect
246247
google.golang.org/genproto/googleapis/rpc v0.0.0-20260203192932-546029d2fa20 // indirect
247-
google.golang.org/grpc v1.78.0 // indirect
248248
gopkg.in/inf.v0 v0.9.1 // indirect
249249
gopkg.in/warnings.v0 v0.1.2 // indirect
250250
gopkg.in/yaml.v3 v3.0.1 // indirect

main.go

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,18 @@ func (c *CLI) Run() error {
3636
if err != nil {
3737
return err
3838
}
39-
return function.Serve(&Function{dependencies: dependencies, log: log},
39+
// Watchdog that recycles the process before the KCL native memory leak can
40+
// OOMKill it mid-reconcile. Configured via FUNCTION_KCL_MAX_* env vars;
41+
// no-op when no trigger is enabled.
42+
rec := newRecycler(log, recycleConfigFromEnv())
43+
rec.run()
44+
// Optional render cache: memoise KCL output for byte-identical reconciles to
45+
// skip recompilation (CPU + leak). Enabled via FUNCTION_KCL_RENDER_CACHE_SIZE.
46+
cache := newRenderCacheFromEnv()
47+
if cache.enabled() {
48+
log.Info("render cache enabled", "maxEntries", cache.max, "ttl", cache.ttl.String())
49+
}
50+
return function.Serve(&Function{dependencies: dependencies, log: log, recycler: rec, cache: cache},
4051
function.Listen(c.Network, c.Address),
4152
function.MTLSCertificates(c.TLSCertsDir),
4253
function.Insecure(c.Insecure),

0 commit comments

Comments
 (0)