Skip to content

Commit

Permalink
Adds initial Inference Extension Support
Browse files Browse the repository at this point in the history
Signed-off-by: Daneyon Hansen <[email protected]>
  • Loading branch information
danehans committed Mar 8, 2025
1 parent 07c0dfc commit c01ceec
Show file tree
Hide file tree
Showing 29 changed files with 1,915 additions and 81 deletions.
7 changes: 4 additions & 3 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ require (
github.com/grpc-ecosystem/go-grpc-middleware v1.4.0
github.com/kelseyhightower/envconfig v1.4.0
github.com/mitchellh/hashstructure v1.0.0
github.com/onsi/ginkgo/v2 v2.22.1
github.com/onsi/ginkgo/v2 v2.22.2
github.com/onsi/gomega v1.36.2
github.com/pkg/errors v0.9.1
github.com/rotisserie/eris v0.5.4
Expand Down Expand Up @@ -52,9 +52,10 @@ require (
k8s.io/kube-openapi v0.0.0-20241212222426-2c72e554b1e7
k8s.io/utils v0.0.0-20241210054802-24370beab758
knative.dev/pkg v0.0.0-20211206113427-18589ac7627e
sigs.k8s.io/controller-runtime v0.20.0
sigs.k8s.io/controller-runtime v0.20.2
sigs.k8s.io/controller-tools v0.16.5
sigs.k8s.io/gateway-api v1.2.1
sigs.k8s.io/gateway-api-inference-extension v0.0.0-20250219213427-2577f63f6a1c
sigs.k8s.io/structured-merge-diff/v4 v4.5.0
sigs.k8s.io/yaml v1.4.0
)
Expand Down Expand Up @@ -98,7 +99,7 @@ require (
github.com/emicklei/go-restful/v3 v3.12.1 // indirect
github.com/envoyproxy/protoc-gen-validate v1.2.1 // indirect
github.com/evanphx/json-patch v5.9.0+incompatible // indirect
github.com/evanphx/json-patch/v5 v5.9.0 // indirect
github.com/evanphx/json-patch/v5 v5.9.11 // indirect
github.com/exponent-io/jsonpath v0.0.0-20210407135951-1de76d718b3f // indirect
github.com/fatih/color v1.18.0 // indirect
github.com/felixge/httpsnoop v1.0.4 // indirect
Expand Down
14 changes: 8 additions & 6 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -284,8 +284,8 @@ github.com/evanphx/json-patch v4.9.0+incompatible/go.mod h1:50XU6AFN0ol/bzJsmQLi
github.com/evanphx/json-patch v5.9.0+incompatible h1:fBXyNpNMuTTDdquAq/uisOr2lShz4oaXpDTX2bLe7ls=
github.com/evanphx/json-patch v5.9.0+incompatible/go.mod h1:50XU6AFN0ol/bzJsmQLiYLvXMP4fmwYFNcr97nuDLSk=
github.com/evanphx/json-patch/v5 v5.6.0/go.mod h1:G79N1coSVB93tBe7j6PhzjmR3/2VvlbKOFpnXhI9Bw4=
github.com/evanphx/json-patch/v5 v5.9.0 h1:kcBlZQbplgElYIlo/n1hJbls2z/1awpXxpRi0/FOJfg=
github.com/evanphx/json-patch/v5 v5.9.0/go.mod h1:VNkHZ/282BpEyt/tObQO8s5CMPmYYq14uClGH4abBuQ=
github.com/evanphx/json-patch/v5 v5.9.11 h1:/8HVnzMq13/3x9TPvjG08wUGqBTmZBsCWzjTM0wiaDU=
github.com/evanphx/json-patch/v5 v5.9.11/go.mod h1:3j+LviiESTElxA4p3EMKAB9HXj3/XEtnUf6OZxqIQTM=
github.com/exponent-io/jsonpath v0.0.0-20210407135951-1de76d718b3f h1:Wl78ApPPB2Wvf/TIe2xdyJxTlb6obmF18d8QdkxNDu4=
github.com/exponent-io/jsonpath v0.0.0-20210407135951-1de76d718b3f/go.mod h1:OSYXu++VVOHnXeitef/D8n/6y4QV8uLHSFXX4NeXMGc=
github.com/fatih/color v1.7.0/go.mod h1:Zm6kSWBoL9eyXnKyktHP6abPY2pDugNf5KwzbycvMj4=
Expand Down Expand Up @@ -738,8 +738,8 @@ github.com/onsi/ginkgo v1.12.1/go.mod h1:zj2OWP4+oCPe1qIXoGWkgMRwljMUYCdkwsT2108
github.com/onsi/ginkgo v1.16.4/go.mod h1:dX+/inL/fNMqNlz0e9LfyB9TswhZpCVdJM/Z6Vvnwo0=
github.com/onsi/ginkgo v1.16.5 h1:8xi0RTUf59SOSfEtZMvwTvXYMzG4gV23XVHOZiXNtnE=
github.com/onsi/ginkgo v1.16.5/go.mod h1:+E8gABHa3K6zRBolWtd+ROzc/U5bkGt0FwiG042wbpU=
github.com/onsi/ginkgo/v2 v2.22.1 h1:QW7tbJAUDyVDVOM5dFa7qaybo+CRfR7bemlQUN6Z8aM=
github.com/onsi/ginkgo/v2 v2.22.1/go.mod h1:S6aTpoRsSq2cZOd+pssHAlKW/Q/jZt6cPrPlnj4a1xM=
github.com/onsi/ginkgo/v2 v2.22.2 h1:/3X8Panh8/WwhU/3Ssa6rCKqPLuAkVY2I0RoyDLySlU=
github.com/onsi/ginkgo/v2 v2.22.2/go.mod h1:oeMosUL+8LtarXBHu/c0bx2D/K9zyQ6uX3cTyztHwsk=
github.com/onsi/gomega v0.0.0-20170829124025-dcabb60a477c/go.mod h1:C1qb7wdrVGGVU+Z6iS04AVkA3Q65CEZX59MT0QO5uiA=
github.com/onsi/gomega v1.4.3/go.mod h1:ex+gbHU/CVuBBDIJjb2X0qEXbFg53c61hWP/1CpauHY=
github.com/onsi/gomega v1.7.0/go.mod h1:ex+gbHU/CVuBBDIJjb2X0qEXbFg53c61hWP/1CpauHY=
Expand Down Expand Up @@ -1627,12 +1627,14 @@ rsc.io/sampler v1.3.0/go.mod h1:T1hPZKmBbMNahiBKFy5HrXp6adAjACjK9JXDnKaTXpA=
sigs.k8s.io/apiserver-network-proxy/konnectivity-client v0.0.22/go.mod h1:LEScyzhFmoF5pso/YSeBstl57mOzx9xlU9n85RGrDQg=
sigs.k8s.io/apiserver-network-proxy/konnectivity-client v0.31.1 h1:uOuSLOMBWkJH0TWa9X6l+mj5nZdm6Ay6Bli8HL8rNfk=
sigs.k8s.io/apiserver-network-proxy/konnectivity-client v0.31.1/go.mod h1:Ve9uj1L+deCXFrPOk1LpFXqTg7LCFzFso6PA48q/XZw=
sigs.k8s.io/controller-runtime v0.20.0 h1:jjkMo29xEXH+02Md9qaVXfEIaMESSpy3TBWPrsfQkQs=
sigs.k8s.io/controller-runtime v0.20.0/go.mod h1:BrP3w158MwvB3ZbNpaAcIKkHQ7YGpYnzpoSTZ8E14WU=
sigs.k8s.io/controller-runtime v0.20.2 h1:/439OZVxoEc02psi1h4QO3bHzTgu49bb347Xp4gW1pc=
sigs.k8s.io/controller-runtime v0.20.2/go.mod h1:xg2XB0K5ShQzAgsoujxuKN4LNXR2LfwwHsPj7Iaw+XY=
sigs.k8s.io/controller-tools v0.16.5 h1:5k9FNRqziBPwqr17AMEPPV/En39ZBplLAdOwwQHruP4=
sigs.k8s.io/controller-tools v0.16.5/go.mod h1:8vztuRVzs8IuuJqKqbXCSlXcw+lkAv/M2sTpg55qjMY=
sigs.k8s.io/gateway-api v1.2.1 h1:fZZ/+RyRb+Y5tGkwxFKuYuSRQHu9dZtbjenblleOLHM=
sigs.k8s.io/gateway-api v1.2.1/go.mod h1:EpNfEXNjiYfUJypf0eZ0P5iXA9ekSGWaS1WgPaM42X0=
sigs.k8s.io/gateway-api-inference-extension v0.0.0-20250219213427-2577f63f6a1c h1:YyTNvnfjzdiHXFQdRzouvQO9SKFwZkgQffnbr9YADFE=
sigs.k8s.io/gateway-api-inference-extension v0.0.0-20250219213427-2577f63f6a1c/go.mod h1:H2DbSVDbCxG2cNTTgYC+V3RiotW077Xkx3fA3mRAwXs=
sigs.k8s.io/json v0.0.0-20241014173422-cfa47c3a1cc8 h1:gBQPwqORJ8d8/YNZWEjoZs7npUVDpVXUUOFfW6CgAqE=
sigs.k8s.io/json v0.0.0-20241014173422-cfa47c3a1cc8/go.mod h1:mdzfpAEoE6DHQEN0uh9ZbOCuHbLK5wOm7dK4ctXE9Tg=
sigs.k8s.io/kustomize/api v0.18.0 h1:hTzp67k+3NEVInwz5BHyzc9rGxIauoXferXyjv5lWPo=
Expand Down
5 changes: 3 additions & 2 deletions hack/utils/oss_compliance/osa_provided.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ Name|Version|License
[grpc-ecosystem/go-grpc-middleware](https://github.com/grpc-ecosystem/go-grpc-middleware)|v1.4.0|Apache License 2.0
[kelseyhightower/envconfig](https://github.com/kelseyhightower/envconfig)|v1.4.0|MIT License
[mitchellh/hashstructure](https://github.com/mitchellh/hashstructure)|v1.0.0|MIT License
[ginkgo/v2](https://github.com/onsi/ginkgo)|v2.22.1|MIT License
[ginkgo/v2](https://github.com/onsi/ginkgo)|v2.22.2|MIT License
[onsi/gomega](https://github.com/onsi/gomega)|v1.36.2|MIT License
[pkg/errors](https://github.com/pkg/errors)|v0.9.1|BSD 2-clause "Simplified" License
[rotisserie/eris](https://github.com/rotisserie/eris)|v0.5.4|MIT License
Expand All @@ -45,9 +45,10 @@ Name|Version|License
[k8s.io/kube-openapi](https://k8s.io/kube-openapi)|v0.0.0-20241212222426-2c72e554b1e7|Apache License 2.0
[k8s.io/utils](https://k8s.io/utils)|v0.0.0-20241210054802-24370beab758|Apache License 2.0
[knative.dev/pkg](https://knative.dev/pkg)|v0.0.0-20211206113427-18589ac7627e|Apache License 2.0
[sigs.k8s.io/controller-runtime](https://sigs.k8s.io/controller-runtime)|v0.20.0|Apache License 2.0
[sigs.k8s.io/controller-runtime](https://sigs.k8s.io/controller-runtime)|v0.20.2|Apache License 2.0
[sigs.k8s.io/controller-tools](https://sigs.k8s.io/controller-tools)|v0.16.5|Apache License 2.0
[sigs.k8s.io/gateway-api](https://sigs.k8s.io/gateway-api)|v1.2.1|Apache License 2.0
[sigs.k8s.io/gateway-api-inference-extension](https://sigs.k8s.io/gateway-api-inference-extension)|v0.0.0-20250219213427-2577f63f6a1c|Apache License 2.0
[structured-merge-diff/v4](https://sigs.k8s.io/structured-merge-diff/v4)|v4.5.0|Apache License 2.0
[sigs.k8s.io/yaml](https://sigs.k8s.io/yaml)|v1.4.0|MIT License
[cmd/goimports](https://golang.org/x/tools/cmd/goimports)|latest|MIT License
Expand Down
44 changes: 44 additions & 0 deletions install/helm/kgateway/templates/role.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -126,3 +126,47 @@ rules:
- get
- list
- watch
- apiGroups:
- inference.networking.x-k8s.io
resources:
- inferencemodels
verbs:
- get
- list
- watch
- apiGroups:
- inference.networking.x-k8s.io
resources:
- inferencepools
verbs:
- get
- list
- watch
- update
- apiGroups:
- rbac.authorization.k8s.io
# TODO [danehans]: EPP should use Role and RoleBinding resources: https://github.com/kubernetes-sigs/gateway-api-inference-extension/issues/224
resources:
- clusterroles
- clusterrolebindings
verbs:
- create
- delete
- get
- list
- patch
- update
- watch
# TODO [danehans]: Unsure why the following rules are needed: https://github.com/kubernetes-sigs/gateway-api-inference-extension/issues/224
- apiGroups:
- authentication.k8s.io
resources:
- tokenreviews
verbs:
- create
- apiGroups:
- authorization.k8s.io
resources:
- subjectaccessreviews
verbs:
- create
183 changes: 178 additions & 5 deletions internal/kgateway/controller/controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ import (
"sigs.k8s.io/controller-runtime/pkg/manager"
"sigs.k8s.io/controller-runtime/pkg/predicate"
"sigs.k8s.io/controller-runtime/pkg/reconcile"
infextv1a1 "sigs.k8s.io/gateway-api-inference-extension/api/v1alpha1"
apiv1 "sigs.k8s.io/gateway-api/apis/v1"

"github.com/kgateway-dev/kgateway/v2/api/v1alpha1"
Expand All @@ -25,10 +26,13 @@ import (
)

const (
// field name used for indexing
// GatewayParamsField is the field name used for indexing Gateway objects.
GatewayParamsField = "gateway-params"
// InferencePoolField is the field name used for indexing HTTPRoute objects by the names of the InferencePools they reference.
InferencePoolField = "inferencepool-index"
)

// TODO [danehans]: Refactor so controller config is organized into shared and Gateway/InferencePool-specific controllers.
type GatewayConfig struct {
Mgr manager.Manager

Expand All @@ -45,7 +49,7 @@ type GatewayConfig struct {

func NewBaseGatewayController(ctx context.Context, cfg GatewayConfig) error {
log := log.FromContext(ctx)
log.V(5).Info("starting controller", "controllerName", cfg.ControllerName)
log.V(5).Info("starting gateway controller", "controllerName", cfg.ControllerName)

controllerBuilder := &controllerBuilder{
cfg: cfg,
Expand All @@ -62,6 +66,29 @@ func NewBaseGatewayController(ctx context.Context, cfg GatewayConfig) error {
)
}

// InferencePoolConfig holds the inputs used by NewBaseInferencePoolController.
type InferencePoolConfig struct {
// Mgr is the manager whose client and scheme are handed to the controller's reconciler.
Mgr manager.Manager
// ControllerName is the controller name reported in the startup log line.
ControllerName string
// InferenceExt is passed to the deployer as its InferenceExtension input.
InferenceExt *deployer.InferenceExtInfo
}

// NewBaseInferencePoolController sets up the InferencePool controller by running
// the builder's watchInferencePool step.
//
// poolCfg supplies the controller name used for logging, the manager whose client
// and scheme back the reconciler, and the inference extension settings. gwCfg is
// copied into the builder as its Gateway configuration. Both must be non-nil; a
// nil config is reported as an error rather than causing a nil-pointer panic.
func NewBaseInferencePoolController(ctx context.Context, poolCfg *InferencePoolConfig, gwCfg *GatewayConfig) error {
	// Both configs are dereferenced below, so reject nil up front.
	if poolCfg == nil {
		return fmt.Errorf("inferencepool controller config must not be nil")
	}
	if gwCfg == nil {
		return fmt.Errorf("gateway controller config must not be nil")
	}

	log := log.FromContext(ctx)
	log.V(5).Info("starting inferencepool controller", "controllerName", poolCfg.ControllerName)

	// TODO [danehans]: Make GatewayConfig optional since Gateway and InferencePool are independent controllers.
	controllerBuilder := &controllerBuilder{
		cfg:     *gwCfg,
		poolCfg: poolCfg,
		reconciler: &controllerReconciler{
			cli:    poolCfg.Mgr.GetClient(),
			scheme: poolCfg.Mgr.GetScheme(),
		},
	}

	return run(ctx, controllerBuilder.watchInferencePool)
}

func run(ctx context.Context, funcs ...func(ctx context.Context) error) error {
for _, f := range funcs {
if err := f(ctx); err != nil {
Expand All @@ -72,8 +99,8 @@ func run(ctx context.Context, funcs ...func(ctx context.Context) error) error {
}

type controllerBuilder struct {
cfg GatewayConfig

cfg GatewayConfig
poolCfg *InferencePoolConfig
reconciler *controllerReconciler
}

Expand All @@ -98,7 +125,7 @@ func (c *controllerBuilder) watchGw(ctx context.Context) error {
// setup a deployer
log := log.FromContext(ctx)

log.Info("creating deployer", "ctrlname", c.cfg.ControllerName, "server", c.cfg.ControlPlane.XdsHost, "port", c.cfg.ControlPlane.XdsPort)
log.Info("creating gateway deployer", "ctrlname", c.cfg.ControllerName, "server", c.cfg.ControlPlane.XdsHost, "port", c.cfg.ControlPlane.XdsPort)
d, err := deployer.NewDeployer(c.cfg.Mgr.GetClient(), &deployer.Inputs{
ControllerName: c.cfg.ControllerName,
Dev: c.cfg.Dev,
Expand Down Expand Up @@ -181,6 +208,152 @@ func (c *controllerBuilder) watchGw(ctx context.Context) error {
return nil
}

// addHTTPRouteIndexes registers the InferencePoolField index on HTTPRoute objects
// with the manager's field indexer, using httpRouteInferencePoolIndex to compute
// the indexed values.
func (c *controllerBuilder) addHTTPRouteIndexes(ctx context.Context) error {
	indexer := c.cfg.Mgr.GetFieldIndexer()
	return indexer.IndexField(ctx, &apiv1.HTTPRoute{}, InferencePoolField, httpRouteInferencePoolIndex)
}

// httpRouteInferencePoolIndex returns the name of every backendRef in the given
// HTTPRoute whose kind is InferencePool, in rule order and without de-duplication.
// Objects that are not HTTPRoutes yield nil.
func httpRouteInferencePoolIndex(obj client.Object) []string {
	httpRoute, isRoute := obj.(*apiv1.HTTPRoute)
	if !isRoute {
		// Not expected in practice; an unexpected type contributes no index values.
		return nil
	}

	var names []string
	for i := range httpRoute.Spec.Rules {
		backends := httpRoute.Spec.Rules[i].BackendRefs
		for j := range backends {
			backend := &backends[j]
			if backend.Kind == nil || *backend.Kind != wellknown.InferencePoolKind {
				continue
			}
			names = append(names, string(backend.Name))
		}
	}
	return names
}

// watchInferencePool adds a watch on InferencePool and HTTPRoute objects (that reference an InferencePool)
// to trigger reconciliation.
func (c *controllerBuilder) watchInferencePool(ctx context.Context) error {
log := log.FromContext(ctx)
log.Info("creating inference extension deployer", "controller", c.cfg.ControllerName)

// Register the HTTPRoute index.
if err := c.addHTTPRouteIndexes(ctx); err != nil {
return fmt.Errorf("failed to register HTTPRoute index: %w", err)
}

// Create a deployer using the controllerBuilder as inputs.
d, err := deployer.NewDeployer(c.cfg.Mgr.GetClient(), &deployer.Inputs{
ControllerName: c.cfg.ControllerName,
InferenceExtension: c.poolCfg.InferenceExt,
})
if err != nil {
return err
}

buildr := ctrl.NewControllerManagedBy(c.cfg.Mgr).
For(&infextv1a1.InferencePool{}, builder.WithPredicates(
predicate.Or(
predicate.AnnotationChangedPredicate{},
predicate.GenerationChangedPredicate{},
),
)).
// Watch HTTPRoute objects so that changes there trigger a reconcile for referenced InferencePools.
Watches(&apiv1.HTTPRoute{}, handler.EnqueueRequestsFromMapFunc(func(ctx context.Context, obj client.Object) []reconcile.Request {
route, ok := obj.(*apiv1.HTTPRoute)
if !ok {
return nil
}

// Use the index function to get the inference pool names.
poolNames := httpRouteInferencePoolIndex(route)
if len(poolNames) == 0 {
return nil
}

hasOurGateway := false
for _, parentRef := range route.Spec.ParentRefs {
// We only care about references to Gateways.
if parentRef.Group != nil && string(*parentRef.Group) == apiv1.GroupName &&
parentRef.Kind != nil && *parentRef.Kind == wellknown.GatewayKind {

Check failure on line 277 in internal/kgateway/controller/controller.go

View workflow job for this annotation

GitHub Actions / Lint Checks

unnecessary leading newline (whitespace)

// Determine the namespace of the Gateway. If parentRef.Namespace is nil/empty,
// it defaults to the route's namespace.
gwNamespace := route.Namespace
if parentRef.Namespace != nil && *parentRef.Namespace != "" {
gwNamespace = string(*parentRef.Namespace)
}
gwName := string(parentRef.Name)

// Fetch the Gateway
var gw apiv1.Gateway
if err := c.cfg.Mgr.GetClient().Get(ctx, client.ObjectKey{
Namespace: gwNamespace,
Name: gwName,
}, &gw); err != nil {
// If we cannot get it, skip this parentRef
continue
}

// Check if the Gateway is recognized as "ours"
if c.cfg.OurGateway(&gw) {
hasOurGateway = true
break
}
}
}
if !hasOurGateway {
// If no parentRef references one of our Gateways, skip it.
return nil
}

// The HTTPRoute references an InferencePool and one of our Gateways.
// Enqueue each referenced InferencePool for reconciliation.
var reqs []reconcile.Request
for _, poolName := range poolNames {
reqs = append(reqs, reconcile.Request{
NamespacedName: client.ObjectKey{
Namespace: route.Namespace,
Name: poolName,
},
})
}
return reqs
}))

// Watch child objects, e.g. Deployments, created by the inference pool deployer.
gvks, err := d.GetGvksToWatch(ctx)
if err != nil {
return err
}
for _, gvk := range gvks {
obj, err := c.cfg.Mgr.GetScheme().New(gvk)
if err != nil {
return err
}
clientObj, ok := obj.(client.Object)
if !ok {
return fmt.Errorf("object %T is not a client.Object", obj)
}
log.Info("watching gvk as inferencepool child", "gvk", gvk)
var opts []builder.OwnsOption
if shouldIgnoreStatusChild(gvk) {
opts = append(opts, builder.WithPredicates(predicate.GenerationChangedPredicate{}))
}
buildr.Owns(clientObj, opts...)
}

r := &inferencePoolReconciler{
cli: c.cfg.Mgr.GetClient(),
scheme: c.cfg.Mgr.GetScheme(),
deployer: d,
}
if err := buildr.Complete(r); err != nil {
return err
}

return nil
}

func shouldIgnoreStatusChild(gvk schema.GroupVersionKind) bool {
// avoid triggering on pod changes that update deployment status
return gvk.Kind == "Deployment"
Expand Down
Loading

0 comments on commit c01ceec

Please sign in to comment.