diff --git a/install/helm/kgateway/templates/role.yaml b/install/helm/kgateway/templates/role.yaml index 917af45d1bbd..7b87de9d3803 100644 --- a/install/helm/kgateway/templates/role.yaml +++ b/install/helm/kgateway/templates/role.yaml @@ -126,3 +126,47 @@ rules: - get - list - watch +- apiGroups: + - inference.networking.x-k8s.io + resources: + - inferencemodels + verbs: + - get + - list + - watch +- apiGroups: + - inference.networking.x-k8s.io + resources: + - inferencepools + verbs: + - get + - list + - watch + - update +- apiGroups: + - rbac.authorization.k8s.io + # TODO [danehans]: EPP should use Role and RoleBinding resources: https://github.com/kubernetes-sigs/gateway-api-inference-extension/issues/224 + resources: + - clusterroles + - clusterrolebindings + verbs: + - create + - delete + - get + - list + - patch + - update + - watch +# TODO [danehans]: Unsure why the following rules are needed: https://github.com/kubernetes-sigs/gateway-api-inference-extension/issues/224 +- apiGroups: + - authentication.k8s.io + resources: + - tokenreviews + verbs: + - create +- apiGroups: + - authorization.k8s.io + resources: + - subjectaccessreviews + verbs: + - create diff --git a/internal/kgateway/controller/inferencepool_controller.go b/internal/kgateway/controller/inferencepool_controller.go index 264817cde8b0..346775677b41 100644 --- a/internal/kgateway/controller/inferencepool_controller.go +++ b/internal/kgateway/controller/inferencepool_controller.go @@ -11,6 +11,7 @@ import ( gwv1 "sigs.k8s.io/gateway-api/apis/v1" "github.com/kgateway-dev/kgateway/v2/internal/kgateway/deployer" + "github.com/kgateway-dev/kgateway/v2/internal/kgateway/wellknown" ) type inferencePoolReconciler struct { @@ -29,11 +30,23 @@ func (r *inferencePoolReconciler) Reconcile(ctx context.Context, req ctrl.Reques } if pool.GetDeletionTimestamp() != nil { - // no need to do anything as we have owner refs, so children will be deleted - log.Info("inferencepool deleted, no need for reconciling") + 
log.Info("Removing endpoint picker for InferencePool", "name", pool.Name, "namespace", pool.Namespace) + if err := r.deployer.CleanupClusterScopedResources(ctx, pool); err != nil { + return ctrl.Result{}, err + } + // Remove the finalizer. + pool.Finalizers = removeString(pool.Finalizers, wellknown.InferencePoolFinalizer) + if err := r.cli.Update(ctx, pool); err != nil { + return ctrl.Result{}, err + } return ctrl.Result{}, nil } + // Ensure the finalizer is present for the InferencePool. + if err := r.deployer.EnsureFinalizer(ctx, pool); err != nil { + return ctrl.Result{}, err + } + // Use the registered index to list HTTPRoutes that reference this pool. var routeList gwv1.HTTPRouteList if err := r.cli.List(ctx, &routeList, @@ -58,7 +71,7 @@ func (r *inferencePoolReconciler) Reconcile(ctx context.Context, req ctrl.Reques // TODO [danehans]: Manage inferencepool status conditions. // Deploy the endpoint picker resources. - log.Info("Deploying endpoint picker from InferencePool", "name", pool.Name, "namespace", pool.Namespace) + log.Info("Deploying endpoint picker for InferencePool", "name", pool.Name, "namespace", pool.Namespace) err = r.deployer.DeployObjs(ctx, objs) if err != nil { return ctrl.Result{}, err @@ -68,3 +81,14 @@ func (r *inferencePoolReconciler) Reconcile(ctx context.Context, req ctrl.Reques return ctrl.Result{}, nil } + +// removeString is a helper function to remove a string from a slice. 
+func removeString(slice []string, s string) []string { + var result []string + for _, item := range slice { + if item != s { + result = append(result, item) + } + } + return result +} diff --git a/internal/kgateway/controller/start.go b/internal/kgateway/controller/start.go index ec0df8dc4bb5..4eec0e1090ff 100644 --- a/internal/kgateway/controller/start.go +++ b/internal/kgateway/controller/start.go @@ -34,7 +34,7 @@ import ( "github.com/kgateway-dev/kgateway/v2/internal/kgateway/utils/krtutil" "github.com/kgateway-dev/kgateway/v2/internal/kgateway/wellknown" "github.com/kgateway-dev/kgateway/v2/pkg/client/clientset/versioned" - glooschemes "github.com/kgateway-dev/kgateway/v2/pkg/schemes" + kgtwschemes "github.com/kgateway-dev/kgateway/v2/pkg/schemes" "github.com/kgateway-dev/kgateway/v2/pkg/utils/kubeutils" "github.com/kgateway-dev/kgateway/v2/pkg/utils/namespaces" ) @@ -102,7 +102,7 @@ func NewControllerBuilder(ctx context.Context, cfg StartConfig) (*ControllerBuil scheme := DefaultScheme() // Extend the scheme if the TCPRoute CRD exists. - if err := glooschemes.AddGatewayV1A2Scheme(cfg.RestConfig, scheme); err != nil { + if err := kgtwschemes.AddGatewayV1A2Scheme(cfg.RestConfig, scheme); err != nil { return nil, err } @@ -146,11 +146,17 @@ func NewControllerBuilder(ctx context.Context, cfg StartConfig) (*ControllerBuil ) // Extend the scheme and add the EPP plugin if the InferencePool CRD exists. 
- exists, err := glooschemes.AddInferExtV1A1Scheme(cfg.RestConfig, scheme) + exists, err := kgtwschemes.AddInferExtV1A1Scheme(cfg.RestConfig, scheme) + setupLog.Info("checking inference extension CRDs exist", "result", exists) + switch { case err != nil: return nil, err case exists: + setupLog.Info("adding inference extension endpoint picker plugin") + if cfg.ExtraPlugins == nil { + cfg.ExtraPlugins = []extensionsplug.Plugin{} + } cfg.ExtraPlugins = append(cfg.ExtraPlugins, endpointpicker.NewPlugin(ctx, commoncol)) } diff --git a/internal/kgateway/deployer/deployer.go b/internal/kgateway/deployer/deployer.go index 3983aa6d5939..252a024fa64b 100644 --- a/internal/kgateway/deployer/deployer.go +++ b/internal/kgateway/deployer/deployer.go @@ -8,15 +8,16 @@ import ( "io" "io/fs" "path/filepath" + "slices" "github.com/rotisserie/eris" - "golang.org/x/exp/slices" "helm.sh/helm/v3/pkg/action" "helm.sh/helm/v3/pkg/chart" "helm.sh/helm/v3/pkg/chart/loader" "helm.sh/helm/v3/pkg/storage" "helm.sh/helm/v3/pkg/storage/driver" corev1 "k8s.io/api/core/v1" + rbacv1 "k8s.io/api/rbac/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" "k8s.io/apimachinery/pkg/runtime" @@ -516,19 +517,26 @@ func (d *Deployer) GetEndpointPickerObjs(pool *infextv1a1.InferencePool) ([]clie return nil, fmt.Errorf("failed to render inference extension objects: %w", err) } - // Ensure that each rendered object has its namespace set. + // Ensure that each namespaced rendered object has its namespace and ownerRef set. 
for _, obj := range objs { - if obj.GetNamespace() == "" { - obj.SetNamespace(pool.Namespace) + gvk := obj.GetObjectKind().GroupVersionKind() + if IsNamespaced(gvk) { + if obj.GetNamespace() == "" { + obj.SetNamespace(pool.Namespace) + } + obj.SetOwnerReferences([]metav1.OwnerReference{{ + APIVersion: pool.APIVersion, + Kind: pool.Kind, + Name: pool.Name, + UID: pool.UID, + Controller: ptr.To(true), + }}) + } else { + // TODO [danehans]: Not sure why a ns must be set for cluster-scoped objects: + // failed to apply object rbac.authorization.k8s.io/v1, Kind=ClusterRoleBinding + // vllm-llama2-7b-pool-endpoint-picker: Namespace parameter required. + obj.SetNamespace("") } - // Set owner references so that these objects are tied to the InferencePool. - obj.SetOwnerReferences([]metav1.OwnerReference{{ - APIVersion: pool.APIVersion, - Kind: pool.Kind, - Name: pool.Name, - UID: pool.UID, - Controller: ptr.To(true), - }}) } return objs, nil @@ -545,6 +553,47 @@ func (d *Deployer) DeployObjs(ctx context.Context, objs []client.Object) error { return nil } +// EnsureFinalizer adds the InferencePool finalizer to the given pool if it’s not already present. +func (d *Deployer) EnsureFinalizer(ctx context.Context, pool *infextv1a1.InferencePool) error { + if slices.Contains(pool.Finalizers, wellknown.InferencePoolFinalizer) { + return nil + } + pool.Finalizers = append(pool.Finalizers, wellknown.InferencePoolFinalizer) + return d.cli.Update(ctx, pool) +} + +// CleanupClusterScopedResources deletes the ClusterRole and ClusterRoleBinding for the given pool. +func (d *Deployer) CleanupClusterScopedResources(ctx context.Context, pool *infextv1a1.InferencePool) error { + // The same release name as in the Helm template. + releaseName := fmt.Sprintf("%s-endpoint-picker", pool.Name) + + // Delete the ClusterRole. 
+ var cr rbacv1.ClusterRole + cr.Name = releaseName + // NotFound means the object is already gone; any other error is returned so cleanup is retried. + if err := d.cli.Delete(ctx, &cr); client.IgnoreNotFound(err) != nil { + return fmt.Errorf("failed to delete ClusterRole %s: %w", releaseName, err) + } + + // Delete the ClusterRoleBinding. + var crb rbacv1.ClusterRoleBinding + crb.Name = releaseName + // NotFound means the object is already gone; any other error is returned so cleanup is retried. + if err := d.cli.Delete(ctx, &crb); client.IgnoreNotFound(err) != nil { + return fmt.Errorf("failed to delete ClusterRoleBinding %s: %w", releaseName, err) + } + + return nil +} + +// IsNamespaced returns true if the resource is namespaced. +func IsNamespaced(gvk schema.GroupVersionKind) bool { + if gvk == wellknown.ClusterRoleGVK || gvk == wellknown.ClusterRoleBindingGVK { + return false + } + return true +} + func loadFs(filesystem fs.FS) (*chart.Chart, error) { var bufferedFiles []*loader.BufferedFile entries, err := fs.ReadDir(filesystem, ".") diff --git a/internal/kgateway/deployer/deployer_test.go b/internal/kgateway/deployer/deployer_test.go index c40d674ab474..d72468258fdd 100644 --- a/internal/kgateway/deployer/deployer_test.go +++ b/internal/kgateway/deployer/deployer_test.go @@ -1466,6 +1466,13 @@ var _ = Describe("Deployer", func() { }) Expect(err).NotTo(HaveOccurred()) + // Simulate reconciliation so that the pool gets its finalizer added. + err = d.EnsureFinalizer(context.Background(), pool) + Expect(err).NotTo(HaveOccurred()) + + // Check that the pool itself has the finalizer set. + Expect(pool.GetFinalizers()).To(ContainElement(wellknown.InferencePoolFinalizer)) + // Get the endpoint picker objects for the InferencePool. objs, err := d.GetEndpointPickerObjs(pool) Expect(err).NotTo(HaveOccurred()) @@ -1474,18 +1481,18 @@ var _ = Describe("Deployer", func() { // Find the child objects. 
var sa *corev1.ServiceAccount - var role *rbacv1.Role - var rb *rbacv1.RoleBinding + var clusterRole *rbacv1.ClusterRole + var crb *rbacv1.ClusterRoleBinding var dep *appsv1.Deployment var svc *corev1.Service for _, obj := range objs { switch t := obj.(type) { case *corev1.ServiceAccount: sa = t - case *rbacv1.Role: - role = t - case *rbacv1.RoleBinding: - rb = t + case *rbacv1.ClusterRole: + clusterRole = t + case *rbacv1.ClusterRoleBinding: + crb = t case *appsv1.Deployment: dep = t case *corev1.Service: @@ -1493,29 +1500,32 @@ var _ = Describe("Deployer", func() { } } Expect(sa).NotTo(BeNil(), "expected a ServiceAccount to be rendered") - Expect(role).NotTo(BeNil(), "expected a Role to be rendered") - Expect(rb).NotTo(BeNil(), "expected a RoleBinding to be rendered") + Expect(clusterRole).NotTo(BeNil(), "expected a ClusterRole to be rendered") + Expect(crb).NotTo(BeNil(), "expected a ClusterRoleBinding to be rendered") Expect(dep).NotTo(BeNil(), "expected a Deployment to be rendered") Expect(svc).NotTo(BeNil(), "expected a Service to be rendered") // Check that owner references are set on all rendered objects to the InferencePool. for _, obj := range objs { - ownerRefs := obj.GetOwnerReferences() - Expect(ownerRefs).To(HaveLen(1)) - ref := ownerRefs[0] - Expect(ref.Name).To(Equal(pool.Name)) - Expect(ref.UID).To(Equal(pool.UID)) - Expect(ref.Kind).To(Equal(pool.Kind)) - Expect(ref.APIVersion).To(Equal(pool.APIVersion)) - Expect(*ref.Controller).To(BeTrue()) + gvk := obj.GetObjectKind().GroupVersionKind() + if deployer.IsNamespaced(gvk) { + ownerRefs := obj.GetOwnerReferences() + Expect(ownerRefs).To(HaveLen(1)) + ref := ownerRefs[0] + Expect(ref.Name).To(Equal(pool.Name)) + Expect(ref.UID).To(Equal(pool.UID)) + Expect(ref.Kind).To(Equal(pool.Kind)) + Expect(ref.APIVersion).To(Equal(pool.APIVersion)) + Expect(*ref.Controller).To(BeTrue()) + } } // Validate that the rendered Deployment and Service have the expected names. 
// (The template hardcodes the names to "inference-gateway-ext-proc".) expectedName := fmt.Sprintf("%s-endpoint-picker", pool.Name) Expect(sa.Name).To(Equal(expectedName)) - Expect(role.Name).To(Equal(expectedName)) - Expect(rb.Name).To(Equal(expectedName)) + Expect(clusterRole.Name).To(Equal(expectedName)) + Expect(crb.Name).To(Equal(expectedName)) Expect(dep.Name).To(Equal(expectedName)) Expect(svc.Name).To(Equal(expectedName)) diff --git a/internal/kgateway/extensions2/plugins/inferenceextension/endpointpicker/plugin.go b/internal/kgateway/extensions2/plugins/inferenceextension/endpointpicker/plugin.go index fbc1c1e8c2bf..0f07134d7daa 100644 --- a/internal/kgateway/extensions2/plugins/inferenceextension/endpointpicker/plugin.go +++ b/internal/kgateway/extensions2/plugins/inferenceextension/endpointpicker/plugin.go @@ -25,7 +25,6 @@ import ( "k8s.io/apimachinery/pkg/runtime/schema" "k8s.io/apimachinery/pkg/types" infextv1a1 "sigs.k8s.io/gateway-api-inference-extension/api/v1alpha1" - gwv1 "sigs.k8s.io/gateway-api/apis/v1" "github.com/kgateway-dev/kgateway/v2/internal/kgateway/extensions2/common" extplug "github.com/kgateway-dev/kgateway/v2/internal/kgateway/extensions2/plugin" @@ -38,44 +37,31 @@ import ( "github.com/kgateway-dev/kgateway/v2/internal/kgateway/wellknown" ) -// TODO [danehans]: Filter InferencePools based one's that are referenced by an HTTPRoute -// with a status.parents[].controllerName that matches our Gateway controllerName. +// TODO [danehans]: Filter InferencePools based on being referenced by an HTTPRoute +// with a status.parents[].controllerName that matches the Kgateway controllerName. func NewPlugin(ctx context.Context, commonCol *common.CommonCollections) extplug.Plugin { - poolClient := kclient.New[*infextv1a1.InferencePool](commonCol.Client) - pools := krt.WrapClient(poolClient, commonCol.KrtOpts.ToOptions("InferencePools")...) 
- routeClient := kclient.New[*gwv1.HTTPRoute](commonCol.Client) - routes := krt.WrapClient(routeClient, commonCol.KrtOpts.ToOptions("HTTPRoutes")...) + // Use the dynamic collection helper to create a collection of InferencePool objects. + poolGVR := schema.GroupVersionResource{ + Group: infextv1a1.GroupVersion.Group, + Version: infextv1a1.GroupVersion.Version, + Resource: "inferencepools", + } + pools := krtutil.SetupCollectionDynamic[infextv1a1.InferencePool](ctx, commonCol.Client, poolGVR, commonCol.KrtOpts.ToOptions("InferencePools")...) + svcClient := kclient.New[*corev1.Service](commonCol.Client) svcs := krt.WrapClient(svcClient, commonCol.KrtOpts.ToOptions("Services")...) - return NewPluginFromCollections(ctx, commonCol, pools, routes, svcs, commonCol.Pods, commonCol.Settings) + return NewPluginFromCollections(ctx, commonCol, pools, svcs, commonCol.Pods, commonCol.Settings) } func NewPluginFromCollections( ctx context.Context, commonCol *common.CommonCollections, poolCol krt.Collection[*infextv1a1.InferencePool], - routeCol krt.Collection[*gwv1.HTTPRoute], svcCol krt.Collection[*corev1.Service], podCol krt.Collection[krtcollections.LocalityPod], stngs settings.Settings, ) extplug.Plugin { - // Create an index on HTTPRoutes by the InferencePool they reference. - httpRoutesByInferencePool := krt.NewIndex(routeCol, func(route *gwv1.HTTPRoute) []types.NamespacedName { - var refs []types.NamespacedName - for _, rule := range route.Spec.Rules { - for _, backend := range rule.BackendRefs { - if backend.Kind != nil && *backend.Kind == wellknown.InferencePoolKind { - refs = append(refs, types.NamespacedName{ - Namespace: route.Namespace, - Name: string(backend.Name), - }) - } - } - } - return refs - }) - // The InferencePool group kind used by the BackendObjectIR and the ContributesBackendObjectIRs plugin. gk := schema.GroupKind{ Group: infextv1a1.GroupVersion.Group, @@ -90,30 +76,6 @@ func NewPluginFromCollections( // Create a BackendObjectIR from the InferencePool. 
us := krt.NewCollection(poolCol, func(kctx krt.HandlerContext, pool *infextv1a1.InferencePool) *ir.BackendObjectIR { - poolKey := types.NamespacedName{Namespace: pool.Namespace, Name: pool.Name} - matchingRoutes := httpRoutesByInferencePool.Lookup(poolKey) - - valid := false - for _, route := range matchingRoutes { - // Iterate over status.parents and check if any match the Gateway controller - for _, parent := range route.Status.Parents { - if parent.ControllerName == gwv1.GatewayController(wellknown.GatewayControllerName) { - valid = true - break - } - } - if valid { - // Only one match is required for an InferencePool to be considered managed by this plugin. - break - } - } - - if !valid { - // Skip this InferencePool if it has no valid HTTPRoute references. - // TODO [danehans]: Surface a status condition. - return nil - } - // This InferencePool is valid, create an BackendObjectIR IR representation. return &ir.BackendObjectIR{ ObjectSource: ir.ObjectSource{ @@ -262,13 +224,31 @@ type InfPoolEndpointsInputs struct { KrtOpts krtutil.KrtOptions } +func (i InfPoolEndpointsInputs) ResourceName() string { + return "inference-pool-inputs" +} + +// in case multiple policies attached to the same resource, we sort by policy creation time. 
+func (i InfPoolEndpointsInputs) CreationTime() time.Time { + // settings always created at the same time + return time.Time{} +} + +func (i InfPoolEndpointsInputs) Equals(in any) bool { + s, ok := in.(InfPoolEndpointsInputs) + if !ok { + return false + } + return i == s +} + func newInfPoolEndpointsInputs( krtOpts krtutil.KrtOptions, - infPoolBackendObjectIRs krt.Collection[ir.BackendObjectIR], + backendObjectIRs krt.Collection[ir.BackendObjectIR], podCol krt.Collection[krtcollections.LocalityPod], ) InfPoolEndpointsInputs { return InfPoolEndpointsInputs{ - BackendObjectIRs: infPoolBackendObjectIRs, + BackendObjectIRs: backendObjectIRs, Pods: podCol, KrtOpts: krtOpts, } diff --git a/internal/kgateway/helm/inference-extension/templates/endpoint-picker/resources.yaml b/internal/kgateway/helm/inference-extension/templates/endpoint-picker/resources.yaml index 0c8bac5dddce..6052f4bd14f7 100644 --- a/internal/kgateway/helm/inference-extension/templates/endpoint-picker/resources.yaml +++ b/internal/kgateway/helm/inference-extension/templates/endpoint-picker/resources.yaml @@ -4,8 +4,9 @@ kind: ServiceAccount metadata: name: {{ .Release.Name }} --- + # TODO [danehans]: EPP should use Role and RoleBinding resources: https://github.com/kubernetes-sigs/gateway-api-inference-extension/issues/224 apiVersion: rbac.authorization.k8s.io/v1 -kind: Role +kind: ClusterRole metadata: name: {{ .Release.Name }} rules: @@ -21,6 +22,7 @@ rules: - apiGroups: ["discovery.k8s.io"] resources: ["endpointslices"] verbs: ["get", "watch", "list"] + # TODO [danehans]: Unsure why the following rules are needed: https://github.com/kubernetes-sigs/gateway-api-inference-extension/issues/224 - apiGroups: - authentication.k8s.io resources: @@ -35,14 +37,15 @@ rules: - create --- apiVersion: rbac.authorization.k8s.io/v1 -kind: RoleBinding +kind: ClusterRoleBinding metadata: name: {{ .Release.Name }} subjects: - kind: ServiceAccount name: {{ .Release.Name }} + namespace: {{ $endpointPicker.poolNamespace 
}} roleRef: - kind: Role + kind: ClusterRole name: {{ .Release.Name }} --- apiVersion: apps/v1 @@ -52,7 +55,7 @@ metadata: labels: app.kubernetes.io/component: endpoint-picker app.kubernetes.io/name: {{ .Release.Name }} - app.kubernetes.io/instance: {{ .Release.Name }} + app.kubernetes.io/instance: kgateway spec: replicas: 1 selector: @@ -102,7 +105,7 @@ metadata: labels: app.kubernetes.io/component: endpoint-picker app.kubernetes.io/name: {{ .Release.Name }} - app.kubernetes.io/instance: {{ .Release.Name }} + app.kubernetes.io/instance: kgateway spec: selector: app: {{ .Release.Name }} diff --git a/internal/kgateway/krtcollections/policy.go b/internal/kgateway/krtcollections/policy.go index 6d4982bbaab7..9dda86ea5512 100644 --- a/internal/kgateway/krtcollections/policy.go +++ b/internal/kgateway/krtcollections/policy.go @@ -8,6 +8,7 @@ import ( "istio.io/istio/pkg/kube/krt" "k8s.io/apimachinery/pkg/runtime/schema" "k8s.io/apimachinery/pkg/types" + infextv1a1 "sigs.k8s.io/gateway-api-inference-extension/api/v1alpha1" gwv1 "sigs.k8s.io/gateway-api/apis/v1" gwv1a2 "sigs.k8s.io/gateway-api/apis/v1alpha2" gwv1beta1 "sigs.k8s.io/gateway-api/apis/v1beta1" @@ -16,6 +17,7 @@ import ( "github.com/kgateway-dev/kgateway/v2/internal/kgateway/ir" "github.com/kgateway-dev/kgateway/v2/internal/kgateway/translator/backendref" "github.com/kgateway-dev/kgateway/v2/internal/kgateway/utils/krtutil" + "github.com/kgateway-dev/kgateway/v2/internal/kgateway/wellknown" ) var ( @@ -646,12 +648,24 @@ func (h *RoutesIndex) resolveExtension(kctx krt.HandlerContext, ns string, ext g } func toFromBackendRef(fromns string, ref gwv1.BackendObjectReference) ir.ObjectSource { - return ir.ObjectSource{ + // Defaults to Service kind for returned ObjectSource. 
+ ret := ir.ObjectSource{ Group: strOr(ref.Group, ""), - Kind: strOr(ref.Kind, "Service"), + Kind: strOr(ref.Kind, wellknown.ServiceKind), Namespace: strOr(ref.Namespace, fromns), Name: string(ref.Name), } + + // Change to the InferencePool group/kind if needed. + if ref.Group != nil && + *ref.Group == gwv1.Group(infextv1a1.GroupVersion.Group) && + ref.Kind != nil && + *ref.Kind == wellknown.InferencePoolKind { + ret.Group = infextv1a1.GroupVersion.Group + ret.Kind = wellknown.InferencePoolKind + } + + return ret } func (h *RoutesIndex) getBackends(kctx krt.HandlerContext, src ir.ObjectSource, backendRefs []gwv1.HTTPBackendRef) []ir.HttpBackendOrDelegate { diff --git a/internal/kgateway/wellknown/controller.go b/internal/kgateway/wellknown/controller.go index 0fe97af4cc31..00a707d3fed6 100644 --- a/internal/kgateway/wellknown/controller.go +++ b/internal/kgateway/wellknown/controller.go @@ -17,4 +17,8 @@ const ( // DefaultGatewayParametersName is the name of the GatewayParameters which is attached by // parametersRef to the GatewayClass. DefaultGatewayParametersName = "kgateway" + + // InferencePoolFinalizer is the InferencePool finalizer name to ensure cluster-scoped + // objects are cleaned up. 
+ InferencePoolFinalizer = "kgateway/inferencepool-cleanup" ) diff --git a/internal/kgateway/wellknown/kube.go b/internal/kgateway/wellknown/kube.go index 34ba6fde3f90..c831841e8ea9 100644 --- a/internal/kgateway/wellknown/kube.go +++ b/internal/kgateway/wellknown/kube.go @@ -3,6 +3,7 @@ package wellknown import ( appsv1 "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" + rbacv1 "k8s.io/api/rbac/v1" ) var ( @@ -11,5 +12,9 @@ var ( ServiceGVK = corev1.SchemeGroupVersion.WithKind("Service") ServiceAccountGVK = corev1.SchemeGroupVersion.WithKind("ServiceAccount") + // RBAC GVKs; each variable's kind string must match its name. + ClusterRoleGVK = rbacv1.SchemeGroupVersion.WithKind("ClusterRole") + ClusterRoleBindingGVK = rbacv1.SchemeGroupVersion.WithKind("ClusterRoleBinding") + DeploymentGVK = appsv1.SchemeGroupVersion.WithKind("Deployment") ) diff --git a/pkg/schemes/extended_scheme.go b/pkg/schemes/extended_scheme.go index dc4e1b23afce..8be19f7329fe 100644 --- a/pkg/schemes/extended_scheme.go +++ b/pkg/schemes/extended_scheme.go @@ -34,7 +34,7 @@ func AddGatewayV1A2Scheme(restConfig *rest.Config, scheme *runtime.Scheme) error // AddInferExtV1A1Scheme adds the Inference Extension v1alpha1 scheme to the provided scheme if the InferencePool CRD exists. func AddInferExtV1A1Scheme(restConfig *rest.Config, scheme *runtime.Scheme) (bool, error) { - exists, err := CRDExists(restConfig, infextv1a1.GroupVersion.Group, gwv1a2.GroupVersion.Version, wellknown.InferencePoolKind) + exists, err := CRDExists(restConfig, infextv1a1.GroupVersion.Group, infextv1a1.GroupVersion.Version, wellknown.InferencePoolKind) if err != nil { return false, fmt.Errorf("error checking if %s CRD exists: %w", wellknown.InferencePoolKind, err) }