From 2a2d0757ce9549e8a7f4b3d1e4e5dc4e2d2a36e6 Mon Sep 17 00:00:00 2001 From: ahg-g Date: Tue, 28 Jan 2025 19:04:05 +0000 Subject: [PATCH 1/9] InferencePool Config API --- api/v1alpha1/inferencepool_types.go | 78 +++++++++++++++++ api/v1alpha1/zz_generated.deepcopy.go | 87 +++++++++++++++++++ .../api/v1alpha1/endpointpickerconfig.go | 38 ++++++++ .../api/v1alpha1/extensionconfig.go | 67 ++++++++++++++ .../api/v1alpha1/extensionconnection.go | 51 +++++++++++ .../api/v1alpha1/extensionreference.go | 56 ++++++++++++ .../api/v1alpha1/inferencepoolspec.go | 20 ++++- client-go/applyconfiguration/utils.go | 8 ++ ...ce.networking.x-k8s.io_inferencepools.yaml | 71 +++++++++++++++ 9 files changed, 474 insertions(+), 2 deletions(-) create mode 100644 client-go/applyconfiguration/api/v1alpha1/endpointpickerconfig.go create mode 100644 client-go/applyconfiguration/api/v1alpha1/extensionconfig.go create mode 100644 client-go/applyconfiguration/api/v1alpha1/extensionconnection.go create mode 100644 client-go/applyconfiguration/api/v1alpha1/extensionreference.go diff --git a/api/v1alpha1/inferencepool_types.go b/api/v1alpha1/inferencepool_types.go index d89b8df5..4623e2fb 100644 --- a/api/v1alpha1/inferencepool_types.go +++ b/api/v1alpha1/inferencepool_types.go @@ -59,8 +59,86 @@ type InferencePoolSpec struct { // +kubebuilder:validation:Maximum=65535 // +kubebuilder:validation:Required TargetPortNumber int32 `json:"targetPortNumber"` + + // EndpointPickerConfig configures the extension that runs the endpoint picking service. + // to this pool. + EndpointPickerConfig `json:"endpointPickerConfig"` +} + +type EndpointPickerConfig struct { + // Extension configures an endpoint picker as an extension service. + // + // +optional + Extension *ExtensionConfig `json:"extension"` +} + +// ExtensionConfig specifies how to configure an extension that runs the endpoint picker. +type ExtensionConfig struct { + // ExtensionRef is a reference to a service extension. 
+ ExtensionRef *ExtensionReference `json:"extensionRef"` + + // ExtensionConnection configures the connection between the gateway and the extension. + ExtensionConnection `json:"extensionConnection"` } +// ExtensionReference is a reference to the extension deployment. +type ExtensionReference struct { + // Group is the group of the referent. + // When unspecified or empty string, core API group is inferred. + // + // +optional + // +kubebuilder:default="" + Group *string `json:"group,omitempty"` + + // Kind is the Kubernetes resource kind of the referent. For example + // "Service". + // + // Defaults to "Service" when not specified. + // + // ExternalName services can refer to CNAME DNS records that may live + // outside of the cluster and as such are difficult to reason about in + // terms of conformance. They also may not be safe to forward to (see + // CVE-2021-25740 for more information). Implementations SHOULD NOT + // support ExternalName Services. + // + // Support: Core (Services with a type other than ExternalName) + // + // Support: Implementation-specific (Services with type ExternalName) + // + // +optional + // +kubebuilder:default=Service + Kind *string `json:"kind,omitempty"` + + // Name is the name of the referent. + Name string `json:"name"` +} + +// ExtensionConnection encapsulates options that configures the connection to the extension. +type ExtensionConnection struct { + // The port number on the pods running the extension. Defaults to 9002 if not set. + // + // +kubebuilder:default=9002 + TargetPortNumber *int32 `json:"targetPortNumber"` + + // Configures how the gateway handles the case when the extension is not responsive. + // Defaults to failClose. + // + // +kubebuilder:default="FailClose" + FailureMode ExtensionFailureMode `json:"failureMode"` +} + +// ExtensionFailureMode defines the options for how the gateway handles the case when the extension is not +// responsive. 
+// +kubebuilder:validation:Enum=FailOpen;FailClose +type ExtensionFailureMode string + +const ( + // The endpoint will be selected via the provider’s LB configured algorithm. + FailOpen ExtensionFailureMode = "FailOpen" + // Requests should be dropped. + FailClose ExtensionFailureMode = "FailClose" +) + // LabelKey was originally copied from: https://github.com/kubernetes-sigs/gateway-api/blob/99a3934c6bc1ce0874f3a4c5f20cafd8977ffcb4/apis/v1/shared_types.go#L694-L731 // Duplicated as to not take an unexpected dependency on gw's API. // diff --git a/api/v1alpha1/zz_generated.deepcopy.go b/api/v1alpha1/zz_generated.deepcopy.go index 27fe7579..be137eff 100644 --- a/api/v1alpha1/zz_generated.deepcopy.go +++ b/api/v1alpha1/zz_generated.deepcopy.go @@ -25,6 +25,92 @@ import ( runtime "k8s.io/apimachinery/pkg/runtime" ) +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *EndpointPickerConfig) DeepCopyInto(out *EndpointPickerConfig) { + *out = *in + if in.Extension != nil { + in, out := &in.Extension, &out.Extension + *out = new(ExtensionConfig) + (*in).DeepCopyInto(*out) + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new EndpointPickerConfig. +func (in *EndpointPickerConfig) DeepCopy() *EndpointPickerConfig { + if in == nil { + return nil + } + out := new(EndpointPickerConfig) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *ExtensionConfig) DeepCopyInto(out *ExtensionConfig) { + *out = *in + if in.ExtensionRef != nil { + in, out := &in.ExtensionRef, &out.ExtensionRef + *out = new(ExtensionReference) + (*in).DeepCopyInto(*out) + } + in.ExtensionConnection.DeepCopyInto(&out.ExtensionConnection) +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ExtensionConfig. 
+func (in *ExtensionConfig) DeepCopy() *ExtensionConfig { + if in == nil { + return nil + } + out := new(ExtensionConfig) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *ExtensionConnection) DeepCopyInto(out *ExtensionConnection) { + *out = *in + if in.TargetPortNumber != nil { + in, out := &in.TargetPortNumber, &out.TargetPortNumber + *out = new(int32) + **out = **in + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ExtensionConnection. +func (in *ExtensionConnection) DeepCopy() *ExtensionConnection { + if in == nil { + return nil + } + out := new(ExtensionConnection) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *ExtensionReference) DeepCopyInto(out *ExtensionReference) { + *out = *in + if in.Group != nil { + in, out := &in.Group, &out.Group + *out = new(string) + **out = **in + } + if in.Kind != nil { + in, out := &in.Kind, &out.Kind + *out = new(string) + **out = **in + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ExtensionReference. +func (in *ExtensionReference) DeepCopy() *ExtensionReference { + if in == nil { + return nil + } + out := new(ExtensionReference) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *InferenceModel) DeepCopyInto(out *InferenceModel) { *out = *in @@ -203,6 +289,7 @@ func (in *InferencePoolSpec) DeepCopyInto(out *InferencePoolSpec) { (*out)[key] = val } } + in.EndpointPickerConfig.DeepCopyInto(&out.EndpointPickerConfig) } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new InferencePoolSpec. 
diff --git a/client-go/applyconfiguration/api/v1alpha1/endpointpickerconfig.go b/client-go/applyconfiguration/api/v1alpha1/endpointpickerconfig.go new file mode 100644 index 00000000..63651fc2 --- /dev/null +++ b/client-go/applyconfiguration/api/v1alpha1/endpointpickerconfig.go @@ -0,0 +1,38 @@ +/* +Copyright 2024 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ +// Code generated by applyconfiguration-gen. DO NOT EDIT. + +package v1alpha1 + +// EndpointPickerConfigApplyConfiguration represents a declarative configuration of the EndpointPickerConfig type for use +// with apply. +type EndpointPickerConfigApplyConfiguration struct { + Extension *ExtensionConfigApplyConfiguration `json:"extension,omitempty"` +} + +// EndpointPickerConfigApplyConfiguration constructs a declarative configuration of the EndpointPickerConfig type for use with +// apply. +func EndpointPickerConfig() *EndpointPickerConfigApplyConfiguration { + return &EndpointPickerConfigApplyConfiguration{} +} + +// WithExtension sets the Extension field in the declarative configuration to the given value +// and returns the receiver, so that objects can be built by chaining "With" function invocations. +// If called multiple times, the Extension field is set to the value of the last call. 
+func (b *EndpointPickerConfigApplyConfiguration) WithExtension(value *ExtensionConfigApplyConfiguration) *EndpointPickerConfigApplyConfiguration { + b.Extension = value + return b +} diff --git a/client-go/applyconfiguration/api/v1alpha1/extensionconfig.go b/client-go/applyconfiguration/api/v1alpha1/extensionconfig.go new file mode 100644 index 00000000..16a2c4c1 --- /dev/null +++ b/client-go/applyconfiguration/api/v1alpha1/extensionconfig.go @@ -0,0 +1,67 @@ +/* +Copyright 2024 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ +// Code generated by applyconfiguration-gen. DO NOT EDIT. + +package v1alpha1 + +import ( + apiv1alpha1 "inference.networking.x-k8s.io/gateway-api-inference-extension/api/v1alpha1" +) + +// ExtensionConfigApplyConfiguration represents a declarative configuration of the ExtensionConfig type for use +// with apply. +type ExtensionConfigApplyConfiguration struct { + ExtensionRef *ExtensionReferenceApplyConfiguration `json:"extensionRef,omitempty"` + *ExtensionConnectionApplyConfiguration `json:"extensionConnection,omitempty"` +} + +// ExtensionConfigApplyConfiguration constructs a declarative configuration of the ExtensionConfig type for use with +// apply. 
+func ExtensionConfig() *ExtensionConfigApplyConfiguration { + return &ExtensionConfigApplyConfiguration{} +} + +// WithExtensionRef sets the ExtensionRef field in the declarative configuration to the given value +// and returns the receiver, so that objects can be built by chaining "With" function invocations. +// If called multiple times, the ExtensionRef field is set to the value of the last call. +func (b *ExtensionConfigApplyConfiguration) WithExtensionRef(value *ExtensionReferenceApplyConfiguration) *ExtensionConfigApplyConfiguration { + b.ExtensionRef = value + return b +} + +// WithTargetPortNumber sets the TargetPortNumber field in the declarative configuration to the given value +// and returns the receiver, so that objects can be built by chaining "With" function invocations. +// If called multiple times, the TargetPortNumber field is set to the value of the last call. +func (b *ExtensionConfigApplyConfiguration) WithTargetPortNumber(value int32) *ExtensionConfigApplyConfiguration { + b.ensureExtensionConnectionApplyConfigurationExists() + b.ExtensionConnectionApplyConfiguration.TargetPortNumber = &value + return b +} + +// WithFailureMode sets the FailureMode field in the declarative configuration to the given value +// and returns the receiver, so that objects can be built by chaining "With" function invocations. +// If called multiple times, the FailureMode field is set to the value of the last call. 
+func (b *ExtensionConfigApplyConfiguration) WithFailureMode(value apiv1alpha1.ExtensionFailureMode) *ExtensionConfigApplyConfiguration { + b.ensureExtensionConnectionApplyConfigurationExists() + b.ExtensionConnectionApplyConfiguration.FailureMode = &value + return b +} + +func (b *ExtensionConfigApplyConfiguration) ensureExtensionConnectionApplyConfigurationExists() { + if b.ExtensionConnectionApplyConfiguration == nil { + b.ExtensionConnectionApplyConfiguration = &ExtensionConnectionApplyConfiguration{} + } +} diff --git a/client-go/applyconfiguration/api/v1alpha1/extensionconnection.go b/client-go/applyconfiguration/api/v1alpha1/extensionconnection.go new file mode 100644 index 00000000..f8f3e005 --- /dev/null +++ b/client-go/applyconfiguration/api/v1alpha1/extensionconnection.go @@ -0,0 +1,51 @@ +/* +Copyright 2024 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ +// Code generated by applyconfiguration-gen. DO NOT EDIT. + +package v1alpha1 + +import ( + apiv1alpha1 "inference.networking.x-k8s.io/gateway-api-inference-extension/api/v1alpha1" +) + +// ExtensionConnectionApplyConfiguration represents a declarative configuration of the ExtensionConnection type for use +// with apply. 
+type ExtensionConnectionApplyConfiguration struct { + TargetPortNumber *int32 `json:"targetPortNumber,omitempty"` + FailureMode *apiv1alpha1.ExtensionFailureMode `json:"failureMode,omitempty"` +} + +// ExtensionConnectionApplyConfiguration constructs a declarative configuration of the ExtensionConnection type for use with +// apply. +func ExtensionConnection() *ExtensionConnectionApplyConfiguration { + return &ExtensionConnectionApplyConfiguration{} +} + +// WithTargetPortNumber sets the TargetPortNumber field in the declarative configuration to the given value +// and returns the receiver, so that objects can be built by chaining "With" function invocations. +// If called multiple times, the TargetPortNumber field is set to the value of the last call. +func (b *ExtensionConnectionApplyConfiguration) WithTargetPortNumber(value int32) *ExtensionConnectionApplyConfiguration { + b.TargetPortNumber = &value + return b +} + +// WithFailureMode sets the FailureMode field in the declarative configuration to the given value +// and returns the receiver, so that objects can be built by chaining "With" function invocations. +// If called multiple times, the FailureMode field is set to the value of the last call. +func (b *ExtensionConnectionApplyConfiguration) WithFailureMode(value apiv1alpha1.ExtensionFailureMode) *ExtensionConnectionApplyConfiguration { + b.FailureMode = &value + return b +} diff --git a/client-go/applyconfiguration/api/v1alpha1/extensionreference.go b/client-go/applyconfiguration/api/v1alpha1/extensionreference.go new file mode 100644 index 00000000..b1685634 --- /dev/null +++ b/client-go/applyconfiguration/api/v1alpha1/extensionreference.go @@ -0,0 +1,56 @@ +/* +Copyright 2024 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ +// Code generated by applyconfiguration-gen. DO NOT EDIT. + +package v1alpha1 + +// ExtensionReferenceApplyConfiguration represents a declarative configuration of the ExtensionReference type for use +// with apply. +type ExtensionReferenceApplyConfiguration struct { + Group *string `json:"group,omitempty"` + Kind *string `json:"kind,omitempty"` + Name *string `json:"name,omitempty"` +} + +// ExtensionReferenceApplyConfiguration constructs a declarative configuration of the ExtensionReference type for use with +// apply. +func ExtensionReference() *ExtensionReferenceApplyConfiguration { + return &ExtensionReferenceApplyConfiguration{} +} + +// WithGroup sets the Group field in the declarative configuration to the given value +// and returns the receiver, so that objects can be built by chaining "With" function invocations. +// If called multiple times, the Group field is set to the value of the last call. +func (b *ExtensionReferenceApplyConfiguration) WithGroup(value string) *ExtensionReferenceApplyConfiguration { + b.Group = &value + return b +} + +// WithKind sets the Kind field in the declarative configuration to the given value +// and returns the receiver, so that objects can be built by chaining "With" function invocations. +// If called multiple times, the Kind field is set to the value of the last call. 
+func (b *ExtensionReferenceApplyConfiguration) WithKind(value string) *ExtensionReferenceApplyConfiguration { + b.Kind = &value + return b +} + +// WithName sets the Name field in the declarative configuration to the given value +// and returns the receiver, so that objects can be built by chaining "With" function invocations. +// If called multiple times, the Name field is set to the value of the last call. +func (b *ExtensionReferenceApplyConfiguration) WithName(value string) *ExtensionReferenceApplyConfiguration { + b.Name = &value + return b +} diff --git a/client-go/applyconfiguration/api/v1alpha1/inferencepoolspec.go b/client-go/applyconfiguration/api/v1alpha1/inferencepoolspec.go index e8084155..82f2eb68 100644 --- a/client-go/applyconfiguration/api/v1alpha1/inferencepoolspec.go +++ b/client-go/applyconfiguration/api/v1alpha1/inferencepoolspec.go @@ -24,8 +24,9 @@ import ( // InferencePoolSpecApplyConfiguration represents a declarative configuration of the InferencePoolSpec type for use // with apply. type InferencePoolSpecApplyConfiguration struct { - Selector map[apiv1alpha1.LabelKey]apiv1alpha1.LabelValue `json:"selector,omitempty"` - TargetPortNumber *int32 `json:"targetPortNumber,omitempty"` + Selector map[apiv1alpha1.LabelKey]apiv1alpha1.LabelValue `json:"selector,omitempty"` + TargetPortNumber *int32 `json:"targetPortNumber,omitempty"` + *EndpointPickerConfigApplyConfiguration `json:"endpointPickerConfig,omitempty"` } // InferencePoolSpecApplyConfiguration constructs a declarative configuration of the InferencePoolSpec type for use with @@ -55,3 +56,18 @@ func (b *InferencePoolSpecApplyConfiguration) WithTargetPortNumber(value int32) b.TargetPortNumber = &value return b } + +// WithExtension sets the Extension field in the declarative configuration to the given value +// and returns the receiver, so that objects can be built by chaining "With" function invocations. 
+// If called multiple times, the Extension field is set to the value of the last call. +func (b *InferencePoolSpecApplyConfiguration) WithExtension(value *ExtensionConfigApplyConfiguration) *InferencePoolSpecApplyConfiguration { + b.ensureEndpointPickerConfigApplyConfigurationExists() + b.EndpointPickerConfigApplyConfiguration.Extension = value + return b +} + +func (b *InferencePoolSpecApplyConfiguration) ensureEndpointPickerConfigApplyConfigurationExists() { + if b.EndpointPickerConfigApplyConfiguration == nil { + b.EndpointPickerConfigApplyConfiguration = &EndpointPickerConfigApplyConfiguration{} + } +} diff --git a/client-go/applyconfiguration/utils.go b/client-go/applyconfiguration/utils.go index eb0264b3..1074b8a7 100644 --- a/client-go/applyconfiguration/utils.go +++ b/client-go/applyconfiguration/utils.go @@ -31,6 +31,14 @@ import ( func ForKind(kind schema.GroupVersionKind) interface{} { switch kind { // Group=api, Version=v1alpha1 + case v1alpha1.SchemeGroupVersion.WithKind("EndpointPickerConfig"): + return &apiv1alpha1.EndpointPickerConfigApplyConfiguration{} + case v1alpha1.SchemeGroupVersion.WithKind("ExtensionConfig"): + return &apiv1alpha1.ExtensionConfigApplyConfiguration{} + case v1alpha1.SchemeGroupVersion.WithKind("ExtensionConnection"): + return &apiv1alpha1.ExtensionConnectionApplyConfiguration{} + case v1alpha1.SchemeGroupVersion.WithKind("ExtensionReference"): + return &apiv1alpha1.ExtensionReferenceApplyConfiguration{} case v1alpha1.SchemeGroupVersion.WithKind("InferenceModel"): return &apiv1alpha1.InferenceModelApplyConfiguration{} case v1alpha1.SchemeGroupVersion.WithKind("InferenceModelSpec"): diff --git a/config/crd/bases/inference.networking.x-k8s.io_inferencepools.yaml b/config/crd/bases/inference.networking.x-k8s.io_inferencepools.yaml index 8e0ff54d..c5ef749e 100644 --- a/config/crd/bases/inference.networking.x-k8s.io_inferencepools.yaml +++ b/config/crd/bases/inference.networking.x-k8s.io_inferencepools.yaml @@ -39,6 +39,76 @@ 
spec: spec: description: InferencePoolSpec defines the desired state of InferencePool properties: + endpointPickerConfig: + description: |- + EndpointPickerConfig configures the extension that runs the endpoint picking service. + to this pool. + properties: + extension: + description: Extension configures an endpoint picker as an extension + service. + properties: + extensionConnection: + description: ExtensionConnection configures the connection + between the gateway and the extension. + properties: + failureMode: + default: FailClose + description: |- + Configures how the gateway handles the case when the extension is not responsive. + Defaults to failClose. + enum: + - FailOpen + - FailClose + type: string + targetPortNumber: + default: 9002 + description: The port number on the pods running the extension. + Defaults to 9002 if not set. + format: int32 + type: integer + required: + - failureMode + - targetPortNumber + type: object + extensionRef: + description: ExtensionRef is a reference to a service extension. + properties: + group: + default: "" + description: |- + Group is the group of the referent. + When unspecified or empty string, core API group is inferred. + type: string + kind: + default: Service + description: |- + Kind is the Kubernetes resource kind of the referent. For example + "Service". + + Defaults to "Service" when not specified. + + ExternalName services can refer to CNAME DNS records that may live + outside of the cluster and as such are difficult to reason about in + terms of conformance. They also may not be safe to forward to (see + CVE-2021-25740 for more information). Implementations SHOULD NOT + support ExternalName Services. + + Support: Core (Services with a type other than ExternalName) + + Support: Implementation-specific (Services with type ExternalName) + type: string + name: + description: Name is the name of the referent. 
+ type: string + required: + - name + type: object + required: + - extensionConnection + - extensionRef + type: object + type: object selector: additionalProperties: description: |- @@ -72,6 +142,7 @@ spec: minimum: 1 type: integer required: + - endpointPickerConfig - selector - targetPortNumber type: object From 0f3d2a88c0181cc6e24ef86265710bab2f1659d3 Mon Sep 17 00:00:00 2001 From: ahg-g Date: Thu, 30 Jan 2025 01:06:24 +0000 Subject: [PATCH 2/9] Addressing comments --- api/v1alpha1/inferencepool_types.go | 11 +++++------ api/v1alpha1/zz_generated.deepcopy.go | 12 ++++++------ .../api/v1alpha1/extensionconfig.go | 9 --------- .../api/v1alpha1/extensionconnection.go | 11 +---------- .../api/v1alpha1/extensionreference.go | 15 ++++++++++++--- ...rence.networking.x-k8s.io_inferencepools.yaml | 16 ++++++++-------- 6 files changed, 32 insertions(+), 42 deletions(-) diff --git a/api/v1alpha1/inferencepool_types.go b/api/v1alpha1/inferencepool_types.go index 4623e2fb..714040f8 100644 --- a/api/v1alpha1/inferencepool_types.go +++ b/api/v1alpha1/inferencepool_types.go @@ -98,7 +98,7 @@ type ExtensionReference struct { // ExternalName services can refer to CNAME DNS records that may live // outside of the cluster and as such are difficult to reason about in // terms of conformance. They also may not be safe to forward to (see - // CVE-2021-25740 for more information). Implementations SHOULD NOT + // CVE-2021-25740 for more information). Implementations MUST NOT // support ExternalName Services. // // Support: Core (Services with a type other than ExternalName) @@ -111,15 +111,14 @@ type ExtensionReference struct { // Name is the name of the referent. Name string `json:"name"` + + // The port number on the pods running the extension. When unspecified, implementations are recommended + // to default it to 9002 and the Kind is Service. 
+ TargetPortNumber *int32 `json:"targetPortNumber"` } // ExtensionConnection encapsulates options that configures the connection to the extension. type ExtensionConnection struct { - // The port number on the pods running the extension. Defaults to 9002 if not set. - // - // +kubebuilder:default=9002 - TargetPortNumber *int32 `json:"targetPortNumber"` - // Configures how the gateway handles the case when the extension is not responsive. // Defaults to failClose. // diff --git a/api/v1alpha1/zz_generated.deepcopy.go b/api/v1alpha1/zz_generated.deepcopy.go index be137eff..c67b8907 100644 --- a/api/v1alpha1/zz_generated.deepcopy.go +++ b/api/v1alpha1/zz_generated.deepcopy.go @@ -53,7 +53,7 @@ func (in *ExtensionConfig) DeepCopyInto(out *ExtensionConfig) { *out = new(ExtensionReference) (*in).DeepCopyInto(*out) } - in.ExtensionConnection.DeepCopyInto(&out.ExtensionConnection) + out.ExtensionConnection = in.ExtensionConnection } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ExtensionConfig. @@ -69,11 +69,6 @@ func (in *ExtensionConfig) DeepCopy() *ExtensionConfig { // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *ExtensionConnection) DeepCopyInto(out *ExtensionConnection) { *out = *in - if in.TargetPortNumber != nil { - in, out := &in.TargetPortNumber, &out.TargetPortNumber - *out = new(int32) - **out = **in - } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ExtensionConnection. @@ -99,6 +94,11 @@ func (in *ExtensionReference) DeepCopyInto(out *ExtensionReference) { *out = new(string) **out = **in } + if in.TargetPortNumber != nil { + in, out := &in.TargetPortNumber, &out.TargetPortNumber + *out = new(int32) + **out = **in + } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ExtensionReference. 
diff --git a/client-go/applyconfiguration/api/v1alpha1/extensionconfig.go b/client-go/applyconfiguration/api/v1alpha1/extensionconfig.go index 16a2c4c1..77b01467 100644 --- a/client-go/applyconfiguration/api/v1alpha1/extensionconfig.go +++ b/client-go/applyconfiguration/api/v1alpha1/extensionconfig.go @@ -42,15 +42,6 @@ func (b *ExtensionConfigApplyConfiguration) WithExtensionRef(value *ExtensionRef return b } -// WithTargetPortNumber sets the TargetPortNumber field in the declarative configuration to the given value -// and returns the receiver, so that objects can be built by chaining "With" function invocations. -// If called multiple times, the TargetPortNumber field is set to the value of the last call. -func (b *ExtensionConfigApplyConfiguration) WithTargetPortNumber(value int32) *ExtensionConfigApplyConfiguration { - b.ensureExtensionConnectionApplyConfigurationExists() - b.ExtensionConnectionApplyConfiguration.TargetPortNumber = &value - return b -} - // WithFailureMode sets the FailureMode field in the declarative configuration to the given value // and returns the receiver, so that objects can be built by chaining "With" function invocations. // If called multiple times, the FailureMode field is set to the value of the last call. diff --git a/client-go/applyconfiguration/api/v1alpha1/extensionconnection.go b/client-go/applyconfiguration/api/v1alpha1/extensionconnection.go index f8f3e005..be9eeaa1 100644 --- a/client-go/applyconfiguration/api/v1alpha1/extensionconnection.go +++ b/client-go/applyconfiguration/api/v1alpha1/extensionconnection.go @@ -24,8 +24,7 @@ import ( // ExtensionConnectionApplyConfiguration represents a declarative configuration of the ExtensionConnection type for use // with apply. 
type ExtensionConnectionApplyConfiguration struct { - TargetPortNumber *int32 `json:"targetPortNumber,omitempty"` - FailureMode *apiv1alpha1.ExtensionFailureMode `json:"failureMode,omitempty"` + FailureMode *apiv1alpha1.ExtensionFailureMode `json:"failureMode,omitempty"` } // ExtensionConnectionApplyConfiguration constructs a declarative configuration of the ExtensionConnection type for use with @@ -34,14 +33,6 @@ func ExtensionConnection() *ExtensionConnectionApplyConfiguration { return &ExtensionConnectionApplyConfiguration{} } -// WithTargetPortNumber sets the TargetPortNumber field in the declarative configuration to the given value -// and returns the receiver, so that objects can be built by chaining "With" function invocations. -// If called multiple times, the TargetPortNumber field is set to the value of the last call. -func (b *ExtensionConnectionApplyConfiguration) WithTargetPortNumber(value int32) *ExtensionConnectionApplyConfiguration { - b.TargetPortNumber = &value - return b -} - // WithFailureMode sets the FailureMode field in the declarative configuration to the given value // and returns the receiver, so that objects can be built by chaining "With" function invocations. // If called multiple times, the FailureMode field is set to the value of the last call. diff --git a/client-go/applyconfiguration/api/v1alpha1/extensionreference.go b/client-go/applyconfiguration/api/v1alpha1/extensionreference.go index b1685634..c72c0306 100644 --- a/client-go/applyconfiguration/api/v1alpha1/extensionreference.go +++ b/client-go/applyconfiguration/api/v1alpha1/extensionreference.go @@ -20,9 +20,10 @@ package v1alpha1 // ExtensionReferenceApplyConfiguration represents a declarative configuration of the ExtensionReference type for use // with apply. 
type ExtensionReferenceApplyConfiguration struct { - Group *string `json:"group,omitempty"` - Kind *string `json:"kind,omitempty"` - Name *string `json:"name,omitempty"` + Group *string `json:"group,omitempty"` + Kind *string `json:"kind,omitempty"` + Name *string `json:"name,omitempty"` + TargetPortNumber *int32 `json:"targetPortNumber,omitempty"` } // ExtensionReferenceApplyConfiguration constructs a declarative configuration of the ExtensionReference type for use with @@ -54,3 +55,11 @@ func (b *ExtensionReferenceApplyConfiguration) WithName(value string) *Extension b.Name = &value return b } + +// WithTargetPortNumber sets the TargetPortNumber field in the declarative configuration to the given value +// and returns the receiver, so that objects can be built by chaining "With" function invocations. +// If called multiple times, the TargetPortNumber field is set to the value of the last call. +func (b *ExtensionReferenceApplyConfiguration) WithTargetPortNumber(value int32) *ExtensionReferenceApplyConfiguration { + b.TargetPortNumber = &value + return b +} diff --git a/config/crd/bases/inference.networking.x-k8s.io_inferencepools.yaml b/config/crd/bases/inference.networking.x-k8s.io_inferencepools.yaml index c5ef749e..b93b3f28 100644 --- a/config/crd/bases/inference.networking.x-k8s.io_inferencepools.yaml +++ b/config/crd/bases/inference.networking.x-k8s.io_inferencepools.yaml @@ -61,15 +61,8 @@ spec: - FailOpen - FailClose type: string - targetPortNumber: - default: 9002 - description: The port number on the pods running the extension. - Defaults to 9002 if not set. - format: int32 - type: integer required: - failureMode - - targetPortNumber type: object extensionRef: description: ExtensionRef is a reference to a service extension. @@ -91,7 +84,7 @@ spec: ExternalName services can refer to CNAME DNS records that may live outside of the cluster and as such are difficult to reason about in terms of conformance. 
They also may not be safe to forward to (see - CVE-2021-25740 for more information). Implementations SHOULD NOT + CVE-2021-25740 for more information). Implementations MUST NOT support ExternalName Services. Support: Core (Services with a type other than ExternalName) @@ -101,8 +94,15 @@ spec: name: description: Name is the name of the referent. type: string + targetPortNumber: + description: |- + The port number on the pods running the extension. When unspecified, implementations are recommended + to default it to 9002 and the Kind is Service. + format: int32 + type: integer required: - name + - targetPortNumber type: object required: - extensionConnection From 3549e3246759d6befc2cb43862fcc0b08f49eddf Mon Sep 17 00:00:00 2001 From: ahg-g Date: Thu, 30 Jan 2025 17:44:20 +0000 Subject: [PATCH 3/9] Addressing comments round 2 --- api/v1alpha1/inferencepool_types.go | 17 ++++++++--------- api/v1alpha1/zz_generated.deepcopy.go | 7 ++++++- ...ence.networking.x-k8s.io_inferencepools.yaml | 7 ------- 3 files changed, 14 insertions(+), 17 deletions(-) diff --git a/api/v1alpha1/inferencepool_types.go b/api/v1alpha1/inferencepool_types.go index 714040f8..0d52aea0 100644 --- a/api/v1alpha1/inferencepool_types.go +++ b/api/v1alpha1/inferencepool_types.go @@ -69,13 +69,15 @@ type EndpointPickerConfig struct { // Extension configures an endpoint picker as an extension service. // // +optional - Extension *ExtensionConfig `json:"extension"` + Extension *ExtensionConfig `json:"extension,omitempty"` } // ExtensionConfig specifies how to configure an extension that runs the endpoint picker. type ExtensionConfig struct { // ExtensionRef is a reference to a service extension. - ExtensionRef *ExtensionReference `json:"extensionRef"` + // + // +optional + ExtensionRef *ExtensionReference `json:"extensionRef,omitempty"` // ExtensionConnection configures the connection between the gateway and the extension. 
ExtensionConnection `json:"extensionConnection"` @@ -101,10 +103,6 @@ type ExtensionReference struct { // CVE-2021-25740 for more information). Implementations MUST NOT // support ExternalName Services. // - // Support: Core (Services with a type other than ExternalName) - // - // Support: Implementation-specific (Services with type ExternalName) - // // +optional // +kubebuilder:default=Service Kind *string `json:"kind,omitempty"` @@ -122,8 +120,9 @@ type ExtensionConnection struct { // Configures how the gateway handles the case when the extension is not responsive. // Defaults to failClose. // + // +optional // +kubebuilder:default="FailClose" - FailureMode ExtensionFailureMode `json:"failureMode"` + FailureMode *ExtensionFailureMode `json:"failureMode"` } // ExtensionFailureMode defines the options for how the gateway handles the case when the extension is not @@ -132,9 +131,9 @@ type ExtensionConnection struct { type ExtensionFailureMode string const ( - // The endpoint will be selected via the provider’s LB configured algorithm. + // FailOpen specifies that the proxy should not drop the request and forward the request to an endpoint of its picking. FailOpen ExtensionFailureMode = "FailOpen" - // Requests should be dropped. + // FailClose specifies that the proxy should drop the request. FailClose ExtensionFailureMode = "FailClose" ) diff --git a/api/v1alpha1/zz_generated.deepcopy.go b/api/v1alpha1/zz_generated.deepcopy.go index c67b8907..321176a4 100644 --- a/api/v1alpha1/zz_generated.deepcopy.go +++ b/api/v1alpha1/zz_generated.deepcopy.go @@ -53,7 +53,7 @@ func (in *ExtensionConfig) DeepCopyInto(out *ExtensionConfig) { *out = new(ExtensionReference) (*in).DeepCopyInto(*out) } - out.ExtensionConnection = in.ExtensionConnection + in.ExtensionConnection.DeepCopyInto(&out.ExtensionConnection) } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ExtensionConfig.
@@ -69,6 +69,11 @@ func (in *ExtensionConfig) DeepCopy() *ExtensionConfig { // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *ExtensionConnection) DeepCopyInto(out *ExtensionConnection) { *out = *in + if in.FailureMode != nil { + in, out := &in.FailureMode, &out.FailureMode + *out = new(ExtensionFailureMode) + **out = **in + } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ExtensionConnection. diff --git a/config/crd/bases/inference.networking.x-k8s.io_inferencepools.yaml b/config/crd/bases/inference.networking.x-k8s.io_inferencepools.yaml index b93b3f28..0a885d2a 100644 --- a/config/crd/bases/inference.networking.x-k8s.io_inferencepools.yaml +++ b/config/crd/bases/inference.networking.x-k8s.io_inferencepools.yaml @@ -61,8 +61,6 @@ spec: - FailOpen - FailClose type: string - required: - - failureMode type: object extensionRef: description: ExtensionRef is a reference to a service extension. @@ -86,10 +84,6 @@ spec: terms of conformance. They also may not be safe to forward to (see CVE-2021-25740 for more information). Implementations MUST NOT support ExternalName Services. - - Support: Core (Services with a type other than ExternalName) - - Support: Implementation-specific (Services with type ExternalName) type: string name: description: Name is the name of the referent. 
@@ -106,7 +100,6 @@ spec: type: object required: - extensionConnection - - extensionRef type: object type: object selector: From 2a932cfe2c385323b076e1b852a9cb6f6b770786 Mon Sep 17 00:00:00 2001 From: ahg-g Date: Thu, 30 Jan 2025 19:12:14 +0000 Subject: [PATCH 4/9] Addressing comments round 3 --- api/v1alpha1/inferencepool_types.go | 2 ++ 1 file changed, 2 insertions(+) diff --git a/api/v1alpha1/inferencepool_types.go b/api/v1alpha1/inferencepool_types.go index 0d52aea0..92ac3dfa 100644 --- a/api/v1alpha1/inferencepool_types.go +++ b/api/v1alpha1/inferencepool_types.go @@ -65,6 +65,8 @@ type InferencePoolSpec struct { EndpointPickerConfig `json:"endpointPickerConfig"` } +// EndpointPickerConfig specifies configuration needed by the proxy to find and connect to the Endpoint Picker. +// This type is intended to be a union of mutually exclusive configuration options. type EndpointPickerConfig struct { // Extension configures an endpoint picker as an extension service. // From dd10ae2d8e11e95f338451fb5b71dbb37a963916 Mon Sep 17 00:00:00 2001 From: ahg-g Date: Thu, 30 Jan 2025 19:45:50 +0000 Subject: [PATCH 5/9] Make ExtensionConfig required --- api/v1alpha1/inferencepool_types.go | 2 +- .../crd/bases/inference.networking.x-k8s.io_inferencepools.yaml | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/api/v1alpha1/inferencepool_types.go b/api/v1alpha1/inferencepool_types.go index 92ac3dfa..4d4821b0 100644 --- a/api/v1alpha1/inferencepool_types.go +++ b/api/v1alpha1/inferencepool_types.go @@ -70,7 +70,7 @@ type InferencePoolSpec struct { type EndpointPickerConfig struct { // Extension configures an endpoint picker as an extension service. 
// - // +optional + // +kubebuilder:validation:Required Extension *ExtensionConfig `json:"extension,omitempty"` } diff --git a/config/crd/bases/inference.networking.x-k8s.io_inferencepools.yaml b/config/crd/bases/inference.networking.x-k8s.io_inferencepools.yaml index 0a885d2a..c748ec46 100644 --- a/config/crd/bases/inference.networking.x-k8s.io_inferencepools.yaml +++ b/config/crd/bases/inference.networking.x-k8s.io_inferencepools.yaml @@ -101,6 +101,8 @@ spec: required: - extensionConnection type: object + required: + - extension type: object selector: additionalProperties: From dec564c76b8655ccdbe92146af462242bc6c7359 Mon Sep 17 00:00:00 2001 From: ahg-g Date: Thu, 30 Jan 2025 22:05:50 +0000 Subject: [PATCH 6/9] Cleanup --- api/v1alpha1/inferencepool_types.go | 20 ++++++++++--------- api/v1alpha1/zz_generated.deepcopy.go | 6 +----- ...ce.networking.x-k8s.io_inferencepools.yaml | 6 +++--- 3 files changed, 15 insertions(+), 17 deletions(-) diff --git a/api/v1alpha1/inferencepool_types.go b/api/v1alpha1/inferencepool_types.go index 4d4821b0..2baa1fbd 100644 --- a/api/v1alpha1/inferencepool_types.go +++ b/api/v1alpha1/inferencepool_types.go @@ -60,26 +60,24 @@ type InferencePoolSpec struct { // +kubebuilder:validation:Required TargetPortNumber int32 `json:"targetPortNumber"` - // EndpointPickerConfig configures the extension that runs the endpoint picking service. - // to this pool. + // EndpointPickerConfig specifies the configuration needed by the proxy to discover and connect to the endpoint + // picker service that picks endpoints for the requests routed to this pool. EndpointPickerConfig `json:"endpointPickerConfig"` } -// EndpointPickerConfig specifies configuration needed by the proxy to find and connect to the Endpoint Picker. -// This type is intended to be a union of mutually exclusive configuration options. +// EndpointPickerConfig specifies the configuration needed by the proxy to discover and connect to the endpoint picker extension. 
+// This type is intended to be a union of mutually exclusive configuration options that we may add in the future. type EndpointPickerConfig struct { // Extension configures an endpoint picker as an extension service. // // +kubebuilder:validation:Required - Extension *ExtensionConfig `json:"extension,omitempty"` + Extension *ExtensionConfig `json:"extension"` } // ExtensionConfig specifies how to configure an extension that runs the endpoint picker. type ExtensionConfig struct { // ExtensionRef is a reference to a service extension. - // - // +optional - ExtensionRef *ExtensionReference `json:"extensionRef,omitempty"` + ExtensionRef ExtensionReference `json:"extensionRef"` // ExtensionConnection configures the connection between the gateway and the extension. ExtensionConnection `json:"extensionConnection"` @@ -110,11 +108,15 @@ type ExtensionReference struct { Kind *string `json:"kind,omitempty"` // Name is the name of the referent. + // + // +kubebuilder:validation:Required Name string `json:"name"` // The port number on the pods running the extension. When unspecified, implementations are recommended // to default it to 9002 and the Kind is Service. - TargetPortNumber *int32 `json:"targetPortNumber"` + // + // +optional + TargetPortNumber *int32 `json:"targetPortNumber,omitempty"` } // ExtensionConnection encapsulates options that configures the connection to the extension. diff --git a/api/v1alpha1/zz_generated.deepcopy.go b/api/v1alpha1/zz_generated.deepcopy.go index 321176a4..44ed9c5d 100644 --- a/api/v1alpha1/zz_generated.deepcopy.go +++ b/api/v1alpha1/zz_generated.deepcopy.go @@ -48,11 +48,7 @@ func (in *EndpointPickerConfig) DeepCopy() *EndpointPickerConfig { // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
func (in *ExtensionConfig) DeepCopyInto(out *ExtensionConfig) { *out = *in - if in.ExtensionRef != nil { - in, out := &in.ExtensionRef, &out.ExtensionRef - *out = new(ExtensionReference) - (*in).DeepCopyInto(*out) - } + in.ExtensionRef.DeepCopyInto(&out.ExtensionRef) in.ExtensionConnection.DeepCopyInto(&out.ExtensionConnection) } diff --git a/config/crd/bases/inference.networking.x-k8s.io_inferencepools.yaml b/config/crd/bases/inference.networking.x-k8s.io_inferencepools.yaml index c748ec46..f95dd4f4 100644 --- a/config/crd/bases/inference.networking.x-k8s.io_inferencepools.yaml +++ b/config/crd/bases/inference.networking.x-k8s.io_inferencepools.yaml @@ -41,8 +41,8 @@ spec: properties: endpointPickerConfig: description: |- - EndpointPickerConfig configures the extension that runs the endpoint picking service. - to this pool. + EndpointPickerConfig specifies the configuration needed by the proxy to discover and connect to the endpoint + picker service that picks endpoints for the requests routed to this pool. 
properties: extension: description: Extension configures an endpoint picker as an extension @@ -96,10 +96,10 @@ spec: type: integer required: - name - - targetPortNumber type: object required: - extensionConnection + - extensionRef type: object required: - extension From 7a6f111d554f3af831710a28ad552f3e50512a52 Mon Sep 17 00:00:00 2001 From: ahg-g Date: Fri, 31 Jan 2025 05:19:15 +0000 Subject: [PATCH 7/9] add inline --- api/v1alpha1/inferencepool_types.go | 6 +- .../api/v1alpha1/extensionconfig.go | 11 +- .../api/v1alpha1/inferencepoolspec.go | 13 +-- ...ce.networking.x-k8s.io_inferencepools.yaml | 102 ++++++++---------- 4 files changed, 52 insertions(+), 80 deletions(-) diff --git a/api/v1alpha1/inferencepool_types.go b/api/v1alpha1/inferencepool_types.go index 2baa1fbd..32103511 100644 --- a/api/v1alpha1/inferencepool_types.go +++ b/api/v1alpha1/inferencepool_types.go @@ -62,7 +62,7 @@ type InferencePoolSpec struct { // EndpointPickerConfig specifies the configuration needed by the proxy to discover and connect to the endpoint // picker service that picks endpoints for the requests routed to this pool. - EndpointPickerConfig `json:"endpointPickerConfig"` + EndpointPickerConfig `json:",inline"` } // EndpointPickerConfig specifies the configuration needed by the proxy to discover and connect to the endpoint picker extension. @@ -71,7 +71,7 @@ type EndpointPickerConfig struct { // Extension configures an endpoint picker as an extension service. // // +kubebuilder:validation:Required - Extension *ExtensionConfig `json:"extension"` + Extension *ExtensionConfig `json:"extension,omitempty"` } // ExtensionConfig specifies how to configure an extension that runs the endpoint picker. @@ -80,7 +80,7 @@ type ExtensionConfig struct { ExtensionRef ExtensionReference `json:"extensionRef"` // ExtensionConnection configures the connection between the gateway and the extension. 
- ExtensionConnection `json:"extensionConnection"` + ExtensionConnection `json:",inline"` } // ExtensionReference is a reference to the extension deployment. diff --git a/client-go/applyconfiguration/api/v1alpha1/extensionconfig.go b/client-go/applyconfiguration/api/v1alpha1/extensionconfig.go index 77b01467..ec35a74f 100644 --- a/client-go/applyconfiguration/api/v1alpha1/extensionconfig.go +++ b/client-go/applyconfiguration/api/v1alpha1/extensionconfig.go @@ -24,8 +24,8 @@ import ( // ExtensionConfigApplyConfiguration represents a declarative configuration of the ExtensionConfig type for use // with apply. type ExtensionConfigApplyConfiguration struct { - ExtensionRef *ExtensionReferenceApplyConfiguration `json:"extensionRef,omitempty"` - *ExtensionConnectionApplyConfiguration `json:"extensionConnection,omitempty"` + ExtensionRef *ExtensionReferenceApplyConfiguration `json:"extensionRef,omitempty"` + ExtensionConnectionApplyConfiguration `json:",inline"` } // ExtensionConfigApplyConfiguration constructs a declarative configuration of the ExtensionConfig type for use with @@ -46,13 +46,6 @@ func (b *ExtensionConfigApplyConfiguration) WithExtensionRef(value *ExtensionRef // and returns the receiver, so that objects can be built by chaining "With" function invocations. // If called multiple times, the FailureMode field is set to the value of the last call. 
func (b *ExtensionConfigApplyConfiguration) WithFailureMode(value apiv1alpha1.ExtensionFailureMode) *ExtensionConfigApplyConfiguration { - b.ensureExtensionConnectionApplyConfigurationExists() b.ExtensionConnectionApplyConfiguration.FailureMode = &value return b } - -func (b *ExtensionConfigApplyConfiguration) ensureExtensionConnectionApplyConfigurationExists() { - if b.ExtensionConnectionApplyConfiguration == nil { - b.ExtensionConnectionApplyConfiguration = &ExtensionConnectionApplyConfiguration{} - } -} diff --git a/client-go/applyconfiguration/api/v1alpha1/inferencepoolspec.go b/client-go/applyconfiguration/api/v1alpha1/inferencepoolspec.go index 82f2eb68..1642fa56 100644 --- a/client-go/applyconfiguration/api/v1alpha1/inferencepoolspec.go +++ b/client-go/applyconfiguration/api/v1alpha1/inferencepoolspec.go @@ -24,9 +24,9 @@ import ( // InferencePoolSpecApplyConfiguration represents a declarative configuration of the InferencePoolSpec type for use // with apply. type InferencePoolSpecApplyConfiguration struct { - Selector map[apiv1alpha1.LabelKey]apiv1alpha1.LabelValue `json:"selector,omitempty"` - TargetPortNumber *int32 `json:"targetPortNumber,omitempty"` - *EndpointPickerConfigApplyConfiguration `json:"endpointPickerConfig,omitempty"` + Selector map[apiv1alpha1.LabelKey]apiv1alpha1.LabelValue `json:"selector,omitempty"` + TargetPortNumber *int32 `json:"targetPortNumber,omitempty"` + EndpointPickerConfigApplyConfiguration `json:",inline"` } // InferencePoolSpecApplyConfiguration constructs a declarative configuration of the InferencePoolSpec type for use with @@ -61,13 +61,6 @@ func (b *InferencePoolSpecApplyConfiguration) WithTargetPortNumber(value int32) // and returns the receiver, so that objects can be built by chaining "With" function invocations. // If called multiple times, the Extension field is set to the value of the last call. 
func (b *InferencePoolSpecApplyConfiguration) WithExtension(value *ExtensionConfigApplyConfiguration) *InferencePoolSpecApplyConfiguration { - b.ensureEndpointPickerConfigApplyConfigurationExists() b.EndpointPickerConfigApplyConfiguration.Extension = value return b } - -func (b *InferencePoolSpecApplyConfiguration) ensureEndpointPickerConfigApplyConfigurationExists() { - if b.EndpointPickerConfigApplyConfiguration == nil { - b.EndpointPickerConfigApplyConfiguration = &EndpointPickerConfigApplyConfiguration{} - } -} diff --git a/config/crd/bases/inference.networking.x-k8s.io_inferencepools.yaml b/config/crd/bases/inference.networking.x-k8s.io_inferencepools.yaml index f95dd4f4..42155cd9 100644 --- a/config/crd/bases/inference.networking.x-k8s.io_inferencepools.yaml +++ b/config/crd/bases/inference.networking.x-k8s.io_inferencepools.yaml @@ -39,70 +39,56 @@ spec: spec: description: InferencePoolSpec defines the desired state of InferencePool properties: - endpointPickerConfig: - description: |- - EndpointPickerConfig specifies the configuration needed by the proxy to discover and connect to the endpoint - picker service that picks endpoints for the requests routed to this pool. + extension: + description: Extension configures an endpoint picker as an extension + service. properties: - extension: - description: Extension configures an endpoint picker as an extension - service. + extensionRef: + description: ExtensionRef is a reference to a service extension. properties: - extensionConnection: - description: ExtensionConnection configures the connection - between the gateway and the extension. - properties: - failureMode: - default: FailClose - description: |- - Configures how the gateway handles the case when the extension is not responsive. - Defaults to failClose. - enum: - - FailOpen - - FailClose - type: string - type: object - extensionRef: - description: ExtensionRef is a reference to a service extension. 
- properties: - group: - default: "" - description: |- - Group is the group of the referent. - When unspecified or empty string, core API group is inferred. - type: string - kind: - default: Service - description: |- - Kind is the Kubernetes resource kind of the referent. For example - "Service". + group: + default: "" + description: |- + Group is the group of the referent. + When unspecified or empty string, core API group is inferred. + type: string + kind: + default: Service + description: |- + Kind is the Kubernetes resource kind of the referent. For example + "Service". - Defaults to "Service" when not specified. + Defaults to "Service" when not specified. - ExternalName services can refer to CNAME DNS records that may live - outside of the cluster and as such are difficult to reason about in - terms of conformance. They also may not be safe to forward to (see - CVE-2021-25740 for more information). Implementations MUST NOT - support ExternalName Services. - type: string - name: - description: Name is the name of the referent. - type: string - targetPortNumber: - description: |- - The port number on the pods running the extension. When unspecified, implementations are recommended - to default it to 9002 and the Kind is Service. - format: int32 - type: integer - required: - - name - type: object + ExternalName services can refer to CNAME DNS records that may live + outside of the cluster and as such are difficult to reason about in + terms of conformance. They also may not be safe to forward to (see + CVE-2021-25740 for more information). Implementations MUST NOT + support ExternalName Services. + type: string + name: + description: Name is the name of the referent. + type: string + targetPortNumber: + description: |- + The port number on the pods running the extension. When unspecified, implementations are recommended + to default it to 9002 and the Kind is Service. 
+ format: int32 + type: integer required: - - extensionConnection - - extensionRef + - name type: object + failureMode: + default: FailClose + description: |- + Configures how the gateway handles the case when the extension is not responsive. + Defaults to failClose. + enum: + - FailOpen + - FailClose + type: string required: - - extension + - extensionRef type: object selector: additionalProperties: @@ -137,7 +123,7 @@ spec: minimum: 1 type: integer required: - - endpointPickerConfig + - extension - selector - targetPortNumber type: object From 36fa155cf0508e3a27319635b2eb18471b3b0b79 Mon Sep 17 00:00:00 2001 From: ahg-g Date: Fri, 31 Jan 2025 20:30:40 +0000 Subject: [PATCH 8/9] integration test fixes and making extensionRef inlined --- Makefile | 4 ++ api/v1alpha1/inferencepool_types.go | 4 +- api/v1alpha1/zz_generated.deepcopy.go | 2 +- .../api/v1alpha1/extensionconfig.go | 34 ++++++++-- ...ce.networking.x-k8s.io_inferencepools.yaml | 66 +++++++++---------- pkg/manifests/ext_proc.yaml | 2 + test/integration/hermetic_test.go | 24 ++++--- .../inferencepool-with-model-hermetic.yaml | 13 ++-- 8 files changed, 87 insertions(+), 62 deletions(-) diff --git a/Makefile b/Makefile index e34a1a92..087dd69d 100644 --- a/Makefile +++ b/Makefile @@ -105,6 +105,10 @@ vet: ## Run go vet against code. test: manifests generate fmt vet envtest ## Run tests. KUBEBUILDER_ASSETS="$(shell $(ENVTEST) use $(ENVTEST_K8S_VERSION) --bin-dir $(LOCALBIN) -p path)" go test $$(go list ./... | grep -v /e2e) -coverprofile cover.out +.PHONY: test-integration +test-integration: manifests generate fmt vet envtest ## Run integration tests. + KUBEBUILDER_ASSETS="$(shell $(ENVTEST) use $(ENVTEST_K8S_VERSION) --bin-dir $(LOCALBIN) -p path)" go test ./test/integration -coverprofile cover.out + .PHONY: test-e2e test-e2e: ## Run end-to-end tests against an existing Kubernetes cluster with at least 3 available GPUs.
go test ./test/e2e/ -v -ginkgo.v diff --git a/api/v1alpha1/inferencepool_types.go b/api/v1alpha1/inferencepool_types.go index 32103511..10ccde78 100644 --- a/api/v1alpha1/inferencepool_types.go +++ b/api/v1alpha1/inferencepool_types.go @@ -76,8 +76,8 @@ type EndpointPickerConfig struct { // ExtensionConfig specifies how to configure an extension that runs the endpoint picker. type ExtensionConfig struct { - // ExtensionRef is a reference to a service extension. - ExtensionRef ExtensionReference `json:"extensionRef"` + // Reference is a reference to a service extension. + ExtensionReference `json:",inline"` // ExtensionConnection configures the connection between the gateway and the extension. ExtensionConnection `json:",inline"` diff --git a/api/v1alpha1/zz_generated.deepcopy.go b/api/v1alpha1/zz_generated.deepcopy.go index 44ed9c5d..b86f6d7f 100644 --- a/api/v1alpha1/zz_generated.deepcopy.go +++ b/api/v1alpha1/zz_generated.deepcopy.go @@ -48,7 +48,7 @@ func (in *EndpointPickerConfig) DeepCopy() *EndpointPickerConfig { // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *ExtensionConfig) DeepCopyInto(out *ExtensionConfig) { *out = *in - in.ExtensionRef.DeepCopyInto(&out.ExtensionRef) + in.ExtensionReference.DeepCopyInto(&out.ExtensionReference) in.ExtensionConnection.DeepCopyInto(&out.ExtensionConnection) } diff --git a/client-go/applyconfiguration/api/v1alpha1/extensionconfig.go b/client-go/applyconfiguration/api/v1alpha1/extensionconfig.go index ec35a74f..a5492066 100644 --- a/client-go/applyconfiguration/api/v1alpha1/extensionconfig.go +++ b/client-go/applyconfiguration/api/v1alpha1/extensionconfig.go @@ -24,7 +24,7 @@ import ( // ExtensionConfigApplyConfiguration represents a declarative configuration of the ExtensionConfig type for use // with apply. 
type ExtensionConfigApplyConfiguration struct { - ExtensionRef *ExtensionReferenceApplyConfiguration `json:"extensionRef,omitempty"` + ExtensionReferenceApplyConfiguration `json:",inline"` ExtensionConnectionApplyConfiguration `json:",inline"` } @@ -34,11 +34,35 @@ func ExtensionConfig() *ExtensionConfigApplyConfiguration { return &ExtensionConfigApplyConfiguration{} } -// WithExtensionRef sets the ExtensionRef field in the declarative configuration to the given value +// WithGroup sets the Group field in the declarative configuration to the given value // and returns the receiver, so that objects can be built by chaining "With" function invocations. -// If called multiple times, the ExtensionRef field is set to the value of the last call. -func (b *ExtensionConfigApplyConfiguration) WithExtensionRef(value *ExtensionReferenceApplyConfiguration) *ExtensionConfigApplyConfiguration { - b.ExtensionRef = value +// If called multiple times, the Group field is set to the value of the last call. +func (b *ExtensionConfigApplyConfiguration) WithGroup(value string) *ExtensionConfigApplyConfiguration { + b.ExtensionReferenceApplyConfiguration.Group = &value + return b +} + +// WithKind sets the Kind field in the declarative configuration to the given value +// and returns the receiver, so that objects can be built by chaining "With" function invocations. +// If called multiple times, the Kind field is set to the value of the last call. +func (b *ExtensionConfigApplyConfiguration) WithKind(value string) *ExtensionConfigApplyConfiguration { + b.ExtensionReferenceApplyConfiguration.Kind = &value + return b +} + +// WithName sets the Name field in the declarative configuration to the given value +// and returns the receiver, so that objects can be built by chaining "With" function invocations. +// If called multiple times, the Name field is set to the value of the last call. 
+func (b *ExtensionConfigApplyConfiguration) WithName(value string) *ExtensionConfigApplyConfiguration { + b.ExtensionReferenceApplyConfiguration.Name = &value + return b +} + +// WithTargetPortNumber sets the TargetPortNumber field in the declarative configuration to the given value +// and returns the receiver, so that objects can be built by chaining "With" function invocations. +// If called multiple times, the TargetPortNumber field is set to the value of the last call. +func (b *ExtensionConfigApplyConfiguration) WithTargetPortNumber(value int32) *ExtensionConfigApplyConfiguration { + b.ExtensionReferenceApplyConfiguration.TargetPortNumber = &value return b } diff --git a/config/crd/bases/inference.networking.x-k8s.io_inferencepools.yaml b/config/crd/bases/inference.networking.x-k8s.io_inferencepools.yaml index 42155cd9..38d1a900 100644 --- a/config/crd/bases/inference.networking.x-k8s.io_inferencepools.yaml +++ b/config/crd/bases/inference.networking.x-k8s.io_inferencepools.yaml @@ -43,41 +43,6 @@ spec: description: Extension configures an endpoint picker as an extension service. properties: - extensionRef: - description: ExtensionRef is a reference to a service extension. - properties: - group: - default: "" - description: |- - Group is the group of the referent. - When unspecified or empty string, core API group is inferred. - type: string - kind: - default: Service - description: |- - Kind is the Kubernetes resource kind of the referent. For example - "Service". - - Defaults to "Service" when not specified. - - ExternalName services can refer to CNAME DNS records that may live - outside of the cluster and as such are difficult to reason about in - terms of conformance. They also may not be safe to forward to (see - CVE-2021-25740 for more information). Implementations MUST NOT - support ExternalName Services. - type: string - name: - description: Name is the name of the referent. 
- type: string - targetPortNumber: - description: |- - The port number on the pods running the extension. When unspecified, implementations are recommended - to default it to 9002 and the Kind is Service. - format: int32 - type: integer - required: - - name - type: object failureMode: default: FailClose description: |- @@ -87,8 +52,37 @@ spec: - FailOpen - FailClose type: string + group: + default: "" + description: |- + Group is the group of the referent. + When unspecified or empty string, core API group is inferred. + type: string + kind: + default: Service + description: |- + Kind is the Kubernetes resource kind of the referent. For example + "Service". + + Defaults to "Service" when not specified. + + ExternalName services can refer to CNAME DNS records that may live + outside of the cluster and as such are difficult to reason about in + terms of conformance. They also may not be safe to forward to (see + CVE-2021-25740 for more information). Implementations MUST NOT + support ExternalName Services. + type: string + name: + description: Name is the name of the referent. + type: string + targetPortNumber: + description: |- + The port number on the pods running the extension. When unspecified, implementations are recommended + to default it to 9002 and the Kind is Service. 
+ format: int32 + type: integer required: - - extensionRef + - name type: object selector: additionalProperties: diff --git a/pkg/manifests/ext_proc.yaml b/pkg/manifests/ext_proc.yaml index 45bc264d..a61fab15 100644 --- a/pkg/manifests/ext_proc.yaml +++ b/pkg/manifests/ext_proc.yaml @@ -49,6 +49,8 @@ spec: targetPortNumber: 8000 selector: app: vllm-llama2-7b-pool + extension: + name: inference-gateway-ext-proc --- apiVersion: apps/v1 kind: Deployment diff --git a/test/integration/hermetic_test.go b/test/integration/hermetic_test.go index 1379285f..6f138fd6 100644 --- a/test/integration/hermetic_test.go +++ b/test/integration/hermetic_test.go @@ -300,22 +300,28 @@ func setUpHermeticServer(t *testing.T, pods []*backend.PodMetrics) (client extPr log.Fatalf("Can't read object manifests at path %v, %v", manifestsPath, err) } - inferenceModels := make([]*v1alpha1.InferenceModel, 0) for _, doc := range docs { inferenceModel := &v1alpha1.InferenceModel{} if err = yaml.Unmarshal(doc, inferenceModel); err != nil { log.Fatalf("Can't unmarshal object: %v", doc) } - if inferenceModel.Kind != "InferenceModel" { - continue + if inferenceModel.Kind == "InferenceModel" { + t.Logf("Creating inference model: %+v", inferenceModel) + if err := k8sClient.Create(context.Background(), inferenceModel); err != nil { + log.Fatalf("unable to create inferenceModel %v: %v", inferenceModel.Name, err) + } } - inferenceModels = append(inferenceModels, inferenceModel) } - t.Logf("Inference models to add: %+v", inferenceModels) - for _, model := range inferenceModels { - t.Logf("Creating inference model: %+v", model) - if err := k8sClient.Create(context.Background(), model); err != nil { - log.Fatalf("unable to create inferenceModel %v: %v", model.GetName(), err) + for _, doc := range docs { + inferencePool := &v1alpha1.InferencePool{} + if err = yaml.Unmarshal(doc, inferencePool); err != nil { + log.Fatalf("Can't unmarshal object: %v", doc) + } + if inferencePool.Kind == "InferencePool" { + 
t.Logf("Creating inference pool: %+v", inferencePool) + if err := k8sClient.Create(context.Background(), inferencePool); err != nil { + log.Fatalf("unable to create inferencePool %v: %v", inferencePool.Name, err) + } } } diff --git a/test/testdata/inferencepool-with-model-hermetic.yaml b/test/testdata/inferencepool-with-model-hermetic.yaml index 8703c37a..a1cbc066 100644 --- a/test/testdata/inferencepool-with-model-hermetic.yaml +++ b/test/testdata/inferencepool-with-model-hermetic.yaml @@ -1,30 +1,25 @@ apiVersion: inference.networking.x-k8s.io/v1alpha1 kind: InferencePool metadata: - labels: name: vllm-llama2-7b-pool + namespace: default spec: targetPortNumber: 8000 selector: app: vllm-llama2-7b-pool + extension: + name: epp --- apiVersion: inference.networking.x-k8s.io/v1alpha1 kind: InferenceModel metadata: - labels: - app.kubernetes.io/name: api - app.kubernetes.io/managed-by: kustomize name: inferencemodel-sample namespace: default spec: modelName: sql-lora criticality: Critical poolRef: - # this is the default val: - group: inference.networking.x-k8s.io - # this is the default val: - kind: InferencePool name: vllm-llama2-7b-pool targetModels: - name: sql-lora-1fdg2 - weight: 100 \ No newline at end of file + weight: 100 From c94b10ef441cd14a5a5f3f426686e6432927b756 Mon Sep 17 00:00:00 2001 From: ahg-g Date: Sat, 1 Feb 2025 00:55:29 +0000 Subject: [PATCH 9/9] rename to extensionRef --- api/v1alpha1/inferencepool_types.go | 12 ++++++----- api/v1alpha1/zz_generated.deepcopy.go | 14 ++++++------- .../api/v1alpha1/endpointpickerconfig.go | 10 +++++----- .../{extensionconfig.go => extension.go} | 20 +++++++++---------- .../api/v1alpha1/inferencepoolspec.go | 8 ++++---- client-go/applyconfiguration/utils.go | 4 ++-- ...ce.networking.x-k8s.io_inferencepools.yaml | 10 ++++++---- pkg/manifests/ext_proc.yaml | 2 +- .../inferencepool-with-model-hermetic.yaml | 2 +- 9 files changed, 43 insertions(+), 39 deletions(-) rename 
client-go/applyconfiguration/api/v1alpha1/{extensionconfig.go => extension.go} (73%) diff --git a/api/v1alpha1/inferencepool_types.go b/api/v1alpha1/inferencepool_types.go index 10ccde78..61a3764d 100644 --- a/api/v1alpha1/inferencepool_types.go +++ b/api/v1alpha1/inferencepool_types.go @@ -71,11 +71,11 @@ type EndpointPickerConfig struct { // Extension configures an endpoint picker as an extension service. // // +kubebuilder:validation:Required - Extension *ExtensionConfig `json:"extension,omitempty"` + ExtensionRef *Extension `json:"extensionRef,omitempty"` } -// ExtensionConfig specifies how to configure an extension that runs the endpoint picker. -type ExtensionConfig struct { +// Extension specifies how to configure an extension that runs the endpoint picker. +type Extension struct { // Reference is a reference to a service extension. ExtensionReference `json:",inline"` @@ -112,9 +112,11 @@ type ExtensionReference struct { // +kubebuilder:validation:Required Name string `json:"name"` - // The port number on the pods running the extension. When unspecified, implementations are recommended - // to default it to 9002 and the Kind is Service. + // The port number on the pods running the extension. When unspecified, implementations SHOULD infer a + // default value of 9002 when the Kind is Service. // + // +kubebuilder:validation:Minimum=1 + // +kubebuilder:validation:Maximum=65535 // +optional TargetPortNumber *int32 `json:"targetPortNumber,omitempty"` } diff --git a/api/v1alpha1/zz_generated.deepcopy.go b/api/v1alpha1/zz_generated.deepcopy.go index b86f6d7f..fd55379e 100644 --- a/api/v1alpha1/zz_generated.deepcopy.go +++ b/api/v1alpha1/zz_generated.deepcopy.go @@ -28,9 +28,9 @@ import ( // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
func (in *EndpointPickerConfig) DeepCopyInto(out *EndpointPickerConfig) { *out = *in - if in.Extension != nil { - in, out := &in.Extension, &out.Extension - *out = new(ExtensionConfig) + if in.ExtensionRef != nil { + in, out := &in.ExtensionRef, &out.ExtensionRef + *out = new(Extension) (*in).DeepCopyInto(*out) } } @@ -46,18 +46,18 @@ func (in *EndpointPickerConfig) DeepCopy() *EndpointPickerConfig { } // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. -func (in *ExtensionConfig) DeepCopyInto(out *ExtensionConfig) { +func (in *Extension) DeepCopyInto(out *Extension) { *out = *in in.ExtensionReference.DeepCopyInto(&out.ExtensionReference) in.ExtensionConnection.DeepCopyInto(&out.ExtensionConnection) } -// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ExtensionConfig. -func (in *ExtensionConfig) DeepCopy() *ExtensionConfig { +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new Extension. +func (in *Extension) DeepCopy() *Extension { if in == nil { return nil } - out := new(ExtensionConfig) + out := new(Extension) in.DeepCopyInto(out) return out } diff --git a/client-go/applyconfiguration/api/v1alpha1/endpointpickerconfig.go b/client-go/applyconfiguration/api/v1alpha1/endpointpickerconfig.go index 63651fc2..91895ddc 100644 --- a/client-go/applyconfiguration/api/v1alpha1/endpointpickerconfig.go +++ b/client-go/applyconfiguration/api/v1alpha1/endpointpickerconfig.go @@ -20,7 +20,7 @@ package v1alpha1 // EndpointPickerConfigApplyConfiguration represents a declarative configuration of the EndpointPickerConfig type for use // with apply. 
type EndpointPickerConfigApplyConfiguration struct { - Extension *ExtensionConfigApplyConfiguration `json:"extension,omitempty"` + ExtensionRef *ExtensionApplyConfiguration `json:"extensionRef,omitempty"` } // EndpointPickerConfigApplyConfiguration constructs a declarative configuration of the EndpointPickerConfig type for use with @@ -29,10 +29,10 @@ func EndpointPickerConfig() *EndpointPickerConfigApplyConfiguration { return &EndpointPickerConfigApplyConfiguration{} } -// WithExtension sets the Extension field in the declarative configuration to the given value +// WithExtensionRef sets the ExtensionRef field in the declarative configuration to the given value // and returns the receiver, so that objects can be built by chaining "With" function invocations. -// If called multiple times, the Extension field is set to the value of the last call. -func (b *EndpointPickerConfigApplyConfiguration) WithExtension(value *ExtensionConfigApplyConfiguration) *EndpointPickerConfigApplyConfiguration { - b.Extension = value +// If called multiple times, the ExtensionRef field is set to the value of the last call. 
+func (b *EndpointPickerConfigApplyConfiguration) WithExtensionRef(value *ExtensionApplyConfiguration) *EndpointPickerConfigApplyConfiguration { + b.ExtensionRef = value return b } diff --git a/client-go/applyconfiguration/api/v1alpha1/extensionconfig.go b/client-go/applyconfiguration/api/v1alpha1/extension.go similarity index 73% rename from client-go/applyconfiguration/api/v1alpha1/extensionconfig.go rename to client-go/applyconfiguration/api/v1alpha1/extension.go index a5492066..27807448 100644 --- a/client-go/applyconfiguration/api/v1alpha1/extensionconfig.go +++ b/client-go/applyconfiguration/api/v1alpha1/extension.go @@ -21,23 +21,23 @@ import ( apiv1alpha1 "inference.networking.x-k8s.io/gateway-api-inference-extension/api/v1alpha1" ) -// ExtensionConfigApplyConfiguration represents a declarative configuration of the ExtensionConfig type for use +// ExtensionApplyConfiguration represents a declarative configuration of the Extension type for use // with apply. -type ExtensionConfigApplyConfiguration struct { +type ExtensionApplyConfiguration struct { ExtensionReferenceApplyConfiguration `json:",inline"` ExtensionConnectionApplyConfiguration `json:",inline"` } -// ExtensionConfigApplyConfiguration constructs a declarative configuration of the ExtensionConfig type for use with +// ExtensionApplyConfiguration constructs a declarative configuration of the Extension type for use with // apply. -func ExtensionConfig() *ExtensionConfigApplyConfiguration { - return &ExtensionConfigApplyConfiguration{} +func Extension() *ExtensionApplyConfiguration { + return &ExtensionApplyConfiguration{} } // WithGroup sets the Group field in the declarative configuration to the given value // and returns the receiver, so that objects can be built by chaining "With" function invocations. // If called multiple times, the Group field is set to the value of the last call. 
-func (b *ExtensionConfigApplyConfiguration) WithGroup(value string) *ExtensionConfigApplyConfiguration { +func (b *ExtensionApplyConfiguration) WithGroup(value string) *ExtensionApplyConfiguration { b.ExtensionReferenceApplyConfiguration.Group = &value return b } @@ -45,7 +45,7 @@ func (b *ExtensionConfigApplyConfiguration) WithGroup(value string) *ExtensionCo // WithKind sets the Kind field in the declarative configuration to the given value // and returns the receiver, so that objects can be built by chaining "With" function invocations. // If called multiple times, the Kind field is set to the value of the last call. -func (b *ExtensionConfigApplyConfiguration) WithKind(value string) *ExtensionConfigApplyConfiguration { +func (b *ExtensionApplyConfiguration) WithKind(value string) *ExtensionApplyConfiguration { b.ExtensionReferenceApplyConfiguration.Kind = &value return b } @@ -53,7 +53,7 @@ func (b *ExtensionConfigApplyConfiguration) WithKind(value string) *ExtensionCon // WithName sets the Name field in the declarative configuration to the given value // and returns the receiver, so that objects can be built by chaining "With" function invocations. // If called multiple times, the Name field is set to the value of the last call. -func (b *ExtensionConfigApplyConfiguration) WithName(value string) *ExtensionConfigApplyConfiguration { +func (b *ExtensionApplyConfiguration) WithName(value string) *ExtensionApplyConfiguration { b.ExtensionReferenceApplyConfiguration.Name = &value return b } @@ -61,7 +61,7 @@ func (b *ExtensionConfigApplyConfiguration) WithName(value string) *ExtensionCon // WithTargetPortNumber sets the TargetPortNumber field in the declarative configuration to the given value // and returns the receiver, so that objects can be built by chaining "With" function invocations. // If called multiple times, the TargetPortNumber field is set to the value of the last call. 
-func (b *ExtensionConfigApplyConfiguration) WithTargetPortNumber(value int32) *ExtensionConfigApplyConfiguration { +func (b *ExtensionApplyConfiguration) WithTargetPortNumber(value int32) *ExtensionApplyConfiguration { b.ExtensionReferenceApplyConfiguration.TargetPortNumber = &value return b } @@ -69,7 +69,7 @@ func (b *ExtensionConfigApplyConfiguration) WithTargetPortNumber(value int32) *E // WithFailureMode sets the FailureMode field in the declarative configuration to the given value // and returns the receiver, so that objects can be built by chaining "With" function invocations. // If called multiple times, the FailureMode field is set to the value of the last call. -func (b *ExtensionConfigApplyConfiguration) WithFailureMode(value apiv1alpha1.ExtensionFailureMode) *ExtensionConfigApplyConfiguration { +func (b *ExtensionApplyConfiguration) WithFailureMode(value apiv1alpha1.ExtensionFailureMode) *ExtensionApplyConfiguration { b.ExtensionConnectionApplyConfiguration.FailureMode = &value return b } diff --git a/client-go/applyconfiguration/api/v1alpha1/inferencepoolspec.go b/client-go/applyconfiguration/api/v1alpha1/inferencepoolspec.go index 1642fa56..e132f74b 100644 --- a/client-go/applyconfiguration/api/v1alpha1/inferencepoolspec.go +++ b/client-go/applyconfiguration/api/v1alpha1/inferencepoolspec.go @@ -57,10 +57,10 @@ func (b *InferencePoolSpecApplyConfiguration) WithTargetPortNumber(value int32) return b } -// WithExtension sets the Extension field in the declarative configuration to the given value +// WithExtensionRef sets the ExtensionRef field in the declarative configuration to the given value // and returns the receiver, so that objects can be built by chaining "With" function invocations. -// If called multiple times, the Extension field is set to the value of the last call. 
-func (b *InferencePoolSpecApplyConfiguration) WithExtension(value *ExtensionConfigApplyConfiguration) *InferencePoolSpecApplyConfiguration { - b.EndpointPickerConfigApplyConfiguration.Extension = value +// If called multiple times, the ExtensionRef field is set to the value of the last call. +func (b *InferencePoolSpecApplyConfiguration) WithExtensionRef(value *ExtensionApplyConfiguration) *InferencePoolSpecApplyConfiguration { + b.EndpointPickerConfigApplyConfiguration.ExtensionRef = value return b } diff --git a/client-go/applyconfiguration/utils.go b/client-go/applyconfiguration/utils.go index 1074b8a7..1a71b674 100644 --- a/client-go/applyconfiguration/utils.go +++ b/client-go/applyconfiguration/utils.go @@ -33,8 +33,8 @@ func ForKind(kind schema.GroupVersionKind) interface{} { // Group=api, Version=v1alpha1 case v1alpha1.SchemeGroupVersion.WithKind("EndpointPickerConfig"): return &apiv1alpha1.EndpointPickerConfigApplyConfiguration{} - case v1alpha1.SchemeGroupVersion.WithKind("ExtensionConfig"): - return &apiv1alpha1.ExtensionConfigApplyConfiguration{} + case v1alpha1.SchemeGroupVersion.WithKind("Extension"): + return &apiv1alpha1.ExtensionApplyConfiguration{} case v1alpha1.SchemeGroupVersion.WithKind("ExtensionConnection"): return &apiv1alpha1.ExtensionConnectionApplyConfiguration{} case v1alpha1.SchemeGroupVersion.WithKind("ExtensionReference"): diff --git a/config/crd/bases/inference.networking.x-k8s.io_inferencepools.yaml b/config/crd/bases/inference.networking.x-k8s.io_inferencepools.yaml index 38d1a900..9e6473b9 100644 --- a/config/crd/bases/inference.networking.x-k8s.io_inferencepools.yaml +++ b/config/crd/bases/inference.networking.x-k8s.io_inferencepools.yaml @@ -39,7 +39,7 @@ spec: spec: description: InferencePoolSpec defines the desired state of InferencePool properties: - extension: + extensionRef: description: Extension configures an endpoint picker as an extension service. 
properties: @@ -77,9 +77,11 @@ spec: type: string targetPortNumber: description: |- - The port number on the pods running the extension. When unspecified, implementations are recommended - to default it to 9002 and the Kind is Service. + The port number on the pods running the extension. When unspecified, implementations SHOULD infer a + default value of 9002 when the Kind is Service. format: int32 + maximum: 65535 + minimum: 1 type: integer required: - name @@ -117,7 +119,7 @@ spec: minimum: 1 type: integer required: - - extension + - extensionRef - selector - targetPortNumber type: object diff --git a/pkg/manifests/ext_proc.yaml b/pkg/manifests/ext_proc.yaml index a61fab15..410c31ed 100644 --- a/pkg/manifests/ext_proc.yaml +++ b/pkg/manifests/ext_proc.yaml @@ -49,7 +49,7 @@ spec: targetPortNumber: 8000 selector: app: vllm-llama2-7b-pool - extension: + extensionRef: name: inference-gateway-ext-proc --- apiVersion: apps/v1 diff --git a/test/testdata/inferencepool-with-model-hermetic.yaml b/test/testdata/inferencepool-with-model-hermetic.yaml index a1cbc066..a07e0f35 100644 --- a/test/testdata/inferencepool-with-model-hermetic.yaml +++ b/test/testdata/inferencepool-with-model-hermetic.yaml @@ -7,7 +7,7 @@ spec: targetPortNumber: 8000 selector: app: vllm-llama2-7b-pool - extension: + extensionRef: name: epp --- apiVersion: inference.networking.x-k8s.io/v1alpha1