diff --git a/Makefile b/Makefile index e34a1a92..087dd69d 100644 --- a/Makefile +++ b/Makefile @@ -105,6 +105,10 @@ vet: ## Run go vet against code. test: manifests generate fmt vet envtest ## Run tests. KUBEBUILDER_ASSETS="$(shell $(ENVTEST) use $(ENVTEST_K8S_VERSION) --bin-dir $(LOCALBIN) -p path)" go test $$(go list ./... | grep -v /e2e) -coverprofile cover.out +.PHONY: test-integration +test-integration: manifests generate fmt vet envtest ## Run integration tests. + KUBEBUILDER_ASSETS="$(shell $(ENVTEST) use $(ENVTEST_K8S_VERSION) --bin-dir $(LOCALBIN) -p path)" go test ./test/integration -coverprofile cover.out + .PHONY: test-e2e test-e2e: ## Run end-to-end tests against an existing Kubernetes cluster with at least 3 available GPUs. go test ./test/e2e/ -v -ginkgo.v diff --git a/api/v1alpha1/inferencepool_types.go b/api/v1alpha1/inferencepool_types.go index d89b8df5..61a3764d 100644 --- a/api/v1alpha1/inferencepool_types.go +++ b/api/v1alpha1/inferencepool_types.go @@ -59,8 +59,90 @@ type InferencePoolSpec struct { // +kubebuilder:validation:Maximum=65535 // +kubebuilder:validation:Required TargetPortNumber int32 `json:"targetPortNumber"` + + // EndpointPickerConfig specifies the configuration needed by the proxy to discover and connect to the endpoint + // picker service that picks endpoints for the requests routed to this pool. + EndpointPickerConfig `json:",inline"` +} + +// EndpointPickerConfig specifies the configuration needed by the proxy to discover and connect to the endpoint picker extension. +// This type is intended to be a union of mutually exclusive configuration options that we may add in the future. +type EndpointPickerConfig struct { + // Extension configures an endpoint picker as an extension service. + // + // +kubebuilder:validation:Required + ExtensionRef *Extension `json:"extensionRef,omitempty"` } +// Extension specifies how to configure an extension that runs the endpoint picker. 
+type Extension struct { + // Reference is a reference to a service extension. + ExtensionReference `json:",inline"` + + // ExtensionConnection configures the connection between the gateway and the extension. + ExtensionConnection `json:",inline"` +} + +// ExtensionReference is a reference to the extension deployment. +type ExtensionReference struct { + // Group is the group of the referent. + // When unspecified or empty string, core API group is inferred. + // + // +optional + // +kubebuilder:default="" + Group *string `json:"group,omitempty"` + + // Kind is the Kubernetes resource kind of the referent. For example + // "Service". + // + // Defaults to "Service" when not specified. + // + // ExternalName services can refer to CNAME DNS records that may live + // outside of the cluster and as such are difficult to reason about in + // terms of conformance. They also may not be safe to forward to (see + // CVE-2021-25740 for more information). Implementations MUST NOT + // support ExternalName Services. + // + // +optional + // +kubebuilder:default=Service + Kind *string `json:"kind,omitempty"` + + // Name is the name of the referent. + // + // +kubebuilder:validation:Required + Name string `json:"name"` + + // The port number on the pods running the extension. When unspecified, implementations SHOULD infer a + // default value of 9002 when the Kind is Service. + // + // +kubebuilder:validation:Minimum=1 + // +kubebuilder:validation:Maximum=65535 + // +optional + TargetPortNumber *int32 `json:"targetPortNumber,omitempty"` +} + +// ExtensionConnection encapsulates options that configure the connection to the extension. +type ExtensionConnection struct { + // Configures how the gateway handles the case when the extension is not responsive. + // Defaults to failClose. 
+ // + // +optional + // +kubebuilder:default="FailClose" + FailureMode *ExtensionFailureMode `json:"failureMode,omitempty"` +} + +// ExtensionFailureMode defines the options for how the gateway handles the case when the extension is not +// responsive. +// +kubebuilder:validation:Enum=FailOpen;FailClose +type ExtensionFailureMode string + +const ( + // FailOpen specifies that the proxy should not drop the request and forward the request to an endpoint of its picking. + FailOpen ExtensionFailureMode = "FailOpen" + // FailClose specifies that the proxy should drop the request. + FailClose ExtensionFailureMode = "FailClose" +) + // LabelKey was originally copied from: https://github.com/kubernetes-sigs/gateway-api/blob/99a3934c6bc1ce0874f3a4c5f20cafd8977ffcb4/apis/v1/shared_types.go#L694-L731 // Duplicated as to not take an unexpected dependency on gw's API. // diff --git a/api/v1alpha1/zz_generated.deepcopy.go b/api/v1alpha1/zz_generated.deepcopy.go index 27fe7579..fd55379e 100644 --- a/api/v1alpha1/zz_generated.deepcopy.go +++ b/api/v1alpha1/zz_generated.deepcopy.go @@ -25,6 +25,93 @@ import ( runtime "k8s.io/apimachinery/pkg/runtime" ) +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *EndpointPickerConfig) DeepCopyInto(out *EndpointPickerConfig) { + *out = *in + if in.ExtensionRef != nil { + in, out := &in.ExtensionRef, &out.ExtensionRef + *out = new(Extension) + (*in).DeepCopyInto(*out) + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new EndpointPickerConfig. +func (in *EndpointPickerConfig) DeepCopy() *EndpointPickerConfig { + if in == nil { + return nil + } + out := new(EndpointPickerConfig) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
+func (in *Extension) DeepCopyInto(out *Extension) { + *out = *in + in.ExtensionReference.DeepCopyInto(&out.ExtensionReference) + in.ExtensionConnection.DeepCopyInto(&out.ExtensionConnection) +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new Extension. +func (in *Extension) DeepCopy() *Extension { + if in == nil { + return nil + } + out := new(Extension) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *ExtensionConnection) DeepCopyInto(out *ExtensionConnection) { + *out = *in + if in.FailureMode != nil { + in, out := &in.FailureMode, &out.FailureMode + *out = new(ExtensionFailureMode) + **out = **in + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ExtensionConnection. +func (in *ExtensionConnection) DeepCopy() *ExtensionConnection { + if in == nil { + return nil + } + out := new(ExtensionConnection) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *ExtensionReference) DeepCopyInto(out *ExtensionReference) { + *out = *in + if in.Group != nil { + in, out := &in.Group, &out.Group + *out = new(string) + **out = **in + } + if in.Kind != nil { + in, out := &in.Kind, &out.Kind + *out = new(string) + **out = **in + } + if in.TargetPortNumber != nil { + in, out := &in.TargetPortNumber, &out.TargetPortNumber + *out = new(int32) + **out = **in + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ExtensionReference. +func (in *ExtensionReference) DeepCopy() *ExtensionReference { + if in == nil { + return nil + } + out := new(ExtensionReference) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
func (in *InferenceModel) DeepCopyInto(out *InferenceModel) { *out = *in @@ -203,6 +290,7 @@ func (in *InferencePoolSpec) DeepCopyInto(out *InferencePoolSpec) { (*out)[key] = val } } + in.EndpointPickerConfig.DeepCopyInto(&out.EndpointPickerConfig) } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new InferencePoolSpec. diff --git a/client-go/applyconfiguration/api/v1alpha1/endpointpickerconfig.go b/client-go/applyconfiguration/api/v1alpha1/endpointpickerconfig.go new file mode 100644 index 00000000..91895ddc --- /dev/null +++ b/client-go/applyconfiguration/api/v1alpha1/endpointpickerconfig.go @@ -0,0 +1,38 @@ +/* +Copyright 2024 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ +// Code generated by applyconfiguration-gen. DO NOT EDIT. + +package v1alpha1 + +// EndpointPickerConfigApplyConfiguration represents a declarative configuration of the EndpointPickerConfig type for use +// with apply. +type EndpointPickerConfigApplyConfiguration struct { + ExtensionRef *ExtensionApplyConfiguration `json:"extensionRef,omitempty"` +} + +// EndpointPickerConfigApplyConfiguration constructs a declarative configuration of the EndpointPickerConfig type for use with +// apply. 
+func EndpointPickerConfig() *EndpointPickerConfigApplyConfiguration { + return &EndpointPickerConfigApplyConfiguration{} +} + +// WithExtensionRef sets the ExtensionRef field in the declarative configuration to the given value +// and returns the receiver, so that objects can be built by chaining "With" function invocations. +// If called multiple times, the ExtensionRef field is set to the value of the last call. +func (b *EndpointPickerConfigApplyConfiguration) WithExtensionRef(value *ExtensionApplyConfiguration) *EndpointPickerConfigApplyConfiguration { + b.ExtensionRef = value + return b +} diff --git a/client-go/applyconfiguration/api/v1alpha1/extension.go b/client-go/applyconfiguration/api/v1alpha1/extension.go new file mode 100644 index 00000000..27807448 --- /dev/null +++ b/client-go/applyconfiguration/api/v1alpha1/extension.go @@ -0,0 +1,75 @@ +/* +Copyright 2024 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ +// Code generated by applyconfiguration-gen. DO NOT EDIT. + +package v1alpha1 + +import ( + apiv1alpha1 "inference.networking.x-k8s.io/gateway-api-inference-extension/api/v1alpha1" +) + +// ExtensionApplyConfiguration represents a declarative configuration of the Extension type for use +// with apply. 
+type ExtensionApplyConfiguration struct { + ExtensionReferenceApplyConfiguration `json:",inline"` + ExtensionConnectionApplyConfiguration `json:",inline"` +} + +// ExtensionApplyConfiguration constructs a declarative configuration of the Extension type for use with +// apply. +func Extension() *ExtensionApplyConfiguration { + return &ExtensionApplyConfiguration{} +} + +// WithGroup sets the Group field in the declarative configuration to the given value +// and returns the receiver, so that objects can be built by chaining "With" function invocations. +// If called multiple times, the Group field is set to the value of the last call. +func (b *ExtensionApplyConfiguration) WithGroup(value string) *ExtensionApplyConfiguration { + b.ExtensionReferenceApplyConfiguration.Group = &value + return b +} + +// WithKind sets the Kind field in the declarative configuration to the given value +// and returns the receiver, so that objects can be built by chaining "With" function invocations. +// If called multiple times, the Kind field is set to the value of the last call. +func (b *ExtensionApplyConfiguration) WithKind(value string) *ExtensionApplyConfiguration { + b.ExtensionReferenceApplyConfiguration.Kind = &value + return b +} + +// WithName sets the Name field in the declarative configuration to the given value +// and returns the receiver, so that objects can be built by chaining "With" function invocations. +// If called multiple times, the Name field is set to the value of the last call. +func (b *ExtensionApplyConfiguration) WithName(value string) *ExtensionApplyConfiguration { + b.ExtensionReferenceApplyConfiguration.Name = &value + return b +} + +// WithTargetPortNumber sets the TargetPortNumber field in the declarative configuration to the given value +// and returns the receiver, so that objects can be built by chaining "With" function invocations. +// If called multiple times, the TargetPortNumber field is set to the value of the last call. 
+func (b *ExtensionApplyConfiguration) WithTargetPortNumber(value int32) *ExtensionApplyConfiguration { + b.ExtensionReferenceApplyConfiguration.TargetPortNumber = &value + return b +} + +// WithFailureMode sets the FailureMode field in the declarative configuration to the given value +// and returns the receiver, so that objects can be built by chaining "With" function invocations. +// If called multiple times, the FailureMode field is set to the value of the last call. +func (b *ExtensionApplyConfiguration) WithFailureMode(value apiv1alpha1.ExtensionFailureMode) *ExtensionApplyConfiguration { + b.ExtensionConnectionApplyConfiguration.FailureMode = &value + return b +} diff --git a/client-go/applyconfiguration/api/v1alpha1/extensionconnection.go b/client-go/applyconfiguration/api/v1alpha1/extensionconnection.go new file mode 100644 index 00000000..be9eeaa1 --- /dev/null +++ b/client-go/applyconfiguration/api/v1alpha1/extensionconnection.go @@ -0,0 +1,42 @@ +/* +Copyright 2024 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ +// Code generated by applyconfiguration-gen. DO NOT EDIT. + +package v1alpha1 + +import ( + apiv1alpha1 "inference.networking.x-k8s.io/gateway-api-inference-extension/api/v1alpha1" +) + +// ExtensionConnectionApplyConfiguration represents a declarative configuration of the ExtensionConnection type for use +// with apply. 
+type ExtensionConnectionApplyConfiguration struct { + FailureMode *apiv1alpha1.ExtensionFailureMode `json:"failureMode,omitempty"` +} + +// ExtensionConnectionApplyConfiguration constructs a declarative configuration of the ExtensionConnection type for use with +// apply. +func ExtensionConnection() *ExtensionConnectionApplyConfiguration { + return &ExtensionConnectionApplyConfiguration{} +} + +// WithFailureMode sets the FailureMode field in the declarative configuration to the given value +// and returns the receiver, so that objects can be built by chaining "With" function invocations. +// If called multiple times, the FailureMode field is set to the value of the last call. +func (b *ExtensionConnectionApplyConfiguration) WithFailureMode(value apiv1alpha1.ExtensionFailureMode) *ExtensionConnectionApplyConfiguration { + b.FailureMode = &value + return b +} diff --git a/client-go/applyconfiguration/api/v1alpha1/extensionreference.go b/client-go/applyconfiguration/api/v1alpha1/extensionreference.go new file mode 100644 index 00000000..c72c0306 --- /dev/null +++ b/client-go/applyconfiguration/api/v1alpha1/extensionreference.go @@ -0,0 +1,65 @@ +/* +Copyright 2024 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ +// Code generated by applyconfiguration-gen. DO NOT EDIT. + +package v1alpha1 + +// ExtensionReferenceApplyConfiguration represents a declarative configuration of the ExtensionReference type for use +// with apply. 
+type ExtensionReferenceApplyConfiguration struct { + Group *string `json:"group,omitempty"` + Kind *string `json:"kind,omitempty"` + Name *string `json:"name,omitempty"` + TargetPortNumber *int32 `json:"targetPortNumber,omitempty"` +} + +// ExtensionReferenceApplyConfiguration constructs a declarative configuration of the ExtensionReference type for use with +// apply. +func ExtensionReference() *ExtensionReferenceApplyConfiguration { + return &ExtensionReferenceApplyConfiguration{} +} + +// WithGroup sets the Group field in the declarative configuration to the given value +// and returns the receiver, so that objects can be built by chaining "With" function invocations. +// If called multiple times, the Group field is set to the value of the last call. +func (b *ExtensionReferenceApplyConfiguration) WithGroup(value string) *ExtensionReferenceApplyConfiguration { + b.Group = &value + return b +} + +// WithKind sets the Kind field in the declarative configuration to the given value +// and returns the receiver, so that objects can be built by chaining "With" function invocations. +// If called multiple times, the Kind field is set to the value of the last call. +func (b *ExtensionReferenceApplyConfiguration) WithKind(value string) *ExtensionReferenceApplyConfiguration { + b.Kind = &value + return b +} + +// WithName sets the Name field in the declarative configuration to the given value +// and returns the receiver, so that objects can be built by chaining "With" function invocations. +// If called multiple times, the Name field is set to the value of the last call. +func (b *ExtensionReferenceApplyConfiguration) WithName(value string) *ExtensionReferenceApplyConfiguration { + b.Name = &value + return b +} + +// WithTargetPortNumber sets the TargetPortNumber field in the declarative configuration to the given value +// and returns the receiver, so that objects can be built by chaining "With" function invocations. 
+// If called multiple times, the TargetPortNumber field is set to the value of the last call. +func (b *ExtensionReferenceApplyConfiguration) WithTargetPortNumber(value int32) *ExtensionReferenceApplyConfiguration { + b.TargetPortNumber = &value + return b +} diff --git a/client-go/applyconfiguration/api/v1alpha1/inferencepoolspec.go b/client-go/applyconfiguration/api/v1alpha1/inferencepoolspec.go index e8084155..e132f74b 100644 --- a/client-go/applyconfiguration/api/v1alpha1/inferencepoolspec.go +++ b/client-go/applyconfiguration/api/v1alpha1/inferencepoolspec.go @@ -24,8 +24,9 @@ import ( // InferencePoolSpecApplyConfiguration represents a declarative configuration of the InferencePoolSpec type for use // with apply. type InferencePoolSpecApplyConfiguration struct { - Selector map[apiv1alpha1.LabelKey]apiv1alpha1.LabelValue `json:"selector,omitempty"` - TargetPortNumber *int32 `json:"targetPortNumber,omitempty"` + Selector map[apiv1alpha1.LabelKey]apiv1alpha1.LabelValue `json:"selector,omitempty"` + TargetPortNumber *int32 `json:"targetPortNumber,omitempty"` + EndpointPickerConfigApplyConfiguration `json:",inline"` } // InferencePoolSpecApplyConfiguration constructs a declarative configuration of the InferencePoolSpec type for use with @@ -55,3 +56,11 @@ func (b *InferencePoolSpecApplyConfiguration) WithTargetPortNumber(value int32) b.TargetPortNumber = &value return b } + +// WithExtensionRef sets the ExtensionRef field in the declarative configuration to the given value +// and returns the receiver, so that objects can be built by chaining "With" function invocations. +// If called multiple times, the ExtensionRef field is set to the value of the last call. 
+func (b *InferencePoolSpecApplyConfiguration) WithExtensionRef(value *ExtensionApplyConfiguration) *InferencePoolSpecApplyConfiguration { + b.EndpointPickerConfigApplyConfiguration.ExtensionRef = value + return b +} diff --git a/client-go/applyconfiguration/utils.go b/client-go/applyconfiguration/utils.go index eb0264b3..1a71b674 100644 --- a/client-go/applyconfiguration/utils.go +++ b/client-go/applyconfiguration/utils.go @@ -31,6 +31,14 @@ import ( func ForKind(kind schema.GroupVersionKind) interface{} { switch kind { // Group=api, Version=v1alpha1 + case v1alpha1.SchemeGroupVersion.WithKind("EndpointPickerConfig"): + return &apiv1alpha1.EndpointPickerConfigApplyConfiguration{} + case v1alpha1.SchemeGroupVersion.WithKind("Extension"): + return &apiv1alpha1.ExtensionApplyConfiguration{} + case v1alpha1.SchemeGroupVersion.WithKind("ExtensionConnection"): + return &apiv1alpha1.ExtensionConnectionApplyConfiguration{} + case v1alpha1.SchemeGroupVersion.WithKind("ExtensionReference"): + return &apiv1alpha1.ExtensionReferenceApplyConfiguration{} case v1alpha1.SchemeGroupVersion.WithKind("InferenceModel"): return &apiv1alpha1.InferenceModelApplyConfiguration{} case v1alpha1.SchemeGroupVersion.WithKind("InferenceModelSpec"): diff --git a/config/crd/bases/inference.networking.x-k8s.io_inferencepools.yaml b/config/crd/bases/inference.networking.x-k8s.io_inferencepools.yaml index 8e0ff54d..9e6473b9 100644 --- a/config/crd/bases/inference.networking.x-k8s.io_inferencepools.yaml +++ b/config/crd/bases/inference.networking.x-k8s.io_inferencepools.yaml @@ -39,6 +39,53 @@ spec: spec: description: InferencePoolSpec defines the desired state of InferencePool properties: + extensionRef: + description: Extension configures an endpoint picker as an extension + service. + properties: + failureMode: + default: FailClose + description: |- + Configures how the gateway handles the case when the extension is not responsive. + Defaults to failClose. 
+ enum: + - FailOpen + - FailClose + type: string + group: + default: "" + description: |- + Group is the group of the referent. + When unspecified or empty string, core API group is inferred. + type: string + kind: + default: Service + description: |- + Kind is the Kubernetes resource kind of the referent. For example + "Service". + + Defaults to "Service" when not specified. + + ExternalName services can refer to CNAME DNS records that may live + outside of the cluster and as such are difficult to reason about in + terms of conformance. They also may not be safe to forward to (see + CVE-2021-25740 for more information). Implementations MUST NOT + support ExternalName Services. + type: string + name: + description: Name is the name of the referent. + type: string + targetPortNumber: + description: |- + The port number on the pods running the extension. When unspecified, implementations SHOULD infer a + default value of 9002 when the Kind is Service. + format: int32 + maximum: 65535 + minimum: 1 + type: integer + required: + - name + type: object selector: additionalProperties: description: |- @@ -72,6 +119,7 @@ spec: minimum: 1 type: integer required: + - extensionRef - selector - targetPortNumber type: object diff --git a/pkg/manifests/ext_proc.yaml b/pkg/manifests/ext_proc.yaml index 45bc264d..410c31ed 100644 --- a/pkg/manifests/ext_proc.yaml +++ b/pkg/manifests/ext_proc.yaml @@ -49,6 +49,8 @@ spec: targetPortNumber: 8000 selector: app: vllm-llama2-7b-pool + extensionRef: + name: inference-gateway-ext-proc --- apiVersion: apps/v1 kind: Deployment diff --git a/test/integration/hermetic_test.go b/test/integration/hermetic_test.go index 1379285f..6f138fd6 100644 --- a/test/integration/hermetic_test.go +++ b/test/integration/hermetic_test.go @@ -300,22 +300,28 @@ func setUpHermeticServer(t *testing.T, pods []*backend.PodMetrics) (client extPr log.Fatalf("Can't read object manifests at path %v, %v", manifestsPath, err) } - inferenceModels := 
make([]*v1alpha1.InferenceModel, 0) for _, doc := range docs { inferenceModel := &v1alpha1.InferenceModel{} if err = yaml.Unmarshal(doc, inferenceModel); err != nil { log.Fatalf("Can't unmarshal object: %v", doc) } - if inferenceModel.Kind != "InferenceModel" { - continue + if inferenceModel.Kind == "InferenceModel" { + t.Logf("Creating inference model: %+v", inferenceModel) + if err := k8sClient.Create(context.Background(), inferenceModel); err != nil { + log.Fatalf("unable to create inferenceModel %v: %v", inferenceModel.Name, err) + } } - inferenceModels = append(inferenceModels, inferenceModel) } - t.Logf("Inference models to add: %+v", inferenceModels) - for _, model := range inferenceModels { - t.Logf("Creating inference model: %+v", model) - if err := k8sClient.Create(context.Background(), model); err != nil { - log.Fatalf("unable to create inferenceModel %v: %v", model.GetName(), err) + for _, doc := range docs { + inferencePool := &v1alpha1.InferencePool{} + if err = yaml.Unmarshal(doc, inferencePool); err != nil { + log.Fatalf("Can't unmarshal object: %v", doc) + } + if inferencePool.Kind == "InferencePool" { + t.Logf("Creating inference pool: %+v", inferencePool) + if err := k8sClient.Create(context.Background(), inferencePool); err != nil { + log.Fatalf("unable to create inferencePool %v: %v", inferencePool.Name, err) + } } } diff --git a/test/testdata/inferencepool-with-model-hermetic.yaml b/test/testdata/inferencepool-with-model-hermetic.yaml index 8703c37a..a07e0f35 100644 --- a/test/testdata/inferencepool-with-model-hermetic.yaml +++ b/test/testdata/inferencepool-with-model-hermetic.yaml @@ -1,30 +1,25 @@ apiVersion: inference.networking.x-k8s.io/v1alpha1 kind: InferencePool metadata: - labels: name: vllm-llama2-7b-pool + namespace: default spec: targetPortNumber: 8000 selector: app: vllm-llama2-7b-pool + extensionRef: + name: epp --- apiVersion: inference.networking.x-k8s.io/v1alpha1 kind: InferenceModel metadata: - labels: - 
app.kubernetes.io/name: api - app.kubernetes.io/managed-by: kustomize name: inferencemodel-sample namespace: default spec: modelName: sql-lora criticality: Critical poolRef: - # this is the default val: - group: inference.networking.x-k8s.io - # this is the default val: - kind: InferencePool name: vllm-llama2-7b-pool targetModels: - name: sql-lora-1fdg2 - weight: 100 \ No newline at end of file + weight: 100