|
| 1 | +package endpointpicker |
| 2 | + |
import (
	"context"
	"log/slog"
	"time"

	envoy_config_cluster_v3 "github.com/envoyproxy/go-control-plane/envoy/config/cluster/v3"
	"google.golang.org/protobuf/types/known/anypb"
	"google.golang.org/protobuf/types/known/durationpb"
	"google.golang.org/protobuf/types/known/wrapperspb"
	"istio.io/istio/pkg/kube/kclient"
	"istio.io/istio/pkg/kube/krt"
	"k8s.io/apimachinery/pkg/runtime/schema"
	infextv1a1 "sigs.k8s.io/gateway-api-inference-extension/api/v1alpha1"

	"github.com/kgateway-dev/kgateway/v2/internal/kgateway/extensions2/common"
	extensionsplug "github.com/kgateway-dev/kgateway/v2/internal/kgateway/extensions2/plugin"
	"github.com/kgateway-dev/kgateway/v2/internal/kgateway/extensions2/settings"
	"github.com/kgateway-dev/kgateway/v2/internal/kgateway/ir"
	"github.com/kgateway-dev/kgateway/v2/internal/kgateway/krtcollections"
	"github.com/kgateway-dev/kgateway/v2/internal/kgateway/utils/krtutil"
	"github.com/kgateway-dev/kgateway/v2/internal/kgateway/wellknown"
)
| 24 | + |
| 25 | +func NewPlugin(ctx context.Context, commoncol *common.CommonCollections) extensionsplug.Plugin { |
| 26 | + poolClient := kclient.New[*infextv1a1.InferencePool](commoncol.Client) |
| 27 | + pools := krt.WrapClient(poolClient, commoncol.KrtOpts.ToOptions("InferencePools")...) |
| 28 | + return NewPluginFromCollections(ctx, commoncol.KrtOpts, pools, commoncol.Pods, commoncol.Settings) |
| 29 | +} |
| 30 | + |
| 31 | +func NewPluginFromCollections( |
| 32 | + ctx context.Context, |
| 33 | + krtOpts krtutil.KrtOptions, |
| 34 | + pools krt.Collection[*infextv1a1.InferencePool], |
| 35 | + pods krt.Collection[krtcollections.LocalityPod], |
| 36 | + stngs settings.Settings, |
| 37 | +) extensionsplug.Plugin { |
| 38 | + gk := schema.GroupKind{ |
| 39 | + Group: infextv1a1.GroupVersion.Group, |
| 40 | + Kind: wellknown.InferencePoolKind, |
| 41 | + } |
| 42 | + |
| 43 | + // TODO [danehans]: Filter InferencePools based one's that are referenced by an HTTPRoute |
| 44 | + // with a status.parents[].controllerName that matches our Gateway controllerName. |
| 45 | + infPoolUpstream := krt.NewCollection(pools, func(kctx krt.HandlerContext, pool *infextv1a1.InferencePool) *ir.Upstream { |
| 46 | + return &ir.Upstream{ |
| 47 | + ObjectSource: ir.ObjectSource{ |
| 48 | + Kind: gk.Kind, |
| 49 | + Group: gk.Group, |
| 50 | + Namespace: pool.Namespace, |
| 51 | + Name: pool.Name, |
| 52 | + }, |
| 53 | + Obj: pool, |
| 54 | + Port: pool.Spec.TargetPortNumber, |
| 55 | + GvPrefix: "endpoint-picker", |
| 56 | + CanonicalHostname: "", |
| 57 | + } |
| 58 | + }, krtOpts.ToOptions("EndpointPickerUpstreams")...) |
| 59 | + |
| 60 | + // Create the endpoints collection |
| 61 | + inputs := krtcollections.NewInfPoolEndpointsInputs(krtOpts, infPoolUpstream, pods) |
| 62 | + infPoolEndpoints := krtcollections.NewInfPoolEndpoints(ctx, inputs) |
| 63 | + |
| 64 | + return extensionsplug.Plugin{ |
| 65 | + ContributesUpstreams: map[schema.GroupKind]extensionsplug.UpstreamPlugin{ |
| 66 | + gk: { |
| 67 | + UpstreamInit: ir.UpstreamInit{ |
| 68 | + InitUpstream: processUpstream, |
| 69 | + }, |
| 70 | + Endpoints: infPoolEndpoints, |
| 71 | + Upstreams: infPoolUpstream, |
| 72 | + }, |
| 73 | + }, |
| 74 | + } |
| 75 | +} |
| 76 | + |
| 77 | +func processUpstream(ctx context.Context, in ir.Upstream, out *envoy_config_cluster_v3.Cluster) { |
| 78 | + // Set cluster type to ORIGINAL_DST |
| 79 | + out.ClusterDiscoveryType = &envoy_config_cluster_v3.Cluster_Type{ |
| 80 | + Type: envoy_config_cluster_v3.Cluster_ORIGINAL_DST, |
| 81 | + } |
| 82 | + |
| 83 | + // Set connect timeout to 1000 seconds. |
| 84 | + // TODO [danehans]: Figure out an API that can be used to set this value. |
| 85 | + out.ConnectTimeout = durationpb.New(1000 * time.Second) |
| 86 | + |
| 87 | + // Use CLUSTER_PROVIDED load balancing. |
| 88 | + out.LbPolicy = envoy_config_cluster_v3.Cluster_CLUSTER_PROVIDED |
| 89 | + |
| 90 | + // Configure circuit breakers with a single threshold. |
| 91 | + // TODO [danehans]: Figure out an API that can be used to set these values. |
| 92 | + out.CircuitBreakers = &envoy_config_cluster_v3.CircuitBreakers{ |
| 93 | + Thresholds: []*envoy_config_cluster_v3.CircuitBreakers_Thresholds{ |
| 94 | + { |
| 95 | + MaxConnections: wrapperspb.UInt32(40000), |
| 96 | + MaxPendingRequests: wrapperspb.UInt32(40000), |
| 97 | + MaxRequests: wrapperspb.UInt32(40000), |
| 98 | + }, |
| 99 | + }, |
| 100 | + } |
| 101 | + |
| 102 | + // If OriginalDstLbConfig is not available on Cluster, |
| 103 | + // encode the configuration as a typed extension. |
| 104 | + // Note: The type URL will be "type.googleapis.com/envoy.config.cluster.v3.Cluster_OriginalDstLbConfig". |
| 105 | + lbConfig := &envoy_config_cluster_v3.Cluster_OriginalDstLbConfig{ |
| 106 | + UseHttpHeader: true, |
| 107 | + HttpHeaderName: "x-gateway-destination-endpoint", |
| 108 | + } |
| 109 | + anyLbConfig, err := anypb.New(lbConfig) |
| 110 | + if err != nil { |
| 111 | + // handle error appropriately |
| 112 | + return |
| 113 | + } |
| 114 | + out.TypedExtensionProtocolOptions = map[string]*anypb.Any{ |
| 115 | + "envoy.lb": anyLbConfig, |
| 116 | + } |
| 117 | +} |
0 commit comments