hdefazio
diff --git a/‎cmd/router/main.go‎
Lines changed: 53 additions & 14 deletions b/‎cmd/router/main.go‎
Lines changed: 53 additions & 14 deletions
diff --git a/‎cmd/router/main_test.go‎
Lines changed: 136 additions & 1 deletion b/‎cmd/router/main_test.go‎
Lines changed: 136 additions & 1 deletion
diff --git a/‎config/crd/full/serving.kserve.io_inferencegraphs.yaml‎
Lines changed: 15 additions & 0 deletions b/‎config/crd/full/serving.kserve.io_inferencegraphs.yaml‎
Lines changed: 15 additions & 0 deletions
diff --git a/‎pkg/apis/serving/v1alpha1/inference_graph.go‎
Lines changed: 19 additions & 1 deletion b/‎pkg/apis/serving/v1alpha1/inference_graph.go‎
Lines changed: 19 additions & 1 deletion
@@ -5,7 +5,7 @@ Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
 You may obtain a copy of the License at
 
-    http://www.apache.org/licenses/LICENSE-2.0
+	http://www.apache.org/licenses/LICENSE-2.0
 
 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
@@ -45,8 +45,6 @@ import (
 	"github.com/kserve/kserve/pkg/constants"
 )
 
-var log = logf.Log.WithName("InferenceGraphRouter")
-
 // _isInMesh is an auxiliary global variable for isInIstioMesh function.
 var _isInMesh *bool
 
@@ -147,7 +145,16 @@ func callService(serviceUrl string, input []byte, headers http.Header) ([]byte,
 	if val := req.Header.Get("Content-Type"); val == "" {
 		req.Header.Add("Content-Type", "application/json")
 	}
-	resp, err := http.DefaultClient.Do(req)
+
+	var client *http.Client
+	if routerTimeouts == nil || routerTimeouts.ServiceClient == nil {
+		client = http.DefaultClient
+	} else {
+		client = &http.Client{
+			Timeout: time.Duration(*routerTimeouts.ServiceClient) * time.Second,
+		}
+	}
+	resp, err := client.Do(req)
 	if err != nil {
 		log.Error(err, "An error has occurred while calling service", "service", serviceUrl)
 		return nil, 500, err
@@ -373,8 +380,6 @@ func prepareErrorResponse(err error, errorMessage string) []byte {
 	return errorResponseBytes
 }
 
-var inferenceGraph *v1alpha1.InferenceGraphSpec
-
 func graphHandler(w http.ResponseWriter, req *http.Request) {
 	inputBytes, _ := io.ReadAll(req.Body)
 	if response, statusCode, err := routeStep(v1alpha1.GraphRootNodeName, *inferenceGraph, inputBytes, req.Header); err != nil {
@@ -409,6 +414,35 @@ func compilePatterns(patterns []string) ([]*regexp.Regexp, error) {
 	return compiled, goerrors.Join(allErrors...)
 }
 
+// getTimeout picks the effective timeout: value when it is set, otherwise defaultValue.
+// Either argument may be nil; a nil result means neither was provided.
+func getTimeout(value, defaultValue *int64) *int64 {
+	if value == nil {
+		return defaultValue
+	}
+	return value
+}
+
+// initTimeouts resolves the router's effective timeouts and stores them in the package-level
+// routerTimeouts. Server read/write/idle start at the defaults from the constants package and
+// are replaced by whichever values graph.RouterTimeouts sets; ServiceClient has no default and
+// stays nil unless the graph provides it.
+func initTimeouts(graph v1alpha1.InferenceGraphSpec) {
+	readSeconds := int64(constants.RouterTimeoutsServerRead)
+	writeSeconds := int64(constants.RouterTimeoutServerWrite)
+	idleSeconds := int64(constants.RouterTimeoutServerIdle)
+
+	resolved := v1alpha1.InfereceGraphRouterTimeouts{
+		ServerRead:  &readSeconds,
+		ServerWrite: &writeSeconds,
+		ServerIdle:  &idleSeconds,
+	}
+
+	// Per-field override: a field left unset in the graph keeps its default.
+	if custom := graph.RouterTimeouts; custom != nil {
+		resolved.ServerRead = getTimeout(custom.ServerRead, resolved.ServerRead)
+		resolved.ServerWrite = getTimeout(custom.ServerWrite, resolved.ServerWrite)
+		resolved.ServerIdle = getTimeout(custom.ServerIdle, resolved.ServerIdle)
+		resolved.ServiceClient = getTimeout(custom.ServiceClient, nil)
+	}
+
+	routerTimeouts = &resolved
+}
+
 // Mainly used for kubernetes readiness probe. It responds with "503 shutting down" if server is shutting down,
 // otherwise returns "200 OK".
 func readyHandler(w http.ResponseWriter, req *http.Request) {
@@ -420,10 +454,14 @@ func readyHandler(w http.ResponseWriter, req *http.Request) {
 }
 
 var (
-	jsonGraph              = flag.String("graph-json", "", "serialized json graph def")
+	jsonGraph                                           = flag.String("graph-json", "", "serialized json graph def")
+	inferenceGraph         *v1alpha1.InferenceGraphSpec = nil
 	compiledHeaderPatterns []*regexp.Regexp
-	isShuttingDown         = false
-	drainSleepDuration     = 30 * time.Second
+	isShuttingDown                                               = false
+	drainSleepDuration                                           = 30 * time.Second
+	routerTimeouts         *v1alpha1.InfereceGraphRouterTimeouts = nil
+	log                                                          = logf.Log.WithName("InferenceGraphRouter")
+	signalChan                                                   = make(chan os.Signal, 1)
 )
 
 func main() {
@@ -438,22 +476,24 @@ func main() {
 			log.Error(err, "Failed to compile some header patterns")
 		}
 	}
+
 	inferenceGraph = &v1alpha1.InferenceGraphSpec{}
 	err := json.Unmarshal([]byte(*jsonGraph), inferenceGraph)
 	if err != nil {
 		log.Error(err, "failed to unmarshall inference graph json")
 		os.Exit(1)
 	}
+	initTimeouts(*inferenceGraph)
 
 	http.HandleFunc("/", graphHandler)
 	http.HandleFunc(constants.RouterReadinessEndpoint, readyHandler)
 
 	server := &http.Server{
 		Addr:         ":" + strconv.Itoa(constants.RouterPort),
-		Handler:      nil,             // default server mux
-		ReadTimeout:  time.Minute,     // https://medium.com/a-journey-with-go/go-understand-and-mitigate-slowloris-attack-711c1b1403f6
-		WriteTimeout: time.Minute,     // set the maximum duration before timing out writes of the response
-		IdleTimeout:  3 * time.Minute, // set the maximum amount of time to wait for the next request when keep-alives are enabled
+		Handler:      nil,                                                      // default server mux
+		ReadTimeout:  time.Duration(*routerTimeouts.ServerRead) * time.Second,  // set the maximum duration for reading the entire request, including the body
+		WriteTimeout: time.Duration(*routerTimeouts.ServerWrite) * time.Second, // set the maximum duration before timing out writes of the response
+		IdleTimeout:  time.Duration(*routerTimeouts.ServerIdle) * time.Second,  // set the maximum amount of time to wait for the next request when keep-alives are enabled
 	}
 
 	go func() {
@@ -469,7 +509,6 @@ func main() {
 }
 
 func handleSignals(server *http.Server) {
-	signalChan := make(chan os.Signal, 1)
 	signal.Notify(signalChan, os.Interrupt, syscall.SIGTERM)
 
 	sig := <-signalChan
 
@@ -5,7 +5,7 @@ Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
 You may obtain a copy of the License at
 
-    http://www.apache.org/licenses/LICENSE-2.0
+	http://www.apache.org/licenses/LICENSE-2.0
 
 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
@@ -17,13 +17,17 @@ limitations under the License.
 package main
 
 import (
+	"bytes"
 	"encoding/json"
 	"fmt"
 	"io"
 	"net/http"
 	"net/http/httptest"
 	"regexp"
+	"strconv"
+	"syscall"
 	"testing"
+	"time"
 
 	"github.com/stretchr/testify/assert"
 	"github.com/stretchr/testify/require"
@@ -32,12 +36,17 @@ import (
 	"sigs.k8s.io/controller-runtime/pkg/log/zap"
 
 	"github.com/kserve/kserve/pkg/apis/serving/v1alpha1"
+	"github.com/kserve/kserve/pkg/constants"
 )
 
 func init() {
 	logf.SetLogger(zap.New())
 }
 
+// Int64Ptr returns a pointer to a fresh int64 holding i, for filling optional *int64 fields.
+func Int64Ptr(i int64) *int64 {
+	p := new(int64)
+	*p = i
+	return p
+}
+
 func TestSimpleModelChainer(t *testing.T) {
 	// Start a local HTTP server
 	model1 := httptest.NewServer(http.HandlerFunc(func(rw http.ResponseWriter, req *http.Request) {
@@ -883,3 +892,129 @@ func TestCallServiceWhenMultipleHeadersToPropagateUsingInvalidPattern(t *testing
 	fmt.Printf("final response:%v\n", response)
 	require.Equal(t, expectedResponse, response)
 }
+
+// TestServerTimeout runs the router's main() against a two-step Sequence graph whose stub
+// models each sleep for serviceStepDuration, and checks whether a client request succeeds
+// or fails under the configured server read/write/idle timeouts (given in seconds).
+// A nil serverTimeout leaves RouterTimeouts unset so the router's defaults apply.
+func TestServerTimeout(t *testing.T) {
+	testCases := []struct {
+		name                string
+		serverTimeout       *int64
+		serviceStepDuration time.Duration
+		expectError         bool
+	}{
+		{
+			name:                "default",
+			serverTimeout:       nil,
+			serviceStepDuration: 1 * time.Millisecond,
+			expectError:         false,
+		},
+		{
+			name:                "timeout",
+			serverTimeout:       Int64Ptr(1),
+			serviceStepDuration: 500 * time.Millisecond,
+			expectError:         true,
+		},
+		{
+			name:                "success",
+			serverTimeout:       Int64Ptr(2),
+			serviceStepDuration: 500 * time.Millisecond,
+			expectError:         false,
+		},
+	}
+
+	for _, testCase := range testCases {
+		t.Run(testCase.name, func(t *testing.T) {
+			drainSleepDuration = 0 * time.Millisecond // instant shutdown
+
+			// startModel launches a stub model that drains the request body, sleeps for the
+			// case's step duration and answers {"predictions": prediction}. It returns the
+			// model's URL and closes the server when the subtest finishes.
+			startModel := func(prediction string) string {
+				server := httptest.NewServer(http.HandlerFunc(func(rw http.ResponseWriter, req *http.Request) {
+					if _, err := io.ReadAll(req.Body); err != nil {
+						return
+					}
+					time.Sleep(testCase.serviceStepDuration)
+					// Marshalling a map[string]string cannot fail, so the error is ignored.
+					responseBytes, _ := json.Marshal(map[string]string{"predictions": prediction})
+					_, _ = rw.Write(responseBytes)
+				}))
+				t.Cleanup(server.Close)
+
+				modelURL, err := apis.ParseURL(server.URL)
+				if err != nil {
+					t.Fatalf("Failed to parse model url")
+				}
+				return modelURL.String()
+			}
+			model1URL := startModel("1")
+			model2URL := startModel("2")
+
+			// Create InferenceGraph
+			graphSpec := v1alpha1.InferenceGraphSpec{
+				Nodes: map[string]v1alpha1.InferenceRouter{
+					"root": {
+						RouterType: v1alpha1.Sequence,
+						Steps: []v1alpha1.InferenceStep{
+							{
+								StepName: "model1",
+								InferenceTarget: v1alpha1.InferenceTarget{
+									ServiceURL: model1URL,
+								},
+							},
+							{
+								StepName: "model2",
+								InferenceTarget: v1alpha1.InferenceTarget{
+									ServiceURL: model2URL,
+								},
+								Data: "$response",
+							},
+						},
+					},
+				},
+			}
+			if testCase.serverTimeout != nil {
+				timeout := *testCase.serverTimeout
+				graphSpec.RouterTimeouts = &v1alpha1.InfereceGraphRouterTimeouts{
+					ServerRead:  &timeout,
+					ServerWrite: &timeout,
+					ServerIdle:  &timeout,
+				}
+			}
+			jsonBytes, err := json.Marshal(graphSpec)
+			require.NoError(t, err)
+			*jsonGraph = string(jsonBytes)
+
+			// Start InferenceGraph router server in a separate goroutine
+			go main()
+			t.Cleanup(func() {
+				http.DefaultServeMux = http.NewServeMux() // reset http handlers
+				signalChan <- syscall.SIGTERM             // shutdown the server
+			})
+
+			// Call the InferenceGraph
+			client := &http.Client{}
+			time.Sleep(1 * time.Second) // give the router time to start listening
+			req, err := http.NewRequest(http.MethodPost, "http://localhost:"+strconv.Itoa(constants.RouterPort), bytes.NewBuffer(nil))
+			require.NoError(t, err)
+			resp, err := client.Do(req)
+			if resp != nil {
+				defer resp.Body.Close()
+			}
+
+			if testCase.expectError {
+				// Guard first: calling err.Error() on a nil error would panic the whole
+				// test binary instead of reporting a failed expectation.
+				require.Error(t, err)
+				assert.Contains(t, err.Error(), "EOF")
+			} else {
+				require.NoError(t, err)
+				assert.Equal(t, http.StatusOK, resp.StatusCode)
+			}
+		})
+	}
+}
@@ -561,6 +561,21 @@ spec:
                       x-kubernetes-int-or-string: true
                     type: object
                 type: object
+              routerTimeouts:
+                properties:
+                  serverIdle:
+                    format: int64
+                    type: integer
+                  serverRead:
+                    format: int64
+                    type: integer
+                  serverWrite:
+                    format: int64
+                    type: integer
+                  serviceClient:
+                    format: int64
+                    type: integer
+                type: object
               scaleMetric:
                 enum:
                 - cpu
 
@@ -5,7 +5,7 @@ Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
 You may obtain a copy of the License at
 
-    http://www.apache.org/licenses/LICENSE-2.0
+	http://www.apache.org/licenses/LICENSE-2.0
 
 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
@@ -52,6 +52,8 @@ type InferenceGraphSpec struct {
 	// TimeoutSeconds specifies the number of seconds to wait before timing out a request to the component.
 	// +optional
 	TimeoutSeconds *int64 `json:"timeout,omitempty"`
+	// +optional
+	RouterTimeouts *InfereceGraphRouterTimeouts `json:"routerTimeouts,omitempty"`
 	// Minimum number of replicas, defaults to 1 but can be set to 0 to enable scale-to-zero.
 	// +optional
 	MinReplicas *int32 `json:"minReplicas,omitempty"`
@@ -115,6 +117,22 @@ const (
 	GraphRootNodeName string = "root"
 )
 
+// +k8s:openapi-gen=true
+// InfereceGraphRouterTimeouts groups the optional timeout settings of the InferenceGraph router.
+// Every field is a number of seconds; a field left unset is omitted from the serialized spec.
+type InfereceGraphRouterTimeouts struct {
+	// ServerRead specifies the number of seconds to wait before timing out a request read by the server.
+	// The router applies it as the ReadTimeout of its HTTP server.
+	// +optional
+	ServerRead *int64 `json:"serverRead,omitempty"`
+	// ServerWrite specifies the maximum duration in seconds before timing out writes of the response.
+	// The router applies it as the WriteTimeout of its HTTP server.
+	// +optional
+	ServerWrite *int64 `json:"serverWrite,omitempty"`
+	// ServerIdle specifies the maximum amount of time in seconds to wait for the next request when keep-alives are enabled.
+	// The router applies it as the IdleTimeout of its HTTP server.
+	// +optional
+	ServerIdle *int64 `json:"serverIdle,omitempty"`
+	// ServiceClient specifies a time limit in seconds for requests made to the graph components by HTTP client.
+	// When unset, the router calls components with http.DefaultClient instead of a client carrying this timeout.
+	// +optional
+	ServiceClient *int64 `json:"serviceClient,omitempty"`
+}
+
 // +k8s:openapi-gen=true
 // InferenceRouter defines the router for each InferenceGraph node with one or multiple steps
 //