diff --git a/packages/api/main.go b/packages/api/main.go
index 2bfaf3c38..e2493026c 100644
--- a/packages/api/main.go
+++ b/packages/api/main.go
@@ -45,9 +45,11 @@ const (
 	maxMultipartMemory = 1 << 27 // 128 MiB
 	maxUploadLimit     = 1 << 28 // 256 MiB
 
-	maxReadHeaderTimeout = 60 * time.Second
-	maxReadTimeout       = 75 * time.Second
-	maxWriteTimeout      = 75 * time.Second
+	maxReadTimeout  = 75 * time.Second
+	maxWriteTimeout = 75 * time.Second
+	// This timeout must be greater than 600s (the GCP LB upstream idle timeout) to prevent a race condition.
+	// https://cloud.google.com/load-balancing/docs/https#timeouts_and_retries:~:text=The%20load%20balancer%27s%20backend%20keepalive,is%20greater%20than%20600%20seconds
+	idleTimeout = 620 * time.Second
 
 	defaultPort = 80
 )
@@ -171,12 +173,13 @@ func NewGinServer(ctx context.Context, logger *zap.Logger, apiStore *handlers.AP
 	r.MaxMultipartMemory = maxMultipartMemory
 
 	s := &http.Server{
-		Handler:           r,
-		Addr:              fmt.Sprintf("0.0.0.0:%d", port),
-		ReadHeaderTimeout: maxReadHeaderTimeout,
-		ReadTimeout:       maxReadTimeout,
-		WriteTimeout:      maxWriteTimeout,
-		BaseContext:       func(net.Listener) context.Context { return ctx },
+		Handler: r,
+		Addr:    fmt.Sprintf("0.0.0.0:%d", port),
+		// Configure timeouts to be greater than the proxy timeouts; ReadHeaderTimeout is unset and falls back to ReadTimeout.
+		ReadTimeout:  maxReadTimeout,
+		WriteTimeout: maxWriteTimeout,
+		IdleTimeout:  idleTimeout,
+		BaseContext:  func(net.Listener) context.Context { return ctx },
 	}
 
 	return s
diff --git a/packages/client-proxy/internal/proxy/proxy.go b/packages/client-proxy/internal/proxy/proxy.go
index de937dd97..f857e2022 100644
--- a/packages/client-proxy/internal/proxy/proxy.go
+++ b/packages/client-proxy/internal/proxy/proxy.go
@@ -23,7 +23,10 @@ const (
 	maxRetries                = 3
 	minOrchestratorProxyConns = 3
 
-	idleTimeout = 620 * time.Second
+	// This timeout must be greater than 600s (the GCP LB upstream idle timeout) to prevent a race condition.
+	// It is also good practice to set it higher than the idle timeout of the backend service.
+	// https://cloud.google.com/load-balancing/docs/https#timeouts_and_retries:~:text=The%20load%20balancer%27s%20backend%20keepalive,is%20greater%20than%20600%20seconds
+	idleTimeout = 610 * time.Second
 
 	// We use a constant connection key, because we don't have to separate connection pools
 	// as we need to do when connecting to sandboxes (from orchestrator proxy) to prevent reuse of pool connections
diff --git a/packages/orchestrator/internal/proxy/proxy.go b/packages/orchestrator/internal/proxy/proxy.go
index 1d6ddfac5..e5a111507 100644
--- a/packages/orchestrator/internal/proxy/proxy.go
+++ b/packages/orchestrator/internal/proxy/proxy.go
@@ -19,7 +19,10 @@ import (
 const (
 	minSandboxConns = 1
 
-	idleTimeout = 630 * time.Second
+	// This timeout must be greater than 600s (the GCP LB upstream idle timeout) to prevent a race condition.
+	// It is also good practice to use a higher value at each hop deeper in the stack.
+	// https://cloud.google.com/load-balancing/docs/https#timeouts_and_retries:~:text=The%20load%20balancer%27s%20backend%20keepalive,is%20greater%20than%20600%20seconds
+	idleTimeout = 620 * time.Second
 )
 
 func NewSandboxProxy(port uint, sandboxes *smap.Map[*sandbox.Sandbox]) (*reverse_proxy.Proxy, error) {
diff --git a/packages/shared/pkg/proxy/proxy.go b/packages/shared/pkg/proxy/proxy.go
index 08180fb0c..9eef8a207 100644
--- a/packages/shared/pkg/proxy/proxy.go
+++ b/packages/shared/pkg/proxy/proxy.go
@@ -12,6 +12,8 @@ import (
 )
 
 const maxClientConns = 8192 // Reasonably big number that is lower than the number of available ports.
+// Buffer added to the downstream idle timeout so it always exceeds the upstream one.
+const idleTimeoutBufferUpstreamDownstream = 10 * time.Second
 
 type Proxy struct {
 	http.Server
@@ -33,10 +34,13 @@ func New(
 
 	return &Proxy{
 		Server: http.Server{
-			Addr:              fmt.Sprintf(":%d", port),
-			ReadTimeout:       0,
-			WriteTimeout:      0,
-			IdleTimeout:       idleTimeout,
+			Addr:         fmt.Sprintf(":%d", port),
+			ReadTimeout:  0,
+			WriteTimeout: 0,
+			// The downstream (client-side) idle timeout must be greater than the upstream (server-side) one;
+			// otherwise a request arriving between the upstream and downstream idle timeouts may try to reuse
+			// an upstream connection that is already in the `CLOSE_WAIT` state, resulting in an error.
+			IdleTimeout:       idleTimeout + idleTimeoutBufferUpstreamDownstream,
 			ReadHeaderTimeout: 0,
 			Handler:           handler(p, getDestination),
 		},
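
To make the layering rule concrete, here is a minimal standalone sketch, not part of the diff; the port, upstream address, and constant names are illustrative assumptions. It shows the invariant the comments describe: the client side of every connection must give up on an idle connection before the server side closes it, so values grow at each hop behind the GCP LB's fixed 600s backend keepalive.

package main

import (
	"log"
	"net/http"
	"net/http/httputil"
	"net/url"
	"time"
)

const (
	// Fixed by GCP: the load balancer may reuse an idle backend connection
	// for up to 600 seconds.
	gcpLBKeepalive = 600 * time.Second

	// Idle timeout for this proxy's pooled connections to its upstream.
	// It must stay below the upstream server's own IdleTimeout.
	upstreamIdle = gcpLBKeepalive + 10*time.Second // 610s

	// Idle timeout for connections held open for downstream clients, kept a
	// buffer above upstreamIdle (mirroring idleTimeoutBufferUpstreamDownstream).
	downstreamIdle = upstreamIdle + 10*time.Second // 620s
)

func main() {
	// Hypothetical upstream; in the real stack this would be the next proxy hop.
	upstream, err := url.Parse("http://127.0.0.1:9000")
	if err != nil {
		log.Fatal(err)
	}

	proxy := httputil.NewSingleHostReverseProxy(upstream)
	proxy.Transport = &http.Transport{
		// Drop pooled upstream connections before the upstream server's
		// IdleTimeout fires, so a request never races a remote close and
		// picks up a socket stuck in CLOSE_WAIT.
		IdleConnTimeout: upstreamIdle,
	}

	srv := &http.Server{
		Addr:    ":8080",
		Handler: proxy,
		// Hold downstream connections longer than any client in front of us
		// (e.g. the GCP LB) will try to reuse them, so the client always
		// closes first.
		IdleTimeout: downstreamIdle,
	}
	log.Fatal(srv.ListenAndServe())
}

The direction is the whole point: on any given connection it is the dialing side that must time out first, which is why the safe values increase as you move deeper into the stack.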