enable reverse proxy in server-requesting pod

delavet · delavet · commit 1b84ee772e41 · 2026-04-14T19:34:17.000+08:00
diff --git a/cmd/requester/main.go b/cmd/requester/main.go
@@ -24,6 +24,7 @@ import (
 
 	"github.com/llm-d-incubation/llm-d-fast-model-actuation/pkg/server/requester/coordination"
 	"github.com/llm-d-incubation/llm-d-fast-model-actuation/pkg/server/requester/probes"
+	"github.com/llm-d-incubation/llm-d-fast-model-actuation/pkg/server/requester/proxy"
 	"sigs.k8s.io/controller-runtime/pkg/manager/signals"
 
 	"k8s.io/klog/v2"
@@ -48,6 +49,11 @@ func main() {
 		spiPort = "8081"
 	}
 
+	proxyPort := os.Getenv("PROXY_PORT")
+	if proxyPort == "" {
+		proxyPort = "8082"
+	}
+
 	var ready atomic.Bool
 
 	var wg sync.WaitGroup
@@ -72,5 +78,14 @@ func main() {
 		}
 	}()
 
+	// Start the reverse proxy server
+	go func() {
+		defer wg.Done()
+		err := proxy.Run(ctx, proxyPort)
+		if err != nil {
+			logger.Error(err, "failed to start requester proxy server")
+		}
+	}()
+
 	wg.Wait()
 }
diff --git a/docs/dual-pods.md b/docs/dual-pods.md
@@ -125,6 +125,39 @@ assigned to the server-requesting Pod) for running `vllm serve`. To
 swap a model out, the controller issues a request that does not
 include those details.
 
+#### Requester Reverse Proxy
+
+The requester container includes a reverse proxy server that forwards
+inference requests to the actual vLLM instance running in the
+server-providing Pod (typically managed by the launcher). This
+abstraction allows clients to send requests to the server-requesting
+Pod without needing to know the actual port vLLM is listening on.
+
+The reverse proxy operates as follows:
+
+1. **Initialization**: When the dual-pods controller binds a
+   server-requesting Pod to a server-providing Pod, it sends an HTTP
+   POST request to the requester's proxy initialization endpoint
+   (`/v1/proxy/init`). The request body contains the target address
+   (launcher Pod IP) and the allocated port:
+
+   ```json
+   {"address": "10.244.1.5", "port": 8005}
+   ```
+
+2. **Request forwarding**: Once initialized, the reverse proxy (listening on
+   `PROXY_PORT`, default 8082) forwards all incoming HTTP requests to the
+   configured vLLM instance, including OpenAI-compatible endpoints such as
+   `/v1/chat/completions` and `/v1/completions`; before that it replies 503.
+
+3. **Status checking**: The proxy's initialization status can be
+   queried via an HTTP GET request to `/v1/proxy/init`.
+
+This design decouples the client-facing endpoint (server-requesting
+Pod) from the actual inference server location (server-providing Pod
+with dynamic port), enabling flexible resource management and model
+swapping without disrupting inference clients.
+
 ### Scenarios
 
 The outer product of
diff --git a/pkg/controller/dual-pods/inference-server.go b/pkg/controller/dual-pods/inference-server.go
@@ -574,6 +574,15 @@ func (item infSvrItem) process(urCtx context.Context, ctl *controller, nodeDat *
 				return fmt.Errorf("launcher Pod %q has no IP assigned yet", launcherPod.Name), true
 			}
 
+			// Point the requester Pod's reverse proxy at the launcher Pod, so that
+			// requests sent to the requester Pod are forwarded to the launcher Pod.
+			url := fmt.Sprintf("http://%s:%s%s", requestingPod.Status.PodIP, adminPort, stubapi.InitProxy)
+			if err := doPostWithData(url, bytes.NewReader([]byte(fmt.Sprintf("{\"address\":\"%s\",\"port\":%d}",
+				launcherIP, desiredPort)))); err != nil {
+				logger.Error(err, "Failed to initialize requester proxy")
+				return err, true
+			}
+
 			launcherBaseURL := fmt.Sprintf("http://%s:%d", launcherIP, ctlrcommon.LauncherServicePort)
 			lClient, err := NewLauncherClient(launcherBaseURL)
 			if err != nil {
@@ -1357,13 +1366,16 @@ func (ctl *controller) ensureReqState(ctx context.Context, requestingPod *corev1
 	return err, err != nil
 }
 
-// doPost does the HTTP POST request/response to the given URL.
 func doPost(url string) error {
+	return doPostWithData(url, nil)
+}
+
+func doPostWithData(url string, data io.Reader) error {
 	client := &http.Client{
 		Timeout: 5 * time.Second,
 	}
 
-	resp, err := client.Post(url, "application/json", nil)
+	resp, err := client.Post(url, "application/json", data)
 	if err != nil {
 		return fmt.Errorf("http post %q: %w", url, err)
 	}
diff --git a/pkg/server/requester/coordination/server.go b/pkg/server/requester/coordination/server.go
@@ -31,6 +31,7 @@ import (
 
 	"k8s.io/klog/v2"
 
+	"github.com/llm-d-incubation/llm-d-fast-model-actuation/pkg/server/requester/proxy"
 	stubapi "github.com/llm-d-incubation/llm-d-fast-model-actuation/pkg/spi"
 )
 
@@ -212,6 +213,7 @@ func RunWithGPUUUIDs(ctx context.Context, port string, ready *atomic.Bool, logWr
 	mux.HandleFunc("POST "+stubapi.BecomeReadyPath, newSetReadyHandler(logger, ready, true))
 	mux.HandleFunc("POST "+stubapi.BecomeUnreadyPath, newSetReadyHandler(logger, ready, false))
 	mux.HandleFunc("POST "+stubapi.SetLogPath, newSetLogHandler(logger, logWriter))
+	mux.HandleFunc(stubapi.InitProxy, proxy.Initialize)
 
 	server := &http.Server{
 		Addr:    ":" + port,
diff --git a/pkg/server/requester/proxy/server.go b/pkg/server/requester/proxy/server.go
@@ -0,0 +1,187 @@
+/*
+Copyright 2025 The llm-d Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+	http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package proxy
+
+import (
+	"context"
+	"encoding/json"
+	"fmt"
+	"io"
+	"net"
+	"net/http"
+	"net/http/httputil"
+	"net/url"
+	"sync"
+	"sync/atomic"
+	"time"
+
+	"k8s.io/klog/v2"
+)
+
+// ConfigRequest is the JSON body of a proxy initialization POST; it names the target host and port to forward to.
+type ConfigRequest struct {
+	Address string `json:"address"`
+	Port    int    `json:"port"`
+}
+
+// proxy holds the lazily configured reverse proxy state: targetURL and proxy
+// are assigned by Initialize, after which initialized is set to true.
+type proxy struct {
+	mu          sync.RWMutex
+	targetURL   *url.URL
+	proxy       *httputil.ReverseProxy
+	initialized atomic.Bool
+}
+
+// instance is the package-level singleton proxy shared by serveProxy and Initialize.
+var instance = &proxy{}
+
+// Run serves the reverse proxy on the given port and blocks until the server stops; cancelling ctx triggers a graceful shutdown.
+func Run(ctx context.Context, port string) error {
+	logger := klog.FromContext(ctx).WithName("proxy-server")
+	logger.Info("starting proxy server")
+
+	mux := http.NewServeMux()
+	mux.HandleFunc("/", serveProxy)
+
+	server := &http.Server{
+		Addr:         fmt.Sprintf(":%s", port),
+		Handler:      mux,
+		ReadTimeout:  30 * time.Second,
+		WriteTimeout: 5 * time.Minute, // Long timeout for inference requests
+		IdleTimeout:  120 * time.Second,
+	}
+
+	go func() {
+		<-ctx.Done()
+		logger.Info("shutting down")
+
+		ctx, cancelFn := context.WithTimeout(context.Background(), 60*time.Second)
+		defer cancelFn()
+		if err := server.Shutdown(ctx); err != nil {
+			logger.Error(err, "failed to gracefully shutdown")
+		}
+	}()
+
+	logger.Info("starting server", "port", port)
+	if err := server.ListenAndServe(); err != nil && err != http.ErrServerClosed {
+		return fmt.Errorf("listen and serve error: %w", err)
+	}
+
+	logger.Info("server stopped")
+	return nil
+}
+
+// serveProxy forwards the request to the configured target, or replies 503 while the proxy is not yet initialized.
+func serveProxy(w http.ResponseWriter, r *http.Request) {
+	if !instance.initialized.Load() {
+		http.Error(w, "proxy not initialized", http.StatusServiceUnavailable)
+		return
+	}
+
+	// Initialize stores the initialized flag only after assigning instance.proxy, so it is non-nil here
+	instance.proxy.ServeHTTP(w, r)
+}
+
+// Initialize serves the proxy init endpoint. GET reports the current status;
+// POST configures the target from a JSON ConfigRequest body exactly once and
+// answers 409 Conflict on later attempts. Other methods get 405.
+func Initialize(w http.ResponseWriter, r *http.Request) {
+	// Get proxy status
+	if r.Method == http.MethodGet {
+		if instance.initialized.Load() {
+			targetURL := instance.targetURL
+			w.WriteHeader(http.StatusOK)
+			if targetURL != nil {
+				fmt.Fprintf(w, "proxying to %s", targetURL)
+			} else {
+				_, _ = w.Write([]byte("proxy initialized but targetURL is nil"))
+			}
+		} else {
+			w.WriteHeader(http.StatusOK)
+			_, _ = w.Write([]byte("proxy not initialized"))
+		}
+		return
+	}
+
+	if r.Method != http.MethodPost {
+		http.Error(w, "invalid method", http.StatusMethodNotAllowed)
+		return
+	}
+
+	// Fast path: reject before reading the body if already initialized
+	if instance.initialized.Load() {
+		http.Error(w, "proxy already initialized", http.StatusConflict)
+		return
+	}
+
+	// Parse and validate the request body before taking the lock, so a slow
+	// or stalled client cannot block other initialization requests. The body
+	// is a tiny JSON object, so cap it to keep memory use bounded.
+	body, err := io.ReadAll(http.MaxBytesReader(w, r.Body, 1<<20))
+	if err != nil {
+		http.Error(w, fmt.Sprintf("failed to read request body: %v", err), http.StatusBadRequest)
+		return
+	}
+
+	var config ConfigRequest
+	if err := json.Unmarshal(body, &config); err != nil {
+		http.Error(w, fmt.Sprintf("failed to parse JSON: %v", err), http.StatusBadRequest)
+		return
+	}
+
+	if config.Address == "" {
+		http.Error(w, "address is required", http.StatusBadRequest)
+		return
+	}
+
+	if config.Port <= 0 || config.Port > 65535 {
+		http.Error(w, "invalid port", http.StatusBadRequest)
+		return
+	}
+
+	// Create target URL
+	targetURL := &url.URL{
+		Scheme: "http",
+		Host:   net.JoinHostPort(config.Address, fmt.Sprintf("%d", config.Port)),
+	}
+
+	// Create the reverse proxy. NewSingleHostReverseProxy leaves ErrorHandler
+	// unset, so install one that reports the upstream failure to the client.
+	reverseProxy := httputil.NewSingleHostReverseProxy(targetURL)
+	reverseProxy.ErrorHandler = func(w http.ResponseWriter, r *http.Request, err error) {
+		http.Error(w, fmt.Sprintf("proxy error: %v", err), http.StatusBadGateway)
+	}
+
+	// Publish the configuration under the write lock
+	instance.mu.Lock()
+	defer instance.mu.Unlock()
+
+	// Re-check after acquiring the lock: a concurrent request may have won
+	if instance.initialized.Load() {
+		http.Error(w, "proxy already initialized", http.StatusConflict)
+		return
+	}
+
+	instance.targetURL = targetURL
+	instance.proxy = reverseProxy
+
+	// Set the flag last so readers that see true also see the fields above
+	instance.initialized.Store(true)
+	w.WriteHeader(http.StatusOK)
+	fmt.Fprintf(w, "initialized proxy to: %s", targetURL)
+}
diff --git a/pkg/spi/interface.go b/pkg/spi/interface.go
@@ -59,3 +59,15 @@ const SetLogPath = "/v1/set-log"
 // LogStartPosParam is the name of the query parameter that
 // holds that starting position of a log chunk.
 const LogStartPosParam = "startPos"
+
+// InitProxy is the path for initializing the HTTP reverse proxy.
+// The proxy is used to forward requests from the server-requesting
+// pod to the server-providing pod.
+// Supports two HTTP methods:
+//   - GET: retrieves the initialization status of the proxy.
+//     Returns a plain-text status message.
+//   - POST: initializes the proxy with a target address and port.
+//     The request body should contain a JSON object with "address"
+//     and "port" fields. After successful initialization,
+//     the proxy will forward requests to the configured target server.
+const InitProxy = "/v1/proxy/init"
diff --git a/test/e2e/mkobjs-openshift.sh b/test/e2e/mkobjs-openshift.sh
@@ -227,6 +227,8 @@ spec:
             containerPort: 8080
           - name: spi
             containerPort: 8081
+          - name: proxy          
+            containerPort: 8082
           readinessProbe:
             httpGet:
               path: /ready
diff --git a/test/e2e/mkobjs.sh b/test/e2e/mkobjs.sh
@@ -183,6 +183,8 @@ spec:
             containerPort: 8080
           - name: spi
             containerPort: 8081
+          - name: proxy          
+            containerPort: 8082
           readinessProbe:
             httpGet:
               path: /ready