fix: make HuggingFace token optional for non-gated models

sozercan · sozercan · commit a1f03408eab2 · 2026-03-04T14:56:30.000-08:00
Only set spec.secrets.huggingFaceToken on ModelDeployment CRs when the model is gated. Previously, the Web UI hardcoded hfTokenSecret for all deployments, causing non-gated models (Qwen, DeepSeek, TinyLlama, Phi-3) to fail with CreateContainerConfigError when the referenced K8s Secret did not exist. Changes: (1) conditional hfTokenSecret default based on model.gated; (2) guard in handleSubmit to strip the secret for non-gated models; (3) remove the KAITO exception from the needsHfAuth button-disable logic; (4) add clarifying comments to the sample YAMLs. Fixes #43.
diff --git a/controller/config/samples/kubeairunway_v1alpha1_modeldeployment.yaml b/controller/config/samples/kubeairunway_v1alpha1_modeldeployment.yaml
@@ -20,6 +20,7 @@ spec:
     gpu:
       count: 1
     memory: "32Gi"
+  # Required: Llama is a gated model requiring HuggingFace authentication
   secrets:
     huggingFaceToken: "hf-token"
 ---
@@ -77,5 +78,6 @@ spec:
       gpu:
         count: 2
       memory: "64Gi"
+  # Required: Llama is a gated model requiring HuggingFace authentication
   secrets:
     huggingFaceToken: "hf-token"
diff --git a/controller/config/samples/kubeairunway_v1alpha1_modeldeployment_llmd.yaml b/controller/config/samples/kubeairunway_v1alpha1_modeldeployment_llmd.yaml
@@ -24,6 +24,7 @@ spec:
     gpu:
       count: 1
     memory: "24Gi"
+  # Required: Llama is a gated model requiring HuggingFace authentication
   secrets:
     huggingFaceToken: "llm-d-hf-token"
 ---
@@ -54,5 +55,6 @@ spec:
       gpu:
         count: 4
       memory: "96Gi"
+  # Required: Llama is a gated model requiring HuggingFace authentication
   secrets:
     huggingFaceToken: "llm-d-hf-token"
diff --git a/frontend/src/components/deployments/DeploymentForm.tsx b/frontend/src/components/deployments/DeploymentForm.tsx
@@ -248,7 +248,7 @@ export function DeploymentForm({ model, detailedCapacity, autoscaler, runtimes }
     provider: getDefaultRuntime(),
     routerMode: 'none',
     replicas: 1,
-    hfTokenSecret: import.meta.env.VITE_DEFAULT_HF_SECRET || 'hf-token-secret',
+    hfTokenSecret: model.gated ? (import.meta.env.VITE_DEFAULT_HF_SECRET || 'hf-token-secret') : '',
     enforceEager: true,
     enablePrefixCaching: false,
     trustRemoteCode: false,
@@ -370,6 +370,11 @@ export function DeploymentForm({ model, detailedCapacity, autoscaler, runtimes }
       // Build the deployment config, adding KAITO-specific fields if needed
       let deployConfig = { ...config }
 
+      // Only include hfTokenSecret for gated models
+      if (!model.gated) {
+        delete deployConfig.hfTokenSecret;
+      }
+
       if (selectedRuntime === 'kaito') {
         // Add kaitoResourceType to all KAITO deployments
         deployConfig = { ...deployConfig, kaitoResourceType }
@@ -592,7 +597,7 @@ export function DeploymentForm({ model, detailedCapacity, autoscaler, runtimes }
 
   // Status-aware button content
   const getButtonContent = () => {
-    if (needsHfAuth && selectedRuntime !== 'kaito') {
+    if (needsHfAuth) {
       return 'HuggingFace Auth Required'
     }
 
@@ -1558,7 +1563,7 @@ export function DeploymentForm({ model, detailedCapacity, autoscaler, runtimes }
         </Button>
         <Button
           type="submit"
-          disabled={createDeployment.isProcessing || (needsHfAuth && selectedRuntime !== 'kaito') || !isRuntimeInstalled || !isKaitoConfigValid}
+          disabled={createDeployment.isProcessing || needsHfAuth || !isRuntimeInstalled || !isKaitoConfigValid}
           loading={createDeployment.isProcessing}
           className={cn(
             "flex-1 gap-2",