ray-project · tiennguyentony · Jan 28, 2026 · Jan 28, 2026 · Jan 28, 2026 · Jan 29, 2026
@@ -943,7 +943,7 @@ func addWellKnownAcceleratorResources(rayStartParams map[string]string, resource
 		// Scan for resource keys of gpus
 		if _, ok := rayStartParams["num-gpus"]; !ok {
 			if utils.IsGPUResourceKey(resourceKeyString) && !resourceValue.IsZero() {
-				rayStartParams["num-gpus"] = strconv.FormatInt(resourceValue.Value(), 10)
+				rayStartParams["num-gpus"] = strconv.FormatFloat(resourceValue.AsApproximateFloat64(), 'f', -1, 64)
 			}
 		}
 
@@ -1158,7 +1158,9 @@ func updateRayStartParamsResources(ctx context.Context, rayStartParams map[strin
 		} else if normalizedName == string(corev1.ResourceMemory) {
 			rayStartParams["memory"] = strconv.FormatInt(q.Value(), 10)
 		} else if utils.IsGPUResourceKey(normalizedName) {
-			rayStartParams["num-gpus"] = strconv.FormatInt(q.Value(), 10)
+			// Support fractional GPU values (e.g., 0.4 GPU per replica for multi-model serving)
+			// Convert to float to preserve decimal values for Ray autoscaler
+			rayStartParams["num-gpus"] = strconv.FormatFloat(q.AsApproximateFloat64(), 'f', -1, 64)
 		} else {
 			rayResourcesJson[name] = q.AsApproximateFloat64()
 		}

@@ -2167,3 +2167,79 @@ func TestUpdateRayStartParamsResources(t *testing.T) {
 		})
 	}
 }
+
+func TestUpdateRayStartParamsResources_WithFractionalGPU(t *testing.T) {
+	// Verifies that GPU quantities supplied through the group-level Resources map are
+	// written to the "num-gpus" Ray start parameter without dropping the fractional part.
+	// See: https://github.com/ray-project/kuberay/issues/4447
+	ctx := context.Background()
+
+	// Each case feeds groupResources to updateRayStartParamsResources.
+	// expectedParamPresent states whether "num-gpus" must end up set (true) or absent (false);
+	// expectedNumGPUs is the exact string expected under "num-gpus" when it is set.
+	tests := map[string]struct {
+		groupResources       map[string]string
+		expectedNumGPUs      string
+		expectedParamPresent bool
+	}{
+		"Fractional GPU as millicores": {
+			groupResources:       map[string]string{"nvidia.com/gpu": "400m"}, // 400m = 400 milli-units = 0.4 GPU
+			expectedNumGPUs:      "0.4",
+			expectedParamPresent: true,
+		},
+		"Single GPU": {
+			groupResources:       map[string]string{"nvidia.com/gpu": "1"},
+			expectedNumGPUs:      "1",
+			expectedParamPresent: true,
+		},
+		"Multiple GPUs": {
+			groupResources:       map[string]string{"nvidia.com/gpu": "4"},
+			expectedNumGPUs:      "4",
+			expectedParamPresent: true,
+		},
+	}
+
+	for name, tc := range tests {
+		t.Run(name, func(t *testing.T) {
+			rayStartParams := make(map[string]string)
+			updateRayStartParamsResources(ctx, rayStartParams, tc.groupResources)
+
+			val, ok := rayStartParams["num-gpus"]
+			// Assert presence in both directions: a case with expectedParamPresent=false
+			// must verify absence rather than pass without checking anything.
+			assert.Equal(t, tc.expectedParamPresent, ok, "unexpected presence of num-gpus in rayStartParams")
+			if tc.expectedParamPresent {
+				assert.Equal(t, tc.expectedNumGPUs, val, "GPU value should match expected fractional value")
+			}
+		})
+	}
+}
+
+// TestAddWellKnownAcceleratorResources_WithFractionalGPU exercises the container-limits code
+// path: a GPU quantity found in the corev1.ResourceList handed to
+// addWellKnownAcceleratorResources must show up under the "num-gpus" Ray start parameter,
+// fractional values included.
+// See: https://github.com/ray-project/kuberay/issues/4447
+func TestAddWellKnownAcceleratorResources_WithFractionalGPU(t *testing.T) {
+	const gpuKey = corev1.ResourceName("nvidia.com/gpu")
+
+	// gpuLimits builds a ResourceList whose only entry is the given GPU quantity.
+	gpuLimits := func(q *resource.Quantity) corev1.ResourceList {
+		return corev1.ResourceList{gpuKey: *q}
+	}
+
+	cases := map[string]struct {
+		resourceLimits  corev1.ResourceList
+		expectedNumGPUs string
+	}{
+		"Fractional GPU as millicores in container limits": {
+			resourceLimits:  gpuLimits(resource.NewMilliQuantity(400, resource.DecimalSI)), // 400m = 0.4 GPU
+			expectedNumGPUs: "0.4",
+		},
+		"Single GPU in container limits": {
+			resourceLimits:  gpuLimits(resource.NewQuantity(1, resource.DecimalSI)),
+			expectedNumGPUs: "1",
+		},
+		"Multiple GPUs in container limits": {
+			resourceLimits:  gpuLimits(resource.NewQuantity(4, resource.DecimalSI)),
+			expectedNumGPUs: "4",
+		},
+	}
+
+	for caseName, c := range cases {
+		t.Run(caseName, func(t *testing.T) {
+			startParams := map[string]string{}
+			err := addWellKnownAcceleratorResources(startParams, c.resourceLimits)
+			assert.NoError(t, err, "addWellKnownAcceleratorResources should not return an error")
+
+			got, present := startParams["num-gpus"]
+			assert.True(t, present, "num-gpus should be set in rayStartParams")
+			assert.Equal(t, c.expectedNumGPUs, got, "GPU value should match expected fractional value")
+		})
+	}
+}
diff --git a/ray-operator/test/e2e/raycluster_test.go b/ray-operator/test/e2e/raycluster_test.go
@@ -7,13 +7,15 @@ import (
 	. "github.com/onsi/gomega"
 	corev1 "k8s.io/api/core/v1"
 	"k8s.io/apimachinery/pkg/api/errors"
+	"k8s.io/apimachinery/pkg/api/resource"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/apimachinery/pkg/types"
+	corev1ac "k8s.io/client-go/applyconfigurations/core/v1"
 	"k8s.io/utils/ptr"
 
 	rayv1 "github.com/ray-project/kuberay/ray-operator/apis/ray/v1"
 	"github.com/ray-project/kuberay/ray-operator/controllers/ray/utils"
 	rayv1ac "github.com/ray-project/kuberay/ray-operator/pkg/client/applyconfiguration/ray/v1"
 	. "github.com/ray-project/kuberay/ray-operator/test/support"
 )
 
@@ -143,6 +145,9 @@ func TestRayClusterWithResourceQuota(t *testing.T) {
 	LogWithTimestamp(test.T(), "Waiting for RayCluster %s/%s to have ReplicaFailure condition", rayCluster.Namespace, rayCluster.Name)
 	g.Eventually(RayCluster(test, namespace.Name, rayCluster.Name), TestTimeoutShort).
 		Should(WithTransform(StatusCondition(rayv1.RayClusterReplicaFailure), MatchConditionContainsMessage(metav1.ConditionTrue, utils.ErrFailedCreateHeadPod.Error(), "forbidden: exceeded quota")))
+
+	// Give operator time to gracefully clean up resources before namespace deletion
+	time.Sleep(2 * time.Second)
 }
 
 func TestRayClusterScalingDown(t *testing.T) {
@@ -264,3 +269,102 @@ func TestRayClusterUpgradeStrategy(t *testing.T) {
 	g.Expect(err).NotTo(HaveOccurred())
 	g.Expect(newWorkerPods).To(HaveLen(1))
 }
+
+// TestRayClusterWithFractionalGPU verifies that a fractional GPU quantity declared in a worker
+// group's Resources map reaches the generated Ray start command as --num-gpus=0.4.
+// This covers issue #4447, where fractional GPU serving (e.g., 0.4 GPU per model) is needed to
+// serve multiple models on a single GPU.
+//
+// IMPORTANT: Kubernetes doesn't support fractional GPU values in pod resource specs (GPU must be
+// integer), so the fraction is declared only in the group-level Resources map; the worker pod
+// template used here requests CPU and memory but no GPU.
+// The test inspects the KUBERAY_GEN_RAY_START_CMD environment variable of the worker container;
+// it does not check that the Ray worker process itself starts successfully.
+// Reference: https://github.com/ray-project/kuberay/issues/4447
+func TestRayClusterWithFractionalGPU(t *testing.T) {
+	test := With(t)
+	g := NewWithT(t)
+
+	// Create a namespace
+	namespace := test.NewTestNamespace()
+
+	// Worker pod template. It requests only CPU and memory; the GPU fraction is declared on
+	// the worker group's Resources map instead of in the pod spec.
+	workerTemplate := corev1ac.PodTemplateSpec().
+		WithSpec(corev1ac.PodSpec().
+			WithContainers(corev1ac.Container().
+				WithName("ray-worker").
+				WithImage(GetRayImage()).
+				WithResources(corev1ac.ResourceRequirements().
+					WithRequests(corev1.ResourceList{
+						corev1.ResourceCPU:    resource.MustParse("1"),
+						corev1.ResourceMemory: resource.MustParse("1Gi"),
+					}))))
+
+	// Define a RayCluster with fractional GPU in the worker group resource spec.
+	rayClusterAC := rayv1ac.RayCluster("ray-fractional-gpu", namespace.Name).
+		WithSpec(rayv1ac.RayClusterSpec().
+			WithRayVersion(GetRayVersion()).
+			WithHeadGroupSpec(rayv1ac.HeadGroupSpec().
+				WithRayStartParams(map[string]string{"num-cpus": "2"}).
+				WithTemplate(HeadPodTemplateApplyConfiguration())).
+			WithWorkerGroupSpecs(rayv1ac.WorkerGroupSpec().
+				WithGroupName("gpu-workers").
+				WithReplicas(1).
+				WithMinReplicas(0).
+				WithMaxReplicas(2).
+				// Fractional GPU in the group resource spec; the assertions below expect the
+				// operator to turn this into Ray's --num-gpus parameter.
+				WithResources(map[string]string{
+					"CPU":            "1",
+					"memory":         "1Gi",
+					"nvidia.com/gpu": "0.4",
+				}).
+				WithRayStartParams(map[string]string{
+					"num-cpus": "1",
+				}).
+				WithTemplate(workerTemplate)))
+
+	// Create the RayCluster
+	rayCluster, err := test.Client().Ray().RayV1().RayClusters(namespace.Name).Apply(test.Ctx(), rayClusterAC, TestApplyOptions)
+	g.Expect(err).NotTo(HaveOccurred(), "Failed to create RayCluster")
+	LogWithTimestamp(t, "Created RayCluster %s/%s with fractional GPU (0.4) in group resources", rayCluster.Namespace, rayCluster.Name)
+
+	// Wait for worker pods to be created. The List error is returned rather than swallowed so
+	// that Gomega keeps polling while it is non-nil and reports it if the timeout is reached.
+	g.Eventually(func() (int, error) {
+		pods, err := test.Client().Core().CoreV1().Pods(namespace.Name).List(test.Ctx(), metav1.ListOptions{
+			LabelSelector: "ray.io/cluster=" + rayCluster.Name + ",ray.io/node-type=worker",
+		})
+		if err != nil {
+			return 0, err
+		}
+		return len(pods.Items), nil
+	}, TestTimeoutShort).Should(BeNumerically(">=", 1), "Worker pod should be created")
+
+	// Get the worker pods and verify Ray start parameters
+	workerPods, err := GetWorkerPods(test, rayCluster)
+	g.Expect(err).NotTo(HaveOccurred())
+	g.Expect(workerPods).To(HaveLen(1), "Expected 1 worker pod")
+
+	// Look up the generated Ray start command in the first container's environment.
+	var rayStartCmd string
+	for _, env := range workerPods[0].Spec.Containers[0].Env {
+		if env.Name == "KUBERAY_GEN_RAY_START_CMD" {
+			rayStartCmd = env.Value
+			break
+		}
+	}
+
+	// The operator should have converted group resource "nvidia.com/gpu: 0.4" to a Ray start param.
+	g.Expect(rayStartCmd).NotTo(BeEmpty(), "Ray start command should be generated")
+	g.Expect(rayStartCmd).To(ContainSubstring("--num-gpus=0.4"), "Ray start command should contain fractional GPU parameter '--num-gpus=0.4'")
+
+	LogWithTimestamp(t, "✓ Worker pod created successfully")
+	LogWithTimestamp(t, "✓ Ray start command contains: --num-gpus=0.4")
+	LogWithTimestamp(t, "✓ Test passed: Fractional GPU (0.4) correctly converted from group resources to Ray start parameter")
+
+	// Give operator time to gracefully clean up resources before namespace deletion
+	time.Sleep(2 * time.Second)
+}