Commit 46ac782

Add device id to cuda requests
The current interface supports multiple device ids. To check whether a cuda memory request is valid, i.e. whether a gpu is actually available for it, the request needs to carry a device id that can be checked against the available devices, if any.
1 parent b1595d7 commit 46ac782

File tree

6 files changed: +109 −37 lines changed


proto/inference.proto

+4 −2
@@ -102,8 +102,9 @@ message Tensor {

 message IsCudaOutOfMemoryRequest {
     string modelSessionId = 1;
-    string tensorId = 3;
-    NamedInts shape = 2;
+    string tensorId = 2;
+    NamedInts shape = 3;
+    string deviceId = 4;
 }

 message IsCudaOutOfMemoryResponse {
@@ -116,6 +117,7 @@ message MaxCudaMemoryShapeRequest {
     NamedInts stepShape = 3;
     NamedInts minShape = 4;
     NamedInts maxShape = 5;
+    string deviceId = 6;
 }

 message MaxCudaMemoryShapeResponse {
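
For illustration, a minimal client-side sketch of how the extended IsCudaOutOfMemoryRequest could be populated. This is a sketch only: it assumes a tiktorch server reachable on 127.0.0.1:5567, a model session id obtained from an earlier CreateModelSession call, and a hypothetical to_named_ints helper (the concrete NamedInts/NamedInt layout is defined elsewhere in inference.proto and is not part of this diff):

import grpc

from tiktorch.proto import inference_pb2, inference_pb2_grpc


def to_named_ints(shape):
    # Hypothetical helper; adjust to the actual NamedInts message definition.
    return inference_pb2.NamedInts(
        namedInts=[inference_pb2.NamedInt(name=name, size=size) for name, size in shape]
    )


channel = grpc.insecure_channel("127.0.0.1:5567")
stub = inference_pb2_grpc.InferenceStub(channel)
model_session_id = "..."  # returned by a previous CreateModelSession call

response = stub.IsCudaOutOfMemory(
    inference_pb2.IsCudaOutOfMemoryRequest(
        modelSessionId=model_session_id,
        tensorId="input",
        deviceId="cuda:0",  # new field introduced by this commit
        shape=to_named_ints([("b", 1), ("c", 1), ("y", 64), ("x", 64)]),
    )
)
print(response.isCudaOutOfMemory)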

tests/test_server/test_grpc/test_inference_servicer.py

+56 −10
@@ -36,9 +36,9 @@ def grpc_stub_cls(grpc_channel):
     return inference_pb2_grpc.InferenceStub


-@pytest.fixture
-def inference_servicer_gpu():
-    with patch.object(InferenceServicer, "_is_gpu", lambda x: True):
+@pytest.fixture()
+def gpu_exists():
+    with patch.object(InferenceServicer, "_check_gpu_exists", lambda *args: None):
         yield


@@ -260,7 +260,7 @@ def to_pb_namedInts(self, shape: Tuple[int, ...]) -> inference_pb2.NamedInts:
     )
     def test_max_cuda_memory(
         self,
-        inference_servicer_gpu,
+        gpu_exists,
         min_shape,
         max_shape,
         step_shape,
@@ -275,23 +275,67 @@ def test_max_cuda_memory(
         model = grpc_stub.CreateModelSession(valid_model_request(bioimageio_dummy_cuda_out_of_memory_model_bytes))
         res = grpc_stub.MaxCudaMemoryShape(
             inference_pb2.MaxCudaMemoryShapeRequest(
-                modelSessionId=model.id, tensorId="input", minShape=min_shape, maxShape=max_shape, stepShape=step_shape
+                modelSessionId=model.id,
+                tensorId="input",
+                deviceId="cuda:0",
+                minShape=min_shape,
+                maxShape=max_shape,
+                stepShape=step_shape,
             )
         )
         grpc_stub.CloseModelSession(model)
         assert res.maxShape == self.to_pb_namedInts(expected)

-    def test_max_cuda_memory_not_found(
-        self, inference_servicer_gpu, grpc_stub, bioimageio_dummy_cuda_out_of_memory_model_bytes
+    @pytest.mark.parametrize(
+        "min_shape, max_shape, step_shape, description",
+        [
+            ((1, 1, 6, 6), (1, 1, 5, 5), (0, 0, 1, 1), "Max shape [1 1 5 5] smaller than min shape [1 1 6 6]"),
+            ((1, 1, 5, 5), (1, 1, 6, 6), (0, 0, 2, 1), "Invalid parameterized shape"),
+        ],
+    )
+    def test_max_cuda_memory_invalid_request(
+        self,
+        description,
+        gpu_exists,
+        min_shape,
+        max_shape,
+        step_shape,
+        grpc_stub,
+        bioimageio_dummy_cuda_out_of_memory_model_bytes,
     ):
+        min_shape = self.to_pb_namedInts(min_shape)
+        max_shape = self.to_pb_namedInts(max_shape)
+        step_shape = self.to_pb_namedInts(step_shape)
+
+        model = grpc_stub.CreateModelSession(valid_model_request(bioimageio_dummy_cuda_out_of_memory_model_bytes))
+        with pytest.raises(grpc.RpcError) as error:
+            grpc_stub.MaxCudaMemoryShape(
+                inference_pb2.MaxCudaMemoryShapeRequest(
+                    modelSessionId=model.id,
+                    tensorId="input",
+                    deviceId="cuda:0",
+                    minShape=min_shape,
+                    maxShape=max_shape,
+                    stepShape=step_shape,
+                )
+            )
+        assert error.value.details().startswith(f"Exception calling application: {description}")
+        grpc_stub.CloseModelSession(model)
+
+    def test_max_cuda_memory_not_found(self, gpu_exists, grpc_stub, bioimageio_dummy_cuda_out_of_memory_model_bytes):
         model = grpc_stub.CreateModelSession(valid_model_request(bioimageio_dummy_cuda_out_of_memory_model_bytes))
         min_shape = self.to_pb_namedInts((1, 1, 11, 11))
         max_shape = self.to_pb_namedInts((1, 1, 12, 12))
         step = self.to_pb_namedInts((0, 0, 1, 1))
         with pytest.raises(grpc.RpcError) as error:
             grpc_stub.MaxCudaMemoryShape(
                 inference_pb2.MaxCudaMemoryShapeRequest(
-                    modelSessionId=model.id, tensorId="input", minShape=min_shape, maxShape=max_shape, stepShape=step
+                    modelSessionId=model.id,
+                    tensorId="input",
+                    deviceId="cuda:0",
+                    minShape=min_shape,
+                    maxShape=max_shape,
+                    stepShape=step,
                 )
             )
         assert error.value.code() == grpc.StatusCode.NOT_FOUND
@@ -303,12 +347,14 @@ def test_max_cuda_memory_not_found(
         [((1, 1, 10, 10), False), ((1, 1, 99, 99), True)],
     )
     def test_is_out_of_memory(
-        self, inference_servicer_gpu, shape, expected, grpc_stub, bioimageio_dummy_cuda_out_of_memory_model_bytes
+        self, gpu_exists, shape, expected, grpc_stub, bioimageio_dummy_cuda_out_of_memory_model_bytes
    ):
         model = grpc_stub.CreateModelSession(valid_model_request(bioimageio_dummy_cuda_out_of_memory_model_bytes))
         shape = self.to_pb_namedInts(shape)
         res = grpc_stub.IsCudaOutOfMemory(
-            inference_pb2.IsCudaOutOfMemoryRequest(modelSessionId=model.id, tensorId="input", shape=shape)
+            inference_pb2.IsCudaOutOfMemoryRequest(
+                modelSessionId=model.id, tensorId="input", deviceId="cuda:0", shape=shape
+            )
         )
         grpc_stub.CloseModelSession(model)
         assert res.isCudaOutOfMemory is expected

tiktorch/proto/inference_pb2.py

+34 −20
Generated file; the diff is not rendered by default.
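
tiktorch/proto/inference_pb2.py is regenerated from proto/inference.proto. The project may have its own script or make target for this; as a sketch, the input/output paths below are assumptions based on the file layout in this commit, but a plain grpcio-tools invocation would look roughly like:

from grpc_tools import protoc

# Regenerate the protobuf/gRPC Python bindings after editing the .proto file.
# Paths are assumptions; the repository may wrap this in its own tooling.
protoc.main(
    [
        "grpc_tools.protoc",
        "-Iproto",
        "--python_out=tiktorch/proto",
        "--grpc_python_out=tiktorch/proto",
        "proto/inference.proto",
    ]
)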

tiktorch/rpc/mp.py

+2
@@ -112,9 +112,11 @@ class _Api:

 @dataclasses.dataclass(frozen=True)
 class BioModelClient:
+    name: str
     api: IRPCModelSession
     input_specs: List[nodes.InputTensor]
     output_specs: List[nodes.OutputTensor]
+    devices: List[str]


 class MPClient:

tiktorch/server/grpc/inference_servicer.py

+8 −4
@@ -98,6 +98,7 @@ def MaxCudaMemoryShape(
         self, request: inference_pb2.MaxCudaMemoryShapeRequest, context
     ) -> inference_pb2.MaxCudaMemoryShapeResponse:
         session = self._getModelSession(context, request.modelSessionId)
+        self._check_gpu_exists(session.bio_model_client, request.deviceId)
         min_shape = pb_NamedInts_to_named_shape(request.minShape)
         step_shape = pb_NamedInts_to_named_shape(request.stepShape)
         max_shape = pb_NamedInts_to_named_shape(request.maxShape)
@@ -115,6 +116,7 @@ def IsCudaOutOfMemory(
         self, request: inference_pb2.IsCudaOutOfMemoryRequest, context
     ) -> inference_pb2.IsCudaOutOfMemoryResponse:
         session = self._getModelSession(context, request.modelSessionId)
+        self._check_gpu_exists(session.bio_model_client, request.deviceId)
         return inference_pb2.IsCudaOutOfMemoryResponse(
             isCudaOutOfMemory=self._is_cuda_out_of_memory(
                 session.bio_model_client, request.tensorId, pb_NamedInts_to_named_shape(request.shape)
@@ -145,8 +147,6 @@ def _get_max_shape(
         return None

     def _is_cuda_out_of_memory(self, client: BioModelClient, tensor_id: str, shape: NamedShape) -> bool:
-        if not self._is_gpu():
-            return False
         is_out_of_memory = False
         dummy_tensor = xarray.DataArray(np.random.rand(*shape.values()), dims=shape.keys())
         sample = Sample.from_xr_tensors(tensor_ids=[tensor_id], tensors_data=[dummy_tensor])
@@ -168,8 +168,12 @@ def _validated_forward(self, client: BioModelClient, sample: Sample):
         validator.check_tensors(sample)
         return client.api.forward(sample)

-    def _is_gpu(self) -> bool:
-        return torch.cuda.is_available()
+    def _check_gpu_exists(self, client: BioModelClient, device_id: str):
+        gpu_device_ids = [device.id for device in self.__device_pool.list_devices() if device.id.startswith("cuda")]
+        if len(gpu_device_ids) == 0:
+            raise ValueError("Not available gpus found")
+        if device_id not in client.devices:
+            raise ValueError(f"{device_id} not found for model {client.name}")

     def _getModelSession(self, context, modelSessionId: str) -> Session:
         if not modelSessionId:
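
On the client side, a failed gpu check surfaces as a grpc.RpcError. A minimal sketch of handling it, reusing the stub, model_session_id, and to_named_ints helper assumed in the earlier example; the exact details string is an assumption based on the ValueError messages above and the "Exception calling application:" prefix seen in the tests:

import grpc

from tiktorch.proto import inference_pb2

try:
    stub.IsCudaOutOfMemory(
        inference_pb2.IsCudaOutOfMemoryRequest(
            modelSessionId=model_session_id,
            tensorId="input",
            deviceId="cuda:1",  # a device the model session was not created with
            shape=to_named_ints([("b", 1), ("c", 1), ("y", 64), ("x", 64)]),
        )
    )
except grpc.RpcError as err:
    # Expected to contain something like:
    # "Exception calling application: cuda:1 not found for model <model name>"
    print(err.code(), err.details())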

tiktorch/server/session/process.py

+5 −1
@@ -76,7 +76,11 @@ def start_model_session_process(
     proc.start()
     api = _mp_rpc.create_client_api(iface_cls=IRPCModelSession, conn=client_conn)
     return proc, BioModelClient(
-        input_specs=prediction_pipeline.input_specs, output_specs=prediction_pipeline.output_specs, api=api
+        name=prediction_pipeline.name,
+        devices=devices,
+        input_specs=prediction_pipeline.input_specs,
+        output_specs=prediction_pipeline.output_specs,
+        api=api,
     )
