Commit 46ac782

Add device id to cuda requests
The current interface supports multiple device ids. To check whether a cuda memory request is valid, i.e. whether a gpu is actually available for it, the request needs to carry a device id that can be checked against the available devices, if any.
1 parent b1595d7 commit 46ac782

File tree

6 files changed: +109 −37 lines changed


proto/inference.proto

+4 −2
@@ -102,8 +102,9 @@ message Tensor {

 message IsCudaOutOfMemoryRequest {
     string modelSessionId = 1;
-    string tensorId = 3;
-    NamedInts shape = 2;
+    string tensorId = 2;
+    NamedInts shape = 3;
+    string deviceId = 4;
 }

 message IsCudaOutOfMemoryResponse {
@@ -116,6 +117,7 @@ message MaxCudaMemoryShapeRequest {
     NamedInts stepShape = 3;
     NamedInts minShape = 4;
     NamedInts maxShape = 5;
+    string deviceId = 6;
 }

 message MaxCudaMemoryShapeResponse {
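
For illustration, a minimal client-side sketch of how the extended IsCudaOutOfMemoryRequest could be populated. This is a sketch only: it assumes a tiktorch server reachable on 127.0.0.1:5567, a model session id obtained from an earlier CreateModelSession call, and a hypothetical to_named_ints helper (the concrete NamedInts/NamedInt layout is defined elsewhere in inference.proto and is not part of this diff):

import grpc

from tiktorch.proto import inference_pb2, inference_pb2_grpc


def to_named_ints(shape):
    # Hypothetical helper; adjust to the actual NamedInts message definition.
    return inference_pb2.NamedInts(
        namedInts=[inference_pb2.NamedInt(name=name, size=size) for name, size in shape]
    )


channel = grpc.insecure_channel("127.0.0.1:5567")
stub = inference_pb2_grpc.InferenceStub(channel)
model_session_id = "..."  # returned by a previous CreateModelSession call

response = stub.IsCudaOutOfMemory(
    inference_pb2.IsCudaOutOfMemoryRequest(
        modelSessionId=model_session_id,
        tensorId="input",
        deviceId="cuda:0",  # new field introduced by this commit
        shape=to_named_ints([("b", 1), ("c", 1), ("y", 64), ("x", 64)]),
    )
)
print(response.isCudaOutOfMemory)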

tests/test_server/test_grpc/test_inference_servicer.py

+56 −10
@@ -36,9 +36,9 @@ def grpc_stub_cls(grpc_channel):
     return inference_pb2_grpc.InferenceStub


-@pytest.fixture
-def inference_servicer_gpu():
-    with patch.object(InferenceServicer, "_is_gpu", lambda x: True):
+@pytest.fixture()
+def gpu_exists():
+    with patch.object(InferenceServicer, "_check_gpu_exists", lambda *args: None):
         yield


@@ -260,7 +260,7 @@ def to_pb_namedInts(self, shape: Tuple[int, ...]) -> inference_pb2.NamedInts:
     )
     def test_max_cuda_memory(
         self,
-        inference_servicer_gpu,
+        gpu_exists,
         min_shape,
         max_shape,
         step_shape,
@@ -275,23 +275,67 @@ def test_max_cuda_memory(
         model = grpc_stub.CreateModelSession(valid_model_request(bioimageio_dummy_cuda_out_of_memory_model_bytes))
         res = grpc_stub.MaxCudaMemoryShape(
             inference_pb2.MaxCudaMemoryShapeRequest(
-                modelSessionId=model.id, tensorId="input", minShape=min_shape, maxShape=max_shape, stepShape=step_shape
+                modelSessionId=model.id,
+                tensorId="input",
+                deviceId="cuda:0",
+                minShape=min_shape,
+                maxShape=max_shape,
+                stepShape=step_shape,
             )
         )
         grpc_stub.CloseModelSession(model)
         assert res.maxShape == self.to_pb_namedInts(expected)

-    def test_max_cuda_memory_not_found(
-        self, inference_servicer_gpu, grpc_stub, bioimageio_dummy_cuda_out_of_memory_model_bytes
+    @pytest.mark.parametrize(
+        "min_shape, max_shape, step_shape, description",
+        [
+            ((1, 1, 6, 6), (1, 1, 5, 5), (0, 0, 1, 1), "Max shape [1 1 5 5] smaller than min shape [1 1 6 6]"),
+            ((1, 1, 5, 5), (1, 1, 6, 6), (0, 0, 2, 1), "Invalid parameterized shape"),
+        ],
+    )
+    def test_max_cuda_memory_invalid_request(
+        self,
+        description,
+        gpu_exists,
+        min_shape,
+        max_shape,
+        step_shape,
+        grpc_stub,
+        bioimageio_dummy_cuda_out_of_memory_model_bytes,
     ):
+        min_shape = self.to_pb_namedInts(min_shape)
+        max_shape = self.to_pb_namedInts(max_shape)
+        step_shape = self.to_pb_namedInts(step_shape)
+
+        model = grpc_stub.CreateModelSession(valid_model_request(bioimageio_dummy_cuda_out_of_memory_model_bytes))
+        with pytest.raises(grpc.RpcError) as error:
+            grpc_stub.MaxCudaMemoryShape(
+                inference_pb2.MaxCudaMemoryShapeRequest(
+                    modelSessionId=model.id,
+                    tensorId="input",
+                    deviceId="cuda:0",
+                    minShape=min_shape,
+                    maxShape=max_shape,
+                    stepShape=step_shape,
+                )
+            )
+        assert error.value.details().startswith(f"Exception calling application: {description}")
+        grpc_stub.CloseModelSession(model)
+
+    def test_max_cuda_memory_not_found(self, gpu_exists, grpc_stub, bioimageio_dummy_cuda_out_of_memory_model_bytes):
         model = grpc_stub.CreateModelSession(valid_model_request(bioimageio_dummy_cuda_out_of_memory_model_bytes))
         min_shape = self.to_pb_namedInts((1, 1, 11, 11))
         max_shape = self.to_pb_namedInts((1, 1, 12, 12))
         step = self.to_pb_namedInts((0, 0, 1, 1))
         with pytest.raises(grpc.RpcError) as error:
             grpc_stub.MaxCudaMemoryShape(
                 inference_pb2.MaxCudaMemoryShapeRequest(
-                    modelSessionId=model.id, tensorId="input", minShape=min_shape, maxShape=max_shape, stepShape=step
+                    modelSessionId=model.id,
+                    tensorId="input",
+                    deviceId="cuda:0",
+                    minShape=min_shape,
+                    maxShape=max_shape,
+                    stepShape=step,
                 )
             )
         assert error.value.code() == grpc.StatusCode.NOT_FOUND
@@ -303,12 +347,14 @@ def test_max_cuda_memory_not_found(
         [((1, 1, 10, 10), False), ((1, 1, 99, 99), True)],
     )
     def test_is_out_of_memory(
-        self, inference_servicer_gpu, shape, expected, grpc_stub, bioimageio_dummy_cuda_out_of_memory_model_bytes
+        self, gpu_exists, shape, expected, grpc_stub, bioimageio_dummy_cuda_out_of_memory_model_bytes
    ):
         model = grpc_stub.CreateModelSession(valid_model_request(bioimageio_dummy_cuda_out_of_memory_model_bytes))
         shape = self.to_pb_namedInts(shape)
         res = grpc_stub.IsCudaOutOfMemory(
-            inference_pb2.IsCudaOutOfMemoryRequest(modelSessionId=model.id, tensorId="input", shape=shape)
+            inference_pb2.IsCudaOutOfMemoryRequest(
+                modelSessionId=model.id, tensorId="input", deviceId="cuda:0", shape=shape
+            )
         )
         grpc_stub.CloseModelSession(model)
         assert res.isCudaOutOfMemory is expected

tiktorch/proto/inference_pb2.py

+34 −20
Generated file; the diff is not rendered by default.
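
tiktorch/proto/inference_pb2.py is regenerated from proto/inference.proto. The project may have its own script or make target for this; as a sketch, the input/output paths below are assumptions based on the file layout in this commit, but a plain grpcio-tools invocation would look roughly like:

from grpc_tools import protoc

# Regenerate the protobuf/gRPC Python bindings after editing the .proto file.
# Paths are assumptions; the repository may wrap this in its own tooling.
protoc.main(
    [
        "grpc_tools.protoc",
        "-Iproto",
        "--python_out=tiktorch/proto",
        "--grpc_python_out=tiktorch/proto",
        "proto/inference.proto",
    ]
)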

tiktorch/rpc/mp.py

+2
@@ -112,9 +112,11 @@ class _Api:

 @dataclasses.dataclass(frozen=True)
 class BioModelClient:
+    name: str
     api: IRPCModelSession
     input_specs: List[nodes.InputTensor]
     output_specs: List[nodes.OutputTensor]
+    devices: List[str]


 class MPClient:

tiktorch/server/grpc/inference_servicer.py

+8 −4
@@ -98,6 +98,7 @@ def MaxCudaMemoryShape(
         self, request: inference_pb2.MaxCudaMemoryShapeRequest, context
     ) -> inference_pb2.MaxCudaMemoryShapeResponse:
         session = self._getModelSession(context, request.modelSessionId)
+        self._check_gpu_exists(session.bio_model_client, request.deviceId)
         min_shape = pb_NamedInts_to_named_shape(request.minShape)
         step_shape = pb_NamedInts_to_named_shape(request.stepShape)
         max_shape = pb_NamedInts_to_named_shape(request.maxShape)
@@ -115,6 +116,7 @@ def IsCudaOutOfMemory(
         self, request: inference_pb2.IsCudaOutOfMemoryRequest, context
     ) -> inference_pb2.IsCudaOutOfMemoryResponse:
         session = self._getModelSession(context, request.modelSessionId)
+        self._check_gpu_exists(session.bio_model_client, request.deviceId)
         return inference_pb2.IsCudaOutOfMemoryResponse(
             isCudaOutOfMemory=self._is_cuda_out_of_memory(
                 session.bio_model_client, request.tensorId, pb_NamedInts_to_named_shape(request.shape)
@@ -145,8 +147,6 @@ def _get_max_shape(
         return None

     def _is_cuda_out_of_memory(self, client: BioModelClient, tensor_id: str, shape: NamedShape) -> bool:
-        if not self._is_gpu():
-            return False
         is_out_of_memory = False
         dummy_tensor = xarray.DataArray(np.random.rand(*shape.values()), dims=shape.keys())
         sample = Sample.from_xr_tensors(tensor_ids=[tensor_id], tensors_data=[dummy_tensor])
@@ -168,8 +168,12 @@ def _validated_forward(self, client: BioModelClient, sample: Sample):
         validator.check_tensors(sample)
         return client.api.forward(sample)

-    def _is_gpu(self) -> bool:
-        return torch.cuda.is_available()
+    def _check_gpu_exists(self, client: BioModelClient, device_id: str):
+        gpu_device_ids = [device.id for device in self.__device_pool.list_devices() if device.id.startswith("cuda")]
+        if len(gpu_device_ids) == 0:
+            raise ValueError("Not available gpus found")
+        if device_id not in client.devices:
+            raise ValueError(f"{device_id} not found for model {client.name}")

     def _getModelSession(self, context, modelSessionId: str) -> Session:
         if not modelSessionId:
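
On the client side, a failed gpu check surfaces as a grpc.RpcError. A minimal sketch of handling it, reusing the stub, model_session_id, and to_named_ints helper assumed in the earlier example; the exact details string is an assumption based on the ValueError messages above and the "Exception calling application:" prefix seen in the tests:

import grpc

from tiktorch.proto import inference_pb2

try:
    stub.IsCudaOutOfMemory(
        inference_pb2.IsCudaOutOfMemoryRequest(
            modelSessionId=model_session_id,
            tensorId="input",
            deviceId="cuda:1",  # a device the model session was not created with
            shape=to_named_ints([("b", 1), ("c", 1), ("y", 64), ("x", 64)]),
        )
    )
except grpc.RpcError as err:
    # Expected to contain something like:
    # "Exception calling application: cuda:1 not found for model <model name>"
    print(err.code(), err.details())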

tiktorch/server/session/process.py

+5 −1
@@ -76,7 +76,11 @@ def start_model_session_process(
     proc.start()
     api = _mp_rpc.create_client_api(iface_cls=IRPCModelSession, conn=client_conn)
     return proc, BioModelClient(
-        input_specs=prediction_pipeline.input_specs, output_specs=prediction_pipeline.output_specs, api=api
+        name=prediction_pipeline.name,
+        devices=devices,
+        input_specs=prediction_pipeline.input_specs,
+        output_specs=prediction_pipeline.output_specs,
+        api=api,
     )
