
Commit f124a28

Add model config.pbtxt and example env settings
Signed-off-by: M Q <[email protected]>
1 parent 9cf75b4 commit f124a28

2 files changed: 50 additions and 0 deletions

New file: example env settings script (6 lines added)
#!/bin/bash
export HOLOSCAN_INPUT_PATH="inputs/spleen_ct_tcia"
export HOLOSCAN_MODEL_PATH="examples/apps/ai_remote_infer_app/models_client_side"
export HOLOSCAN_OUTPUT_PATH="output_spleen"
export HOLOSCAN_LOG_LEVEL=DEBUG  # TRACE can be used for verbose low-level logging
export TRITON_SERVER_NETLOC="localhost:8000"  # Triton server network location, host:port
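For reference, a minimal sketch of how an application process could pick up these settings at startup, assuming Python and standard os.environ lookups; the variable names match the script above, but the fallback defaults and print statements are illustrative only, not the SDK's actual startup code:

import os

# Illustrative only: read the example environment settings, with fallbacks.
input_path = os.environ.get("HOLOSCAN_INPUT_PATH", "inputs/spleen_ct_tcia")
model_path = os.environ.get("HOLOSCAN_MODEL_PATH", "examples/apps/ai_remote_infer_app/models_client_side")
output_path = os.environ.get("HOLOSCAN_OUTPUT_PATH", "output_spleen")
log_level = os.environ.get("HOLOSCAN_LOG_LEVEL", "INFO")
triton_netloc = os.environ.get("TRITON_SERVER_NETLOC", "localhost:8000")

print(f"Input: {input_path}, models: {model_path}, output: {output_path}")
print(f"Log level: {log_level}, Triton server at: {triton_netloc}")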
New file: model config.pbtxt (44 lines added)
platform: "pytorch_libtorch"

max_batch_size: 16  # The maximum batch size. 0 for no batching, with the full shape given in dims

default_model_filename: "model_spleen_ct_segmentation_v1.ts"  # The name of the TorchScript model file

input [
  {
    name: "INPUT_0"  # The name of the input tensor (should match the input tensor name in your model if used)
    data_type: TYPE_FP32  # Data type is FP32
    dims: [ 1, 96, 96, 96 ]  # Input dimensions: [channels, width, height, depth], to be stacked as a batch
  }
]

output [
  {
    name: "OUTPUT_0"  # The name of the output tensor (match this with your TorchScript model's output name)
    data_type: TYPE_FP32  # Output is FP32
    dims: [ 2, 96, 96, 96 ]  # Output dimensions: [channels, width, height, depth], stacked to match the input batch size
  }
]

version_policy: { latest: { num_versions: 1 } }  # Only serve the latest version, which is the default

instance_group [
  {
    kind: KIND_GPU  # Specify the hardware type (GPU in this case)
    count: 1  # Number of instances created for each GPU listed in 'gpus' (adjust based on your resources)
  }
]

dynamic_batching {
  preferred_batch_size: [ 4, 8, 16 ]  # Preferred batch size(s) for dynamic batching; matches max_batch_size for sync calls
  max_queue_delay_microseconds: 1000  # Max delay before processing the batch
}

# The first calls to a newly loaded TorchScript model can take extremely long due to JIT optimization (model warmup).
# To avoid this warmup delay, Triton allows the model to be executed with these optimizations disabled.
parameters: {
  key: "DISABLE_OPTIMIZED_EXECUTION"
  value: {
    string_value: "true"
  }
}
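As a sanity check of the configuration above, here is a minimal client-side sketch. It assumes the tritonclient Python package is installed, the server named by TRITON_SERVER_NETLOC is reachable at localhost:8000, and the model repository directory (and hence the model name) is "spleen_ct_segmentation"; that name is an assumption for illustration, since config.pbtxt does not state it. Because max_batch_size is 16, the client request carries an explicit batch dimension in front of the configured dims:

import numpy as np
import tritonclient.http as httpclient

# Connect to the Triton server (host:port taken from TRITON_SERVER_NETLOC).
client = httpclient.InferenceServerClient(url="localhost:8000")

# Dummy volume with a leading batch dimension of 1; configured dims are [1, 96, 96, 96].
volume = np.zeros((1, 1, 96, 96, 96), dtype=np.float32)

infer_input = httpclient.InferInput("INPUT_0", list(volume.shape), "FP32")
infer_input.set_data_from_numpy(volume)
requested_output = httpclient.InferRequestedOutput("OUTPUT_0")

# "spleen_ct_segmentation" is an assumed model (directory) name for illustration only.
result = client.infer(model_name="spleen_ct_segmentation",
                      inputs=[infer_input],
                      outputs=[requested_output])

print(result.as_numpy("OUTPUT_0").shape)  # expected (1, 2, 96, 96, 96)

With dynamic_batching enabled, the server may also group several such single-volume requests into one execution, up to the preferred batch sizes configured above.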
