triton-inference-server · whoisj · Jul 3, 2024 · Jun 12, 2024 · Jun 11, 2024
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -65,7 +65,7 @@ repos:
   - id: check-json
   - id: check-toml
   - id: check-yaml
-    exclude: ^Deployment/Kubernetes/TensorRT-LLM_Autoscaling_and_Load_Balancing/chart/templates/.+$
+    exclude: ^Deployment/Kubernetes/[^/]+/chart/templates/.+$
   - id: check-shebang-scripts-are-executable
   - id: end-of-file-fixer
     types_or: [c, c++, cuda, proto, textproto, java, python]

diff --git a/Deployment/Kubernetes/README.md b/Deployment/Kubernetes/README.md
@@ -1,3 +1,4 @@
 # Kubernetes Deployment of Triton Server Guides
 
 * [TensorRT-LLM Gen. AI Autoscaling & Load Balancing](./TensorRT-LLM_Autoscaling_and_Load_Balancing/README.md)
+* [Multi-Node Generative AI w/ Triton Server and TensorRT-LLM](./TensorRT-LLM_Multi-Node_Distributed_Models/README.md)
diff --git a/Deployment/Kubernetes/TensorRT-LLM_Multi-Node_Distributed_Models/.gitignore b/Deployment/Kubernetes/TensorRT-LLM_Multi-Node_Distributed_Models/.gitignore
@@ -0,0 +1,5 @@
+.vscode/
+**/.vscode/
+
+dev_*
+**/dev_*
diff --git a/Deployment/Kubernetes/TensorRT-LLM_Multi-Node_Distributed_Models/README.md b/Deployment/Kubernetes/TensorRT-LLM_Multi-Node_Distributed_Models/README.md
diff --git a/Deployment/Kubernetes/TensorRT-LLM_Multi-Node_Distributed_Models/chart/.gitignore b/Deployment/Kubernetes/TensorRT-LLM_Multi-Node_Distributed_Models/chart/.gitignore
@@ -0,0 +1 @@
+dev_values.yaml
diff --git a/Deployment/Kubernetes/TensorRT-LLM_Multi-Node_Distributed_Models/chart/Chart.yaml b/Deployment/Kubernetes/TensorRT-LLM_Multi-Node_Distributed_Models/chart/Chart.yaml
@@ -0,0 +1,20 @@
+# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+apiVersion: v2
+appVersion: "0.1.0"  # quoted so a later value such as 1.10 can never be read as a number
+description: Generative AI Multi-Node w/ Triton and TensorRT-LLM Guide/Tutorial
+icon: https://www.nvidia.com/content/dam/en-zz/Solutions/about-nvidia/logo-and-brand/01-nvidia-logo-vert-500x200-2c-50-d@2x.png
+name: triton_trt-llm_multi-node_example  # NOTE(review): Helm naming conventions discourage underscores; left unchanged because renaming alters the chart's identity
+version: 0.1.0
diff --git a/Deployment/Kubernetes/TensorRT-LLM_Multi-Node_Distributed_Models/chart/gpt2_values.yaml b/Deployment/Kubernetes/TensorRT-LLM_Multi-Node_Distributed_Models/chart/gpt2_values.yaml
@@ -0,0 +1,18 @@
+# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+gpu: Tesla-V100-SXM2-16GB  # NOTE(review): presumably the GPU model the cluster nodes must provide — confirm in values.yaml
+
+model:
+  name: gpt2  # no tensorrtLlm overrides in this preset; presumably the chart's values.yaml defaults apply — confirm
diff --git a/...yment/Kubernetes/TensorRT-LLM_Multi-Node_Distributed_Models/chart/llama-2-70b_values.yaml b/...yment/Kubernetes/TensorRT-LLM_Multi-Node_Distributed_Models/chart/llama-2-70b_values.yaml
@@ -0,0 +1,26 @@
+# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# See values.yaml for reference values.
+
+gpu: NVIDIA-A10G  # NOTE(review): presumably the GPU model the cluster nodes must provide — confirm in values.yaml
+
+model:
+  name: llama-2-70b
+  tensorrtLlm:
+    conversion:
+      gpu: 8  # NOTE(review): presumably the GPU count requested for the conversion step — confirm in values.yaml
+      memory: 256Gi  # NOTE(review): presumably the memory requested for the conversion step — confirm in values.yaml
+    parallelism:
+      tensor: 8  # equals conversion.gpu in this preset
diff --git a/...t/Kubernetes/TensorRT-LLM_Multi-Node_Distributed_Models/chart/llama-2-7b-chat_values.yaml b/...t/Kubernetes/TensorRT-LLM_Multi-Node_Distributed_Models/chart/llama-2-7b-chat_values.yaml
@@ -0,0 +1,26 @@
+# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# See values.yaml for reference values.
+
+gpu: Tesla-V100-SXM2-16GB  # NOTE(review): presumably the GPU model the cluster nodes must provide — confirm in values.yaml
+
+model:
+  name: llama-2-7b-chat
+  tensorrtLlm:
+    conversion:
+      gpu: 2  # NOTE(review): presumably the GPU count requested for the conversion step — confirm in values.yaml
+      memory: 64Gi  # NOTE(review): presumably the memory requested for the conversion step — confirm in values.yaml
+    parallelism:
+      tensor: 2  # equals conversion.gpu in this preset
diff --git a/...oyment/Kubernetes/TensorRT-LLM_Multi-Node_Distributed_Models/chart/llama-2-7b_values.yaml b/...oyment/Kubernetes/TensorRT-LLM_Multi-Node_Distributed_Models/chart/llama-2-7b_values.yaml
@@ -0,0 +1,26 @@
+# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# See values.yaml for reference values.
+
+gpu: Tesla-V100-SXM2-16GB  # NOTE(review): presumably the GPU model the cluster nodes must provide — confirm in values.yaml
+
+model:
+  name: llama-2-7b
+  tensorrtLlm:
+    conversion:
+      gpu: 2  # NOTE(review): presumably the GPU count requested for the conversion step — confirm in values.yaml
+      memory: 64Gi  # NOTE(review): presumably the memory requested for the conversion step — confirm in values.yaml
+    parallelism:
+      tensor: 2  # equals conversion.gpu in this preset
diff --git a/...ernetes/TensorRT-LLM_Multi-Node_Distributed_Models/chart/llama-3-70b-instruct_values.yaml b/...ernetes/TensorRT-LLM_Multi-Node_Distributed_Models/chart/llama-3-70b-instruct_values.yaml
@@ -0,0 +1,26 @@
+# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# See values.yaml for reference values.
+
+gpu: NVIDIA-A10G  # NOTE(review): presumably the GPU model the cluster nodes must provide — confirm in values.yaml
+
+model:
+  name: llama-3-70b-instruct
+  tensorrtLlm:
+    conversion:
+      gpu: 8  # NOTE(review): presumably the GPU count requested for the conversion step — confirm in values.yaml
+      memory: 256Gi  # NOTE(review): presumably the memory requested for the conversion step — confirm in values.yaml
+    parallelism:
+      tensor: 8  # equals conversion.gpu in this preset
diff --git a/...bernetes/TensorRT-LLM_Multi-Node_Distributed_Models/chart/llama-3-8b-instruct_values.yaml b/...bernetes/TensorRT-LLM_Multi-Node_Distributed_Models/chart/llama-3-8b-instruct_values.yaml
@@ -0,0 +1,26 @@
+# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# See values.yaml for reference values.
+
+gpu: Tesla-V100-SXM2-16GB  # NOTE(review): presumably the GPU model the cluster nodes must provide — confirm in values.yaml
+
+model:
+  name: llama-3-8b-instruct
+  tensorrtLlm:
+    conversion:
+      gpu: 4  # NOTE(review): presumably the GPU count requested for the conversion step — confirm in values.yaml
+      memory: 128Gi  # NOTE(review): presumably the memory requested for the conversion step — confirm in values.yaml
+    parallelism:
+      tensor: 4  # equals conversion.gpu in this preset
diff --git a/...oyment/Kubernetes/TensorRT-LLM_Multi-Node_Distributed_Models/chart/llama-3-8b_values.yaml b/...oyment/Kubernetes/TensorRT-LLM_Multi-Node_Distributed_Models/chart/llama-3-8b_values.yaml
@@ -0,0 +1,26 @@
+# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# See values.yaml for reference values.
+
+gpu: Tesla-V100-SXM2-16GB  # NOTE(review): presumably the GPU model the cluster nodes must provide — confirm in values.yaml
+
+model:
+  name: llama-3-8b
+  tensorrtLlm:
+    conversion:
+      gpu: 2  # NOTE(review): presumably the GPU count requested for the conversion step — confirm in values.yaml
+      memory: 64Gi  # NOTE(review): presumably the memory requested for the conversion step — confirm in values.yaml
+    parallelism:
+      tensor: 2  # equals conversion.gpu in this preset
diff --git a/Deployment/Kubernetes/TensorRT-LLM_Multi-Node_Distributed_Models/chart/opt125m_values.yaml b/Deployment/Kubernetes/TensorRT-LLM_Multi-Node_Distributed_Models/chart/opt125m_values.yaml
@@ -0,0 +1,20 @@
+# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# See values.yaml for reference values.
+
+gpu: Tesla-V100-SXM2-16GB  # NOTE(review): presumably the GPU model the cluster nodes must provide — confirm in values.yaml
+
+model:
+  name: opt125m  # no tensorrtLlm overrides in this preset; presumably the chart's values.yaml defaults apply — confirm
diff --git a/Deployment/Kubernetes/TensorRT-LLM_Multi-Node_Distributed_Models/chart/templates/NOTES.txt b/Deployment/Kubernetes/TensorRT-LLM_Multi-Node_Distributed_Models/chart/templates/NOTES.txt
@@ -0,0 +1,48 @@
+{{- /* Flags for the optional resources; each one defaults to "created". */ -}}
+{{- $create_job := true }}
+{{- $create_service := true }}
+{{- $create_account := true }}
+{{- if $.Values.model }}
+{{-   $create_job = not $.Values.model.skipConversion }}
+{{- end }}
+{{- if $.Values.kubernetes }}
+{{-   $create_service = not $.Values.kubernetes.noService }}
+{{- /* A value in kubernetes.serviceAccount turns the ServiceAccount line off. */ -}}
+{{-   $create_account = not $.Values.kubernetes.serviceAccount }}
+{{- end }}
+{{- $name := $.Release.Name }}
+{{- $namespace := $.Release.Namespace }}
+{{- /* Resource kinds for the kubectl example, in the order they are printed. */ -}}
+{{- $kinds := list "deployments" }}
+{{- if $create_job }}
+{{-   $kinds = append $kinds "jobs" }}
+{{- end }}
+{{- $kinds = append $kinds "pods" }}
+{{- if $create_service }}
+{{-   $kinds = append $kinds "services" }}
+{{- end }}
+{{- $kinds = append $kinds "podmonitors" }}
+{{- if $create_account }}
+{{-   $kinds = append $kinds "serviceAccounts" }}
+{{- end }}

+{{ $.Chart.Name }} ({{ $.Chart.Version }}) installation complete.

+Release Name: {{ $name }}
+Namespace: {{ $namespace }}
+Deployment Name: {{ $name }}
+{{- if $create_job }}
+Conversion Job: {{ $name }}
+{{- end }}
+{{- if $create_service }}
+Service Name: {{ $name }}
+{{- end }}
+{{- if $create_account }}
+ServiceAccount Name: {{ $name }}
+{{- end }}

+Helpful commands:

+  $ helm status --namespace={{ $namespace }} {{ $name }}
+  $ helm get --namespace={{ $namespace }} all {{ $name }}
+  $ kubectl get --namespace={{ $namespace }} --selector='app={{ $name }}' {{ join "," $kinds -}}