triton-inference-server
diff --git a/‎Deployment/Kubernetes/TensorRT-LLM_Multi-Node_Distributed_Models/.gitignore
Lines changed: 5 additions & 0 deletions b/‎Deployment/Kubernetes/TensorRT-LLM_Multi-Node_Distributed_Models/.gitignore
Lines changed: 5 additions & 0 deletions
diff --git a/‎Deployment/Kubernetes/TensorRT-LLM_Multi-Node_Distributed_Models/README.md
Lines changed: 678 additions & 0 deletions b/‎Deployment/Kubernetes/TensorRT-LLM_Multi-Node_Distributed_Models/README.md
Lines changed: 678 additions & 0 deletions
diff --git a/‎Deployment/Kubernetes/TensorRT-LLM_Multi-Node_Distributed_Models/chart/.gitignore
Lines changed: 1 addition & 0 deletions b/‎Deployment/Kubernetes/TensorRT-LLM_Multi-Node_Distributed_Models/chart/.gitignore
Lines changed: 1 addition & 0 deletions
diff --git a/‎Deployment/Kubernetes/TensorRT-LLM_Multi-Node_Distributed_Models/chart/Chart.yaml
Lines changed: 20 additions & 0 deletions b/‎Deployment/Kubernetes/TensorRT-LLM_Multi-Node_Distributed_Models/chart/Chart.yaml
Lines changed: 20 additions & 0 deletions
diff --git a/‎Deployment/Kubernetes/TensorRT-LLM_Multi-Node_Distributed_Models/chart/gpt2_values.yaml
Lines changed: 18 additions & 0 deletions b/‎Deployment/Kubernetes/TensorRT-LLM_Multi-Node_Distributed_Models/chart/gpt2_values.yaml
Lines changed: 18 additions & 0 deletions
diff --git a/‎Deployment/Kubernetes/TensorRT-LLM_Multi-Node_Distributed_Models/chart/llama-2-70b_values.yaml
Lines changed: 26 additions & 0 deletions b/‎Deployment/Kubernetes/TensorRT-LLM_Multi-Node_Distributed_Models/chart/llama-2-70b_values.yaml
Lines changed: 26 additions & 0 deletions
diff --git a/‎Deployment/Kubernetes/TensorRT-LLM_Multi-Node_Distributed_Models/chart/llama-2-7b-chat_values.yaml
Lines changed: 26 additions & 0 deletions b/‎Deployment/Kubernetes/TensorRT-LLM_Multi-Node_Distributed_Models/chart/llama-2-7b-chat_values.yaml
Lines changed: 26 additions & 0 deletions
diff --git a/‎Deployment/Kubernetes/TensorRT-LLM_Multi-Node_Distributed_Models/chart/llama-2-7b_values.yaml
Lines changed: 26 additions & 0 deletions b/‎Deployment/Kubernetes/TensorRT-LLM_Multi-Node_Distributed_Models/chart/llama-2-7b_values.yaml
Lines changed: 26 additions & 0 deletions
diff --git a/‎Deployment/Kubernetes/TensorRT-LLM_Multi-Node_Distributed_Models/chart/llama-3-70b-instruct_values.yaml
Lines changed: 26 additions & 0 deletions b/‎Deployment/Kubernetes/TensorRT-LLM_Multi-Node_Distributed_Models/chart/llama-3-70b-instruct_values.yaml
Lines changed: 26 additions & 0 deletions
diff --git a/‎Deployment/Kubernetes/TensorRT-LLM_Multi-Node_Distributed_Models/chart/llama-3-8b-instruct_values.yaml
Lines changed: 26 additions & 0 deletions b/‎Deployment/Kubernetes/TensorRT-LLM_Multi-Node_Distributed_Models/chart/llama-3-8b-instruct_values.yaml
Lines changed: 26 additions & 0 deletions
diff --git a/‎Deployment/Kubernetes/TensorRT-LLM_Multi-Node_Distributed_Models/chart/llama-3-8b_values.yaml
Lines changed: 26 additions & 0 deletions b/‎Deployment/Kubernetes/TensorRT-LLM_Multi-Node_Distributed_Models/chart/llama-3-8b_values.yaml
Lines changed: 26 additions & 0 deletions
diff --git a/‎Deployment/Kubernetes/TensorRT-LLM_Multi-Node_Distributed_Models/chart/opt125m_values.yaml
Lines changed: 20 additions & 0 deletions b/‎Deployment/Kubernetes/TensorRT-LLM_Multi-Node_Distributed_Models/chart/opt125m_values.yaml
Lines changed: 20 additions & 0 deletions
diff --git a/‎Deployment/Kubernetes/TensorRT-LLM_Multi-Node_Distributed_Models/chart/templates/NOTES.txt
Lines changed: 48 additions & 0 deletions b/‎Deployment/Kubernetes/TensorRT-LLM_Multi-Node_Distributed_Models/chart/templates/NOTES.txt
Lines changed: 48 additions & 0 deletions
@@ -0,0 +1,5 @@
+.vscode/
+**/.vscode/
+
+dev_*
+**/dev_*
@@ -0,0 +1 @@
+dev_values.yaml
@@ -0,0 +1,20 @@
+# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+apiVersion: v2
+appVersion: 0.1.0
+description: Generative AI Multi-Node w/ Triton and TensorRT-LLM Guide/Tutorial
+icon: https://www.nvidia.com/content/dam/en-zz/Solutions/about-nvidia/logo-and-brand/01-nvidia-logo-vert-500x200-2c50-d@2x.png
+name: triton_trt-llm_multi-node_example
+version: 0.1.0
@@ -0,0 +1,18 @@
+# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+gpu: Tesla-V100-SXM2-16GB
+
+model:
+  name: gpt2
@@ -0,0 +1,26 @@
+# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# See values.yaml for reference values.
+
+gpu: NVIDIA-A10G
+
+model:
+  name: llama-2-70b
+  tensorrtLlm:
+    conversion:
+      gpu: 8
+      memory: 256Gi
+    parallelism:
+      tensor: 8
@@ -0,0 +1,26 @@
+# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# See values.yaml for reference values.
+
+gpu: Tesla-V100-SXM2-16GB
+
+model:
+  name: llama-2-7b-chat
+  tensorrtLlm:
+    conversion:
+      gpu: 2
+      memory: 64Gi
+    parallelism:
+      tensor: 2
@@ -0,0 +1,26 @@
+# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# See values.yaml for reference values.
+
+gpu: Tesla-V100-SXM2-16GB
+
+model:
+  name: llama-2-7b
+  tensorrtLlm:
+    conversion:
+      gpu: 2
+      memory: 64Gi
+    parallelism:
+      tensor: 2
@@ -0,0 +1,26 @@
+# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# See values.yaml for reference values.
+
+gpu: NVIDIA-A10G
+
+model:
+  name: llama-3-70b-instruct
+  tensorrtLlm:
+    conversion:
+      gpu: 8
+      memory: 256Gi
+    parallelism:
+      tensor: 8
@@ -0,0 +1,26 @@
+# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# See values.yaml for reference values.
+
+gpu: Tesla-V100-SXM2-16GB
+
+model:
+  name: llama-3-8b-instruct
+  tensorrtLlm:
+    conversion:
+      gpu: 4
+      memory: 128Gi
+    parallelism:
+      tensor: 4
@@ -0,0 +1,26 @@
+# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# See values.yaml for reference values.
+
+gpu: Tesla-V100-SXM2-16GB
+
+model:
+  name: llama-3-8b
+  tensorrtLlm:
+    conversion:
+      gpu: 2
+      memory: 64Gi
+    parallelism:
+      tensor: 2
@@ -0,0 +1,20 @@
+# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# See values.yaml for reference values.
+
+gpu: Tesla-V100-SXM2-16GB
+
+model:
+  name: opt125m
@@ -0,0 +1,48 @@
+{{- /* Post-install notes: prints the release summary and a few helper commands. */ -}}
+{{- /* The three flags below choose which optional resources are mentioned; each defaults to true. */ -}}
+{{- /* Further down, the kubectl resource list is built from fragments whose surrounding actions trim whitespace on both sides, so it renders as one comma-separated argument. */ -}}
+{{- $create_account := true }}
+{{- $create_job := true }}
+{{- $create_service := true }}
+{{- /* A truthy model.skipConversion value removes the conversion job from the notes. */ -}}
+{{- with $.Values.model }}
+{{-   if .skipConversion }}
+{{-     $create_job = false }}
+{{-   end }}
+{{- end }}
+{{- /* A truthy kubernetes.noService value removes the service from the notes. */ -}}
+{{- /* A truthy kubernetes.serviceAccount value removes the ServiceAccount from the notes; presumably the chart then uses that existing account instead of creating one (verify against the chart templates). */ -}}
+{{- with $.Values.kubernetes }}
+{{-   if .noService }}
+{{-     $create_service = false }}
+{{-   end }}
+{{-   if .serviceAccount}}
+{{-     $create_account = false }}
+{{-   end }}
+{{- end }}
+
+{{ $.Chart.Name }} ({{ $.Chart.Version }}) installation complete.
+
+Release Name: {{ $.Release.Name }}
+Namespace: {{ $.Release.Namespace }}
+Deployment Name: {{ $.Release.Name }}
+{{- if $create_job }}
+Conversion Job: {{ $.Release.Name }}
+{{- end }}
+{{- if $create_service }}
+Service Name: {{ $.Release.Name }}
+{{- end }}
+{{- if $create_account }}
+ServiceAccount Name: {{ $.Release.Name }}
+{{- end }}
+
+Helpful commands:
+
+  $ helm status --namespace={{ $.Release.Namespace }} {{ $.Release.Name }}
+  $ helm get --namespace={{ $.Release.Namespace }} all {{ $.Release.Name }}
+  $ kubectl get --namespace={{ $.Release.Namespace }} --selector='app={{ $.Release.Name }}' deployments
+{{- if $create_job -}}
+,jobs
+{{- end -}}
+,pods
+{{- if $create_service -}}
+,services
+{{- end -}}
+,podmonitors
+{{- if $create_account -}}
+,serviceAccounts
+{{- end -}}