We read every piece of feedback and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 5f85241 commit 1145f10. Copy full SHA for 1145f10
Deployment/Kubernetes/TensorRT-LLM_Autoscaling_and_Load_Balancing/containers/triton_trt-llm.containerfile
@@ -12,7 +12,7 @@
12
# See the License for the specific language governing permissions and
13
# limitations under the License.
14
15
-ARG BASE_CONTAINER_IMAGE=nvcr.io/nvidia/tritonserver:24.04-trtllm-python-py3
+ARG BASE_CONTAINER_IMAGE=nvcr.io/nvidia/tritonserver:24.07-trtllm-python-py3
16
ARG ENGINE_DEST_PATH=/var/run/engines
17
ARG HF_HOME=/var/run/cache
18
@@ -42,7 +42,7 @@ RUN pip --verbose install \
42
--no-cache-dir \
43
--no-color \
44
--no-input \
45
- git+https://github.com/triton-inference-server/triton_cli.git@jwyman/aslb-mn
+ git+https://github.com/triton-inference-server/triton_cli.git@ibhosale/aslb-mn
46
47
# Copy the server script.
48
COPY server.py .
0 commit comments