Update AutoScaling Blog to Triton Inference Server 24.07

indrajit96 · indrajit96 · commit 1145f107c571 · 2024-08-27T11:19:12.000-07:00
diff --git a/Deployment/Kubernetes/TensorRT-LLM_Autoscaling_and_Load_Balancing/containers/triton_trt-llm.containerfile b/Deployment/Kubernetes/TensorRT-LLM_Autoscaling_and_Load_Balancing/containers/triton_trt-llm.containerfile
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-ARG BASE_CONTAINER_IMAGE=nvcr.io/nvidia/tritonserver:24.04-trtllm-python-py3
+ARG BASE_CONTAINER_IMAGE=nvcr.io/nvidia/tritonserver:24.07-trtllm-python-py3
 ARG ENGINE_DEST_PATH=/var/run/engines
 ARG HF_HOME=/var/run/cache
 
@@ -42,7 +42,7 @@ RUN pip --verbose install \
     --no-cache-dir \
     --no-color \
     --no-input \
-    git+https://github.com/triton-inference-server/triton_cli.git@jwyman/aslb-mn
+    git+https://github.com/triton-inference-server/triton_cli.git@ibhosale/aslb-mn
 
 # Copy the server script.
 COPY server.py .