Adding Qwen2.5-32B-Instruct-AWQ TPU configs to Colab deployment notebook

vertex-mg-bot · copybara-github · commit 6c7383beaf83 · 2024-12-06T10:26:35.000-08:00
PiperOrigin-RevId: 703541226
diff --git a/notebooks/community/model_garden/model_garden_pytorch_qwen2_deployment.ipynb b/notebooks/community/model_garden/model_garden_pytorch_qwen2_deployment.ipynb
@@ -510,13 +510,13 @@
         "# @markdown This section uploads prebuilt Qwen2 & Qwen2.5 models to Model Registry and deploys it to a Vertex AI Endpoint. It takes 15 minutes to 1 hour to finish depending on the size of the model.\n",
         "\n",
         "# @markdown Select one of the model variations.\n",
-        "MODEL_ID = \"Qwen2.5-0.5B-Instruct\"  # @param [\"Qwen2-0.5B-Instruct\", \"Qwen2-1.5B-Instruct\", \"Qwen2-7B-Instruct\", \"Qwen2.5-0.5B-Instruct\", \"Qwen2.5-1.5B-Instruct\", \"Qwen2.5-7B-Instruct\", \"Qwen2.5-14B-Instruct\"] {isTemplate: true}\n",
+        "MODEL_ID = \"Qwen2.5-0.5B-Instruct\"  # @param [\"Qwen2-0.5B-Instruct\", \"Qwen2-1.5B-Instruct\", \"Qwen2-7B-Instruct\", \"Qwen2.5-0.5B-Instruct\", \"Qwen2.5-1.5B-Instruct\", \"Qwen2.5-7B-Instruct\", \"Qwen2.5-14B-Instruct\", \"Qwen2.5-32B-Instruct-AWQ\"] {isTemplate: true}\n",
         "TPU_DEPLOYMENT_REGION = \"us-west1\"  # @param [\"us-west1\"] {isTemplate:true}\n",
         "model_path_prefix = \"Qwen\"\n",
         "model_id = os.path.join(model_path_prefix, MODEL_ID)\n",
         "\n",
         "# The pre-built serving docker images.\n",
-        "HEXLLM_DOCKER_URI = \"us-docker.pkg.dev/vertex-ai-restricted/vertex-vision-model-garden-dockers/hex-llm-serve:20241118_1550_RC00\"\n",
+        "HEXLLM_DOCKER_URI = \"us-docker.pkg.dev/vertex-ai-restricted/vertex-vision-model-garden-dockers/hex-llm-serve:20241121_1331_RC00\"\n",
         "\n",
         "# @markdown Find Vertex AI prediction TPUv5e machine types in\n",
         "# @markdown https://cloud.google.com/vertex-ai/docs/predictions/use-tpu#deploy_a_model.\n",
@@ -529,7 +529,7 @@
         "    tpu_topo = \"1x4\"\n",
         "    max_model_len = 8192\n",
         "    machine_type = \"ct5lp-hightpu-1t\"\n",
-        "elif \"7B\" in MODEL_ID or \"14B\" in MODEL_ID:\n",
+        "elif \"7B\" in MODEL_ID or \"14B\" in MODEL_ID or \"32B\" in MODEL_ID:\n",
         "    tpu_count = 4\n",
         "    tpu_topo = \"4x4\"\n",
         "    max_model_len = 131072\n",
diff --git a/notebooks/community/model_garden/model_garden_tfvision_image_classification.ipynb b/notebooks/community/model_garden/model_garden_tfvision_image_classification.ipynb
@@ -115,7 +115,9 @@
         "# @markdown > | Machine Type | Accelerator Type | Recommended Regions |\n",
         "# @markdown | ----------- | ----------- | ----------- |\n",
         "# @markdown | a2-ultragpu-1g | 1 NVIDIA_A100_80GB | us-central1, us-east4, europe-west4, asia-southeast1 |\n",
-        "# @markdown | a3-highgpu-8g | 8 NVIDIA_H100_80GB | us-central1, us-west1, europe-west4, asia-southeast1 |\n",
+        "# @markdown | a3-highgpu-2g | 2 NVIDIA_H100_80GB | us-west1, asia-southeast1, europe-west4 |\n",
+        "# @markdown | a3-highgpu-4g | 4 NVIDIA_H100_80GB | us-west1, asia-southeast1, europe-west4 |\n",
+        "# @markdown | a3-highgpu-8g | 8 NVIDIA_H100_80GB | us-central1, us-east5, europe-west4, us-west1, asia-southeast1 |\n",
         "\n",
         "! git clone https://github.com/GoogleCloudPlatform/vertex-ai-samples.git\n",
         "\n",