Skip to content

Commit 6c7383b

Browse files
vertex-mg-bot authored and copybara-github committed
Adding Qwen2.5-32B-Instruct-AWQ TPU configs to Colab deployment notebook
PiperOrigin-RevId: 703541226
1 parent 4bec5ef commit 6c7383b

File tree

2 files changed

+6
-4
lines changed

2 files changed

+6
-4
lines changed

notebooks/community/model_garden/model_garden_pytorch_qwen2_deployment.ipynb

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -510,13 +510,13 @@
510510
"# @markdown This section uploads prebuilt Qwen2 & Qwen2.5 models to Model Registry and deploys them to a Vertex AI Endpoint. It takes 15 minutes to 1 hour to finish depending on the size of the model.\n",
511511
"\n",
512512
"# @markdown Select one of the model variations.\n",
513-
"MODEL_ID = \"Qwen2.5-0.5B-Instruct\" # @param [\"Qwen2-0.5B-Instruct\", \"Qwen2-1.5B-Instruct\", \"Qwen2-7B-Instruct\", \"Qwen2.5-0.5B-Instruct\", \"Qwen2.5-1.5B-Instruct\", \"Qwen2.5-7B-Instruct\", \"Qwen2.5-14B-Instruct\"] {isTemplate: true}\n",
513+
"MODEL_ID = \"Qwen2.5-0.5B-Instruct\" # @param [\"Qwen2-0.5B-Instruct\", \"Qwen2-1.5B-Instruct\", \"Qwen2-7B-Instruct\", \"Qwen2.5-0.5B-Instruct\", \"Qwen2.5-1.5B-Instruct\", \"Qwen2.5-7B-Instruct\", \"Qwen2.5-14B-Instruct\", \"Qwen2.5-32B-Instruct-AWQ\"] {isTemplate: true}\n",
514514
"TPU_DEPLOYMENT_REGION = \"us-west1\" # @param [\"us-west1\"] {isTemplate:true}\n",
515515
"model_path_prefix = \"Qwen\"\n",
516516
"model_id = os.path.join(model_path_prefix, MODEL_ID)\n",
517517
"\n",
518518
"# The pre-built serving docker images.\n",
519-
"HEXLLM_DOCKER_URI = \"us-docker.pkg.dev/vertex-ai-restricted/vertex-vision-model-garden-dockers/hex-llm-serve:20241118_1550_RC00\"\n",
519+
"HEXLLM_DOCKER_URI = \"us-docker.pkg.dev/vertex-ai-restricted/vertex-vision-model-garden-dockers/hex-llm-serve:20241121_1331_RC00\"\n",
520520
"\n",
521521
"# @markdown Find Vertex AI prediction TPUv5e machine types in\n",
522522
"# @markdown https://cloud.google.com/vertex-ai/docs/predictions/use-tpu#deploy_a_model.\n",
@@ -529,7 +529,7 @@
529529
" tpu_topo = \"1x4\"\n",
530530
" max_model_len = 8192\n",
531531
" machine_type = \"ct5lp-hightpu-1t\"\n",
532-
"elif \"7B\" in MODEL_ID or \"14B\" in MODEL_ID:\n",
532+
"elif \"7B\" in MODEL_ID or \"14B\" in MODEL_ID or \"32B\" in MODEL_ID:\n",
533533
" tpu_count = 4\n",
534534
" tpu_topo = \"4x4\"\n",
535535
" max_model_len = 131072\n",

notebooks/community/model_garden/model_garden_tfvision_image_classification.ipynb

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -115,7 +115,9 @@
115115
"# @markdown > | Machine Type | Accelerator Type | Recommended Regions |\n",
116116
"# @markdown | ----------- | ----------- | ----------- |\n",
117117
"# @markdown | a2-ultragpu-1g | 1 NVIDIA_A100_80GB | us-central1, us-east4, europe-west4, asia-southeast1 |\n",
118-
"# @markdown | a3-highgpu-8g | 8 NVIDIA_H100_80GB | us-central1, us-west1, europe-west4, asia-southeast1 |\n",
118+
"# @markdown | a3-highgpu-2g | 2 NVIDIA_H100_80GB | us-west1, asia-southeast1, europe-west4 |\n",
119+
"# @markdown | a3-highgpu-4g | 4 NVIDIA_H100_80GB | us-west1, asia-southeast1, europe-west4 |\n",
120+
"# @markdown | a3-highgpu-8g | 8 NVIDIA_H100_80GB | us-central1, us-east5, europe-west4, us-west1, asia-southeast1 |\n",
119121
"\n",
120122
"! git clone https://github.com/GoogleCloudPlatform/vertex-ai-samples.git\n",
121123
"\n",

0 commit comments

Comments
 (0)