|
510 | 510 | "# @markdown This section uploads a prebuilt Qwen2 or Qwen2.5 model to Model Registry and deploys it to a Vertex AI Endpoint. It takes 15 minutes to 1 hour to finish, depending on the size of the model.\n", |
511 | 511 | "\n", |
512 | 512 | "# @markdown Select one of the available model variations.\n", |
513 | | - "MODEL_ID = \"Qwen2.5-0.5B-Instruct\" # @param [\"Qwen2-0.5B-Instruct\", \"Qwen2-1.5B-Instruct\", \"Qwen2-7B-Instruct\", \"Qwen2.5-0.5B-Instruct\", \"Qwen2.5-1.5B-Instruct\", \"Qwen2.5-7B-Instruct\", \"Qwen2.5-14B-Instruct\"] {isTemplate: true}\n", |
| 513 | + "MODEL_ID = \"Qwen2.5-0.5B-Instruct\" # @param [\"Qwen2-0.5B-Instruct\", \"Qwen2-1.5B-Instruct\", \"Qwen2-7B-Instruct\", \"Qwen2.5-0.5B-Instruct\", \"Qwen2.5-1.5B-Instruct\", \"Qwen2.5-7B-Instruct\", \"Qwen2.5-14B-Instruct\", \"Qwen2.5-32B-Instruct-AWQ\"] {isTemplate: true}\n", |
514 | 514 | "TPU_DEPLOYMENT_REGION = \"us-west1\" # @param [\"us-west1\"] {isTemplate:true}\n", |
515 | 515 | "model_path_prefix = \"Qwen\"\n", |
516 | 516 | "model_id = os.path.join(model_path_prefix, MODEL_ID)\n", |
517 | 517 | "\n", |
518 | 518 | "# The pre-built serving docker images.\n", |
519 | | - "HEXLLM_DOCKER_URI = \"us-docker.pkg.dev/vertex-ai-restricted/vertex-vision-model-garden-dockers/hex-llm-serve:20241118_1550_RC00\"\n", |
| 519 | + "HEXLLM_DOCKER_URI = \"us-docker.pkg.dev/vertex-ai-restricted/vertex-vision-model-garden-dockers/hex-llm-serve:20241121_1331_RC00\"\n", |
520 | 520 | "\n", |
521 | 521 | "# @markdown Find Vertex AI prediction TPUv5e machine types in\n", |
522 | 522 | "# @markdown https://cloud.google.com/vertex-ai/docs/predictions/use-tpu#deploy_a_model.\n", |
|
529 | 529 | " tpu_topo = \"1x4\"\n", |
530 | 530 | " max_model_len = 8192\n", |
531 | 531 | " machine_type = \"ct5lp-hightpu-1t\"\n", |
532 | | - "elif \"7B\" in MODEL_ID or \"14B\" in MODEL_ID:\n", |
| 532 | + "elif \"7B\" in MODEL_ID or \"14B\" in MODEL_ID or \"32B\" in MODEL_ID:\n", |
533 | 533 | " tpu_count = 4\n", |
534 | 534 | " tpu_topo = \"4x4\"\n", |
535 | 535 | " max_model_len = 131072\n", |
|
0 commit comments