|
147 | 147 | ")\n", |
148 | 148 | "\n", |
149 | 149 | "models, endpoints = {}, {}\n", |
150 | | - "# Dedicated endpoint not supported yet\n", |
151 | | - "use_dedicated_endpoint = False\n", |
152 | 150 | "\n", |
153 | 151 | "# Get the default cloud project id.\n", |
154 | 152 | "PROJECT_ID = os.environ[\"GOOGLE_CLOUD_PROJECT\"]\n", |
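For context, the notebook initializes the Vertex AI SDK with this project before any deployment. A minimal sketch, assuming a REGION variable that the notebook sets elsewhere (outside this hunk):

import os
from google.cloud import aiplatform

PROJECT_ID = os.environ["GOOGLE_CLOUD_PROJECT"]
REGION = "us-central1"  # assumption: the notebook normally sets this via a form field
aiplatform.init(project=PROJECT_ID, location=REGION)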
|
390 | 388 | " return model, endpoint\n", |
391 | 389 | "\n", |
392 | 390 | "\n", |
| 391 | + "# @markdown Set use_dedicated_endpoint to False if you don't want to use [dedicated endpoint](https://cloud.google.com/vertex-ai/docs/general/deployment#create-dedicated-endpoint).\n", |
| 392 | + "use_dedicated_endpoint = True # @param {type:\"boolean\"}\n", |
| 393 | + "\n", |
| 394 | + "\n", |
393 | 395 | "models[\"vllm_gpu\"], endpoints[\"vllm_gpu\"] = deploy_model_vllm(\n", |
394 | 396 | " model_name=common_util.get_job_name_with_datetime(prefix=\"llama3-guard\"),\n", |
395 | 397 | " model_id=model_id,\n", |
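The toggle has to reach endpoint creation, since a dedicated endpoint must be provisioned as such. A hedged sketch of how deploy_model_vllm would typically wire it (the helper's body is not shown in this hunk; dedicated_endpoint_enabled is the Vertex AI SDK parameter the flag usually maps to):

# Sketch: endpoint creation inside deploy_model_vllm.
# Assumption: model_name and use_dedicated_endpoint are passed in as arguments.
endpoint = aiplatform.Endpoint.create(
    display_name=f"{model_name}-endpoint",
    dedicated_endpoint_enabled=use_dedicated_endpoint,
)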
|
520 | 522 | " \"@requestFormat\": \"chatCompletions\",\n", |
521 | 523 | " },\n", |
522 | 524 | "]\n", |
523 | | - "response = endpoints[\"vllm_gpu\"].predict(instances=instances)\n", |
| 525 | + "response = endpoints[\"vllm_gpu\"].predict(\n", |
| 526 | + " instances=instances, use_dedicated_endpoint=use_dedicated_endpoint\n", |
| 527 | + ")\n", |
524 | 528 | "\n", |
525 | | - "prediction = response.predictions[0]\n", |
| 529 | + "prediction = response.predictions\n", |
526 | 530 | "print(prediction)\n", |
527 | 531 | "print(\"Llama Guard prediction:\", prediction[\"choices\"][0][\"message\"][\"content\"])" |
528 | 532 | ] |
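Llama Guard replies with a plain-text verdict: "safe", or "unsafe" followed by the violated policy codes on subsequent lines. A small parsing sketch for illustration (not part of the notebook):

# Sketch: interpret the moderation verdict from the chat completion.
content = prediction["choices"][0]["message"]["content"].strip()
lines = content.splitlines()
if lines[0] == "safe":
    print("Input judged safe.")
else:
    print("Input judged unsafe; violated categories:", lines[1:])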
|
653 | 657 | " \"@requestFormat\": \"chatCompletions\",\n", |
654 | 658 | " },\n", |
655 | 659 | "]\n", |
656 | | - "response = endpoints[\"vllm_gpu\"].predict(instances=instances)\n", |
| 660 | + "response = endpoints[\"vllm_gpu\"].predict(\n", |
| 661 | + " instances=instances, use_dedicated_endpoint=use_dedicated_endpoint\n", |
| 662 | + ")\n", |
657 | 663 | "\n", |
658 | | - "prediction = response.predictions[0]\n", |
| 664 | + "prediction = response.predictions\n", |
659 | 665 | "print(prediction)\n", |
660 | 666 | "print(\"Llama Guard prediction:\", prediction[\"choices\"][0][\"message\"][\"content\"])" |
661 | 667 | ] |
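Once both predictions have been verified, the usual final step is cleanup so the deployed model stops accruing charges. A sketch using standard Vertex AI SDK calls (assumption: the notebook's cleanup cell sits outside this diff):

# Sketch: tear down the endpoints and models created above.
for endpoint in endpoints.values():
    endpoint.undeploy_all()
    endpoint.delete()
for model in models.values():
    model.delete()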
|