
Commit a9b2158

anubbhav-malhotra, vertex-mg-bot, gericdong, Aiden010200, and pemujo authored
merge 12/11 (#8)
* Enable dedicate endpoint for Llama Guard deployment PiperOrigin-RevId: 700022732
* Add Llama 3.2 serving notebook. PiperOrigin-RevId: 700025379
* fix: Update the model file path (GoogleCloudPlatform#3731)
* fix: Update the model file path
* Update the model file path 2
* fix: Remove example ouptut (GoogleCloudPlatform#3732)
* Add 2H100/4H100 deploy options to llama notebooks PiperOrigin-RevId: 700107023
* Enable dedicate endpoint for huggingface tei deployment PiperOrigin-RevId: 700166368
* Enable dedicate endpoint for timesfm deployment PiperOrigin-RevId: 700190957
* Upload pipeline job example (GoogleCloudPlatform#3719)
* Upload pipeline example which can combine other pipeline examples.
* Avoid copying model artifacts to local GCS and use VERTEX_AI_MODEL_GARDEN_LLAMA_3_1 directly PiperOrigin-RevId: 700732639
* LLaVA Deployment notebook PiperOrigin-RevId: 701314497
* A fix in the prediction section PiperOrigin-RevId: 701847844
* Adding chunked prefill vllm server arg. PiperOrigin-RevId: 702022997
* Add vLLM + TPU Llama 3.1 and Qwen 2.5 deployment notebook. PiperOrigin-RevId: 702091534
* Added blog post URL (GoogleCloudPlatform#3739)
* tfvision classification notebook PiperOrigin-RevId: 702659350
* Add new Llama 3.3 deployment notebook. PiperOrigin-RevId: 703537600
* Adding Qwen2.5-Instruct-32B-AWQ TPU configs to Colab deployment notebook PiperOrigin-RevId: 703541226
* Add Llama 3.3 finetuning notebook. PiperOrigin-RevId: 703546040
* Update the HF TGI and pytorch-inference notebooks, with the latest container image version. PiperOrigin-RevId: 704421485
* Enable dedicate endpoint for phi3 deployment PiperOrigin-RevId: 704865877
* mediapipe Object detection notebook PiperOrigin-RevId: 705082582

---------

Co-authored-by: Vertex MG Team <vertex-mg-bot@google.com>
Co-authored-by: Eric Dong <itseric@google.com>
Co-authored-by: Aiden010200 <150222139+Aiden010200@users.noreply.github.com>
Co-authored-by: Pedro Melendez <elpeme@gmail.com>
Co-authored-by: Minwoo Park <minwoopark@google.com>
1 parent 5ab2471 commit a9b2158

File tree

41 files changed (+3390, -692 lines)

Lines changed: 43 additions & 0 deletions
@@ -0,0 +1,43 @@
from kfp.v2 import dsl


@dsl.component(
    base_image='python:3.8',
    packages_to_install=['google-cloud-aiplatform==1.36.0'],
)
def pipelineJob(
    project_id: str,
    location: str,
    display_name: str,
    json_file: str,
    pipeline_root: str,
):
    from google.cloud import aiplatform

    aiplatform.init(
        project=project_id,
        location=location,
    )

    # Create the run from the compiled template and wait for it to finish,
    # then remove the run's metadata record.
    job = aiplatform.PipelineJob(
        display_name=display_name,
        template_path=json_file,
        pipeline_root=pipeline_root,
        enable_caching=False,
    )
    job.run()

    job.delete()


@dsl.pipeline(name='pipelineJobs')
def pipeline_run_jobs():
    # 1. create endpoint
    create_endpoint = pipelineJob(
        project_id="990000000009",
        location="us-west1",
        display_name="Pipeline-create endpoint",
        json_file="create_endpoint.json",
        pipeline_root="gs://pipeline-root-bucket/pipelines",
    )

    # 2. deploy model to endpoint (only after the endpoint pipeline has finished)
    deploy_model = pipelineJob(
        project_id="990000000009",
        location="us-west1",
        display_name="Pipeline-deploy model",
        json_file="deploy_model.json",
        pipeline_root="gs://pipeline-root-bucket/pipelines",
    )
    deploy_model.after(create_endpoint)


if __name__ == "__main__":
    from kfp.v2 import compiler

    compiler.Compiler().compile(
        pipeline_func=pipeline_run_jobs,
        package_path='pipelineJobs.json')
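For context, here is a minimal sketch of how the compiled pipelineJobs.json spec could be submitted to Vertex AI Pipelines. The project, region, and bucket values are the placeholders used in the example above, not tested settings.

from google.cloud import aiplatform

aiplatform.init(project="990000000009", location="us-west1")

# Submit the "pipeline of pipelines" compiled above and block until it finishes.
run = aiplatform.PipelineJob(
    display_name="pipeline-of-pipelines",
    template_path="pipelineJobs.json",
    pipeline_root="gs://pipeline-root-bucket/pipelines",
    enable_caching=False,
)
run.run(sync=True)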

notebooks/community/generative_ai/backoff_and_retry_for_LLMs.ipynb

Lines changed: 3 additions & 3 deletions
@@ -6,7 +6,7 @@
  "id": "63c7b05c4717"
  },
  "source": [
- "<a href=\"https://colab.research.google.com/github/xqr-g/vertex-ai-samples/blob/main/notebooks/community/generative_ai/backoff_and_retry_for_LLMs.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
+ "<a href=\"https://colab.research.google.com/github/GoogleCloudPlatform/vertex-ai-samples/blob/main/notebooks/community/generative_ai/backoff_and_retry_for_LLMs.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
  ]
 },
 {
@@ -47,7 +47,7 @@
 " </a>\n",
 " </td>\n",
 " <td style=\"text-align: center\">\n",
- " <a href=\"https://console.cloud.google.com/vertex-ai/colab/import/https:%2F%2Fraw.githubusercontent.com%2FGoogleCloudPlatform%2Fvertex-ai-samples%2Fmain%2Fnotebooks%2Fcommunity%2Fgenerative-ai%2Fbackoff_and_retry_for_LLMs.ipynb\">\n",
+ " <a href=\"https://console.cloud.google.com/vertex-ai/colab/import/https:%2F%2Fraw.githubusercontent.com%2FGoogleCloudPlatform%2Fvertex-ai-samples%2Fmain%2Fnotebooks%2Fcommunity%2Fgenerative_ai%2Fbackoff_and_retry_for_LLMs.ipynb\">\n",
 " <img width=\"32px\" src=\"https://cloud.google.com/ml-engine/images/colab-enterprise-logo-32px.png\" alt=\"Google Cloud Colab Enterprise logo\"><br> Open in Colab Enterprise\n",
 " </a>\n",
 " </td> \n",
@@ -85,7 +85,7 @@
 "\n",
 "This notebook demonstrates how sending large amounts of traffic to Gemini-1.5-Pro can cause \"429 Quota Exceeded Errors\" and how implementing a backoff-and-retry strategy can help complete jobs without interrupting operations.\n",
 "\n",
- "This notebook provides examples for the blog post: Don't let 429 errors leave your users hanging: A guide to handling resource exhaustion\n",
+ "This notebook provides examples for the blog post: [Don't let resource exhaustion leave your users hanging: A guide to handling 429 errors](https://cloud.google.com/blog/products/ai-machine-learning/learn-how-to-handle-429-resource-exhaustion-errors-in-your-llms?e=48754805)\n",
 "\n",
 "This tutorial uses the following Google Cloud ML service:\n",
 "\n",

notebooks/community/migration/UJ7 AutoML for natural language with Vertex AI Text Entity Extraction.ipynb

Lines changed: 33 additions & 171 deletions
@@ -109,27 +109,27 @@
 "id": "3Sq3sGfdt89E"
 },
 "source": [
- "## Before you begin\r\n",
- "\r\n",
- "### GPU run-time\r\n",
- "\r\n",
- "*Make sure you're running this notebook in a GPU runtime if you have that option. In Colab, select* **Runtime > Change Runtime Type > GPU**\r\n",
- "\r\n",
- "### Set up your GCP project\r\n",
- "\r\n",
- "**The following steps are required, regardless of your notebook environment.**\r\n",
- "\r\n",
- "1. [Select or create a GCP project](https://console.cloud.google.com/cloud-resource-manager). When you first create an account, you get a $300 free credit towards your compute/storage costs.\r\n",
- "\r\n",
- "2. [Make sure that billing is enabled for your project.](https://cloud.google.com/billing/docs/how-to/modify-project)\r\n",
- "\r\n",
- "3. [Enable the Vertex APIs and Compute Engine APIs.](https://console.cloud.google.com/flows/enableapi?apiid=ml.googleapis.com,compute_component)\r\n",
- "\r\n",
- "4. [Google Cloud SDK](https://cloud.google.com/sdk) is already installed in Google Cloud Notebooks.\r\n",
- "\r\n",
- "5. Enter your project ID in the cell below. Then run the cell to make sure the\r\n",
- "Cloud SDK uses the right project for all the commands in this notebook.\r\n",
- "\r\n",
+ "## Before you begin\n",
+ "\n",
+ "### GPU run-time\n",
+ "\n",
+ "*Make sure you're running this notebook in a GPU runtime if you have that option. In Colab, select* **Runtime > Change Runtime Type > GPU**\n",
+ "\n",
+ "### Set up your GCP project\n",
+ "\n",
+ "**The following steps are required, regardless of your notebook environment.**\n",
+ "\n",
+ "1. [Select or create a GCP project](https://console.cloud.google.com/cloud-resource-manager). When you first create an account, you get a $300 free credit towards your compute/storage costs.\n",
+ "\n",
+ "2. [Make sure that billing is enabled for your project.](https://cloud.google.com/billing/docs/how-to/modify-project)\n",
+ "\n",
+ "3. [Enable the Vertex APIs and Compute Engine APIs.](https://console.cloud.google.com/flows/enableapi?apiid=ml.googleapis.com,compute_component)\n",
+ "\n",
+ "4. [Google Cloud SDK](https://cloud.google.com/sdk) is already installed in Google Cloud Notebooks.\n",
+ "\n",
+ "5. Enter your project ID in the cell below. Then run the cell to make sure the\n",
+ "Cloud SDK uses the right project for all the commands in this notebook.\n",
+ "\n",
 "**Note**: Jupyter runs lines prefixed with `!` as shell commands, and it interpolates Python variables prefixed with `$` into these commands."
 ]
 },
@@ -240,11 +240,11 @@
 "id": "9zpjPUOhvRQz"
 },
 "source": [
- "### Authenticate your GCP account\r\n",
- "\r\n",
- "**If you are using Google Cloud Notebooks**, your environment is already\r\n",
- "authenticated. Skip this step.\r\n",
- "\r\n",
+ "### Authenticate your GCP account\n",
+ "\n",
+ "**If you are using Google Cloud Notebooks**, your environment is already\n",
+ "authenticated. Skip this step.\n",
+ "\n",
 "*Note: If you are on an Vertex notebook and run the cell, the cell knows to skip executing the authentication steps.*"
 ]
 },
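As an aside, the project setup and authentication steps referenced in these two hunks usually reduce to a notebook cell like the following minimal sketch; PROJECT_ID is a placeholder, not a value from this notebook.

PROJECT_ID = "your-project-id"  # placeholder; replace with your GCP project ID

# Point the Cloud SDK at the project (Jupyter runs `!` lines as shell commands).
! gcloud config set project $PROJECT_ID

# On Colab, authenticate the runtime; Vertex AI Workbench is already authenticated.
import sys
if "google.colab" in sys.modules:
    from google.colab import auth
    auth.authenticate_user()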
@@ -1459,8 +1459,7 @@
 "id": "gM-YixlLmDy9"
 },
 "source": [
- "### Make a batch prediction file\r\n",
- "\r\n"
+ "### Make a batch prediction file\n"
 ]
 },
 {
@@ -1489,19 +1488,6 @@
 "! gsutil cat $gcs_test_item"
 ]
 },
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "sPupiwqN_jAB"
- },
- "source": [
- "*Example output*:\n",
- "```\n",
- "{\"content\": \"gs://migration-ucaip-trainingaip-20210301154552/test.txt\", \"mime_type\": \"text/plain\"}\n",
- "Molecular basis of hexosaminidase A deficiency and pseudodeficiency in the Berks County Pennsylvania Dutch.\\tFollowing the birth of two infants with Tay-Sachs disease ( TSD ) , a non-Jewish , Pennsylvania Dutch kindred was screened for TSD carriers using the biochemical assay . A high frequency of individuals who appeared to be TSD heterozygotes was detected ( Kelly et al . , 1975 ) . Clinical and biochemical evidence suggested that the increased carrier frequency was due to at least two altered alleles for the hexosaminidase A alpha-subunit . We now report two mutant alleles in this Pennsylvania Dutch kindred , and one polymorphism . One allele , reported originally in a French TSD patient ( Akli et al . , 1991 ) , is a GT-- > AT transition at the donor splice-site of intron 9 . The second , a C-- > T transition at nucleotide 739 ( Arg247Trp ) , has been shown by Triggs-Raine et al . ( 1992 ) to be a clinically benign \" pseudodeficient \" allele associated with reduced enzyme activity against artificial substrate . Finally , a polymorphism [ G-- > A ( 759 ) ] , which leaves valine at codon 253 unchanged , is described\n",
- "```\n"
- ]
- },
 {
 "cell_type": "markdown",
 "metadata": {
@@ -1557,45 +1543,6 @@
 ")"
 ]
 },
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "sPupiwqN_jAB"
- },
- "source": [
- "*Example output*:\n",
- "```\n",
- "{\n",
- " \"parent\": \"projects/migration-ucaip-training/locations/us-central1\",\n",
- " \"batchPredictionJob\": {\n",
- " \"displayName\": \"ten_20210301154552\",\n",
- " \"model\": \"projects/116273516712/locations/us-central1/models/4400738115568795648\",\n",
- " \"inputConfig\": {\n",
- " \"instancesFormat\": \"jsonl\",\n",
- " \"gcsSource\": {\n",
- " \"uris\": [\n",
- " \"gs://migration-ucaip-trainingaip-20210301154552/test.jsonl\"\n",
- " ]\n",
- " }\n",
- " },\n",
- " \"outputConfig\": {\n",
- " \"predictionsFormat\": \"jsonl\",\n",
- " \"gcsDestination\": {\n",
- " \"outputUriPrefix\": \"gs://migration-ucaip-trainingaip-20210301154552/batch_output/\"\n",
- " }\n",
- " },\n",
- " \"dedicatedResources\": {\n",
- " \"machineSpec\": {\n",
- " \"machineType\": \"n1-standard-2\"\n",
- " },\n",
- " \"startingReplicaCount\": 1,\n",
- " \"maxReplicaCount\": 1\n",
- " }\n",
- " }\n",
- "}\n",
- "```\n"
- ]
- },
 {
 "cell_type": "markdown",
 "metadata": {
@@ -1638,42 +1585,6 @@
 "print(MessageToJson(request.__dict__[\"_pb\"]))"
 ]
 },
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "sPupiwqN_jAB"
- },
- "source": [
- "*Example output*:\n",
- "```\n",
- "{\n",
- " \"name\": \"projects/116273516712/locations/us-central1/batchPredictionJobs/3588251799200464896\",\n",
- " \"displayName\": \"ten_20210301154552\",\n",
- " \"model\": \"projects/116273516712/locations/us-central1/models/4400738115568795648\",\n",
- " \"inputConfig\": {\n",
- " \"instancesFormat\": \"jsonl\",\n",
- " \"gcsSource\": {\n",
- " \"uris\": [\n",
- " \"gs://migration-ucaip-trainingaip-20210301154552/test.jsonl\"\n",
- " ]\n",
- " }\n",
- " },\n",
- " \"outputConfig\": {\n",
- " \"predictionsFormat\": \"jsonl\",\n",
- " \"gcsDestination\": {\n",
- " \"outputUriPrefix\": \"gs://migration-ucaip-trainingaip-20210301154552/batch_output/\"\n",
- " }\n",
- " },\n",
- " \"state\": \"JOB_STATE_PENDING\",\n",
- " \"completionStats\": {\n",
- " \"incompleteCount\": \"-1\"\n",
- " },\n",
- " \"createTime\": \"2021-03-01T17:59:42.777083Z\",\n",
- " \"updateTime\": \"2021-03-01T17:59:42.777083Z\"\n",
- "}\n",
- "```\n"
- ]
- },
 {
 "cell_type": "code",
 "execution_count": null,
@@ -1739,42 +1650,6 @@
 "print(MessageToJson(request.__dict__[\"_pb\"]))"
 ]
 },
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "sPupiwqN_jAB"
- },
- "source": [
- "*Example output*:\n",
- "```\n",
- "{\n",
- " \"name\": \"projects/116273516712/locations/us-central1/batchPredictionJobs/3588251799200464896\",\n",
- " \"displayName\": \"ten_20210301154552\",\n",
- " \"model\": \"projects/116273516712/locations/us-central1/models/4400738115568795648\",\n",
- " \"inputConfig\": {\n",
- " \"instancesFormat\": \"jsonl\",\n",
- " \"gcsSource\": {\n",
- " \"uris\": [\n",
- " \"gs://migration-ucaip-trainingaip-20210301154552/test.jsonl\"\n",
- " ]\n",
- " }\n",
- " },\n",
- " \"outputConfig\": {\n",
- " \"predictionsFormat\": \"jsonl\",\n",
- " \"gcsDestination\": {\n",
- " \"outputUriPrefix\": \"gs://migration-ucaip-trainingaip-20210301154552/batch_output/\"\n",
- " }\n",
- " },\n",
- " \"state\": \"JOB_STATE_PENDING\",\n",
- " \"completionStats\": {\n",
- " \"incompleteCount\": \"-1\"\n",
- " },\n",
- " \"createTime\": \"2021-03-01T17:59:42.777083Z\",\n",
- " \"updateTime\": \"2021-03-01T17:59:42.777083Z\"\n",
- "}\n",
- "```\n"
- ]
- },
 {
 "cell_type": "code",
 "execution_count": null,
@@ -1798,19 +1673,6 @@
 " time.sleep(60)"
 ]
 },
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "trainingpipelines_create:migration,new,response,icn"
- },
- "source": [
- "*Example output*:\n",
- "```\n",
- "gs://migration-ucaip-trainingaip-20210301154552/batch_output/prediction-ten_20210301154552-2021-03-01T17:59:42.638222Z/predictions_00001.jsonl\n",
- "{\"instance\":{\"content\":\"gs://migration-ucaip-trainingaip-20210301154552/test.txt\",\"mimeType\":\"text/plain\"},\"prediction\":{\"ids\":[\"7806436899697983488\",\"7806436899697983488\",\"7806436899697983488\",\"4347672385877442560\",\"4347672385877442560\",\"4347672385877442560\"],\"displayNames\":[\"SpecificDisease\",\"SpecificDisease\",\"SpecificDisease\",\"Modifier\",\"Modifier\",\"Modifier\"],\"textSegmentStartOffsets\":[\"149\",\"19\",\"169\",\"236\",\"688\",\"330\"],\"textSegmentEndOffsets\":[\"165\",\"45\",\"171\",\"238\",\"690\",\"332\"],\"confidences\":[0.99957836,0.9995628,0.9995044,0.9993287,0.9993144,0.99927235]}}\n",
- "```\n"
- ]
- },
 {
 "cell_type": "markdown",
 "metadata": {
@@ -2338,11 +2200,11 @@
 "id": "bQ-VVaSxJjkd"
 },
 "source": [
- "# Cleaning up\r\n",
- "\r\n",
- "To clean up all GCP resources used in this project, you can [delete the GCP\r\n",
- "project](https://cloud.google.com/resource-manager/docs/creating-managing-projects#shutting_down_projects) you used for the tutorial.\r\n",
- "\r\n",
+ "# Cleaning up\n",
+ "\n",
+ "To clean up all GCP resources used in this project, you can [delete the GCP\n",
+ "project](https://cloud.google.com/resource-manager/docs/creating-managing-projects#shutting_down_projects) you used for the tutorial.\n",
+ "\n",
 "Otherwise, you can delete the individual resources you created in this tutorial."
 ]
 },
@@ -2404,7 +2266,7 @@
 ],
 "metadata": {
 "colab": {
- "name": "UJ7 unified AutoML for natural language with Vertex AI Text Entity Extraction.ipynb",
+ "name": "UJ7 AutoML for natural language with Vertex AI Text Entity Extraction.ipynb",
 "toc_visible": true
 },
 "kernelspec": {

notebooks/community/model_garden/model_garden_codegemma_deployment_on_vertex.ipynb

Lines changed: 4 additions & 0 deletions
@@ -556,6 +556,7 @@
 " enable_trust_remote_code: bool = False,\n",
 " enforce_eager: bool = False,\n",
 " enable_lora: bool = False,\n",
+ " enable_chunked_prefill: bool = False,\n",
 " max_loras: int = 1,\n",
 " max_cpu_loras: int = 8,\n",
 " use_dedicated_endpoint: bool = False,\n",
@@ -599,6 +600,9 @@
 " if enable_lora:\n",
 " vllm_args.append(\"--enable-lora\")\n",
 "\n",
+ " if enable_chunked_prefill:\n",
+ " vllm_args.append(\"--enable-chunked-prefill\")\n",
+ "\n",
 " if model_type:\n",
 " vllm_args.append(f\"--model-type={model_type}\")\n",
 "\n",

notebooks/community/model_garden/model_garden_gemma_deployment_on_vertex.ipynb

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -653,6 +653,7 @@
653653
" enable_trust_remote_code: bool = False,\n",
654654
" enforce_eager: bool = False,\n",
655655
" enable_lora: bool = False,\n",
656+
" enable_chunked_prefill: bool = False,\n",
656657
" max_loras: int = 1,\n",
657658
" max_cpu_loras: int = 8,\n",
658659
" use_dedicated_endpoint: bool = False,\n",
@@ -696,6 +697,9 @@
696697
" if enable_lora:\n",
697698
" vllm_args.append(\"--enable-lora\")\n",
698699
"\n",
700+
" if enable_chunked_prefill:\n",
701+
" vllm_args.append(\"--enable-chunked-prefill\")\n",
702+
"\n",
699703
" if model_type:\n",
700704
" vllm_args.append(f\"--model-type={model_type}\")\n",
701705
"\n",

0 commit comments