
Commit a9b2158

anubbhav-malhotra, vertex-mg-bot, gericdong, Aiden010200, and pemujo authored
merge 12/11 (#8)
* Enable dedicate endpoint for Llama Guard deployment PiperOrigin-RevId: 700022732
* Add Llama 3.2 serving notebook. PiperOrigin-RevId: 700025379
* fix: Update the model file path (GoogleCloudPlatform#3731)
* fix: Update the model file path
* Update the model file path 2
* fix: Remove example ouptut (GoogleCloudPlatform#3732)
* Add 2H100/4H100 deploy options to llama notebooks PiperOrigin-RevId: 700107023
* Enable dedicate endpoint for huggingface tei deployment PiperOrigin-RevId: 700166368
* Enable dedicate endpoint for timesfm deployment PiperOrigin-RevId: 700190957
* Upload pipeline job example (GoogleCloudPlatform#3719)
* Upload pipeline example which can combine other pipeline examples.
* Avoid copying model artifacts to local GCS and use VERTEX_AI_MODEL_GARDEN_LLAMA_3_1 directly PiperOrigin-RevId: 700732639
* LLaVA Deployment notebook PiperOrigin-RevId: 701314497
* A fix in the prediction section PiperOrigin-RevId: 701847844
* Adding chunked prefill vllm server arg. PiperOrigin-RevId: 702022997
* Add vLLM + TPU Llama 3.1 and Qwen 2.5 deployment notebook. PiperOrigin-RevId: 702091534
* Added blog post URL (GoogleCloudPlatform#3739)
* tfvision classification notebook PiperOrigin-RevId: 702659350
* Add new Llama 3.3 deployment notebook. PiperOrigin-RevId: 703537600
* Adding Qwen2.5-Instruct-32B-AWQ TPU configs to Colab deployment notebook PiperOrigin-RevId: 703541226
* Add Llama 3.3 finetuning notebook. PiperOrigin-RevId: 703546040
* Update the HF TGI and pytorch-inference notebooks, with the latest container image version. PiperOrigin-RevId: 704421485
* Enable dedicate endpoint for phi3 deployment PiperOrigin-RevId: 704865877
* mediapipe Object detection notebook PiperOrigin-RevId: 705082582

---------

Co-authored-by: Vertex MG Team <vertex-mg-bot@google.com>
Co-authored-by: Eric Dong <itseric@google.com>
Co-authored-by: Aiden010200 <150222139+Aiden010200@users.noreply.github.com>
Co-authored-by: Pedro Melendez <elpeme@gmail.com>
Co-authored-by: Minwoo Park <minwoopark@google.com>
1 parent 5ab2471 commit a9b2158

File tree

41 files changed (+3390, -692 lines)

Lines changed: 43 additions & 0 deletions
@@ -0,0 +1,43 @@
from kfp.v2 import dsl


@dsl.component(
    base_image='python:3.8',
    packages_to_install=['google-cloud-aiplatform==1.36.0'],
)
def pipelineJob(
    project_id: str,
    location: str,
    display_name: str,
    json_file: str,
    pipeline_root: str,
):
    from google.cloud import aiplatform

    aiplatform.init(
        project=project_id,
        location=location,
    )

    # Create the run from the compiled template and wait for it to finish,
    # then remove the run's metadata record.
    job = aiplatform.PipelineJob(
        display_name=display_name,
        template_path=json_file,
        pipeline_root=pipeline_root,
        enable_caching=False,
    )
    job.run()

    job.delete()


@dsl.pipeline(name='pipelineJobs')
def pipeline_run_jobs():
    # 1. create endpoint
    create_endpoint = pipelineJob(
        project_id="990000000009",
        location="us-west1",
        display_name="Pipeline-create endpoint",
        json_file="create_endpoint.json",
        pipeline_root="gs://pipeline-root-bucket/pipelines",
    )

    # 2. deploy model to endpoint (only after the endpoint pipeline has finished)
    deploy_model = pipelineJob(
        project_id="990000000009",
        location="us-west1",
        display_name="Pipeline-deploy model",
        json_file="deploy_model.json",
        pipeline_root="gs://pipeline-root-bucket/pipelines",
    )
    deploy_model.after(create_endpoint)


if __name__ == "__main__":
    from kfp.v2 import compiler

    compiler.Compiler().compile(
        pipeline_func=pipeline_run_jobs,
        package_path='pipelineJobs.json')
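For context, here is a minimal sketch of how the compiled pipelineJobs.json spec could be submitted to Vertex AI Pipelines. The project, region, and bucket values are the placeholders used in the example above, not tested settings.

from google.cloud import aiplatform

aiplatform.init(project="990000000009", location="us-west1")

# Submit the "pipeline of pipelines" compiled above and block until it finishes.
run = aiplatform.PipelineJob(
    display_name="pipeline-of-pipelines",
    template_path="pipelineJobs.json",
    pipeline_root="gs://pipeline-root-bucket/pipelines",
    enable_caching=False,
)
run.run(sync=True)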

notebooks/community/generative_ai/backoff_and_retry_for_LLMs.ipynb

Lines changed: 3 additions & 3 deletions
@@ -6,7 +6,7 @@
  "id": "63c7b05c4717"
  },
  "source": [
- "<a href=\"https://colab.research.google.com/github/xqr-g/vertex-ai-samples/blob/main/notebooks/community/generative_ai/backoff_and_retry_for_LLMs.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
+ "<a href=\"https://colab.research.google.com/github/GoogleCloudPlatform/vertex-ai-samples/blob/main/notebooks/community/generative_ai/backoff_and_retry_for_LLMs.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
  ]
 },
 {
@@ -47,7 +47,7 @@
 " </a>\n",
 " </td>\n",
 " <td style=\"text-align: center\">\n",
- " <a href=\"https://console.cloud.google.com/vertex-ai/colab/import/https:%2F%2Fraw.githubusercontent.com%2FGoogleCloudPlatform%2Fvertex-ai-samples%2Fmain%2Fnotebooks%2Fcommunity%2Fgenerative-ai%2Fbackoff_and_retry_for_LLMs.ipynb\">\n",
+ " <a href=\"https://console.cloud.google.com/vertex-ai/colab/import/https:%2F%2Fraw.githubusercontent.com%2FGoogleCloudPlatform%2Fvertex-ai-samples%2Fmain%2Fnotebooks%2Fcommunity%2Fgenerative_ai%2Fbackoff_and_retry_for_LLMs.ipynb\">\n",
 " <img width=\"32px\" src=\"https://cloud.google.com/ml-engine/images/colab-enterprise-logo-32px.png\" alt=\"Google Cloud Colab Enterprise logo\"><br> Open in Colab Enterprise\n",
 " </a>\n",
 " </td> \n",
@@ -85,7 +85,7 @@
 "\n",
 "This notebook demonstrates how sending large amounts of traffic to Gemini-1.5-Pro can cause \"429 Quota Exceeded Errors\" and how implementing a backoff-and-retry strategy can help complete jobs without interrupting operations.\n",
 "\n",
- "This notebook provides examples for the blog post: Don't let 429 errors leave your users hanging: A guide to handling resource exhaustion\n",
+ "This notebook provides examples for the blog post: [Don't let resource exhaustion leave your users hanging: A guide to handling 429 errors](https://cloud.google.com/blog/products/ai-machine-learning/learn-how-to-handle-429-resource-exhaustion-errors-in-your-llms?e=48754805)\n",
 "\n",
 "This tutorial uses the following Google Cloud ML service:\n",
 "\n",

notebooks/community/migration/UJ7 AutoML for natural language with Vertex AI Text Entity Extraction.ipynb

Lines changed: 33 additions & 171 deletions
@@ -109,27 +109,27 @@
 "id": "3Sq3sGfdt89E"
 },
 "source": [
- "## Before you begin\r\n",
- "\r\n",
- "### GPU run-time\r\n",
- "\r\n",
- "*Make sure you're running this notebook in a GPU runtime if you have that option. In Colab, select* **Runtime > Change Runtime Type > GPU**\r\n",
- "\r\n",
- "### Set up your GCP project\r\n",
- "\r\n",
- "**The following steps are required, regardless of your notebook environment.**\r\n",
- "\r\n",
- "1. [Select or create a GCP project](https://console.cloud.google.com/cloud-resource-manager). When you first create an account, you get a $300 free credit towards your compute/storage costs.\r\n",
- "\r\n",
- "2. [Make sure that billing is enabled for your project.](https://cloud.google.com/billing/docs/how-to/modify-project)\r\n",
- "\r\n",
- "3. [Enable the Vertex APIs and Compute Engine APIs.](https://console.cloud.google.com/flows/enableapi?apiid=ml.googleapis.com,compute_component)\r\n",
- "\r\n",
- "4. [Google Cloud SDK](https://cloud.google.com/sdk) is already installed in Google Cloud Notebooks.\r\n",
- "\r\n",
- "5. Enter your project ID in the cell below. Then run the cell to make sure the\r\n",
- "Cloud SDK uses the right project for all the commands in this notebook.\r\n",
- "\r\n",
+ "## Before you begin\n",
+ "\n",
+ "### GPU run-time\n",
+ "\n",
+ "*Make sure you're running this notebook in a GPU runtime if you have that option. In Colab, select* **Runtime > Change Runtime Type > GPU**\n",
+ "\n",
+ "### Set up your GCP project\n",
+ "\n",
+ "**The following steps are required, regardless of your notebook environment.**\n",
+ "\n",
+ "1. [Select or create a GCP project](https://console.cloud.google.com/cloud-resource-manager). When you first create an account, you get a $300 free credit towards your compute/storage costs.\n",
+ "\n",
+ "2. [Make sure that billing is enabled for your project.](https://cloud.google.com/billing/docs/how-to/modify-project)\n",
+ "\n",
+ "3. [Enable the Vertex APIs and Compute Engine APIs.](https://console.cloud.google.com/flows/enableapi?apiid=ml.googleapis.com,compute_component)\n",
+ "\n",
+ "4. [Google Cloud SDK](https://cloud.google.com/sdk) is already installed in Google Cloud Notebooks.\n",
+ "\n",
+ "5. Enter your project ID in the cell below. Then run the cell to make sure the\n",
+ "Cloud SDK uses the right project for all the commands in this notebook.\n",
+ "\n",
 "**Note**: Jupyter runs lines prefixed with `!` as shell commands, and it interpolates Python variables prefixed with `$` into these commands."
 ]
 },
@@ -240,11 +240,11 @@
 "id": "9zpjPUOhvRQz"
 },
 "source": [
- "### Authenticate your GCP account\r\n",
- "\r\n",
- "**If you are using Google Cloud Notebooks**, your environment is already\r\n",
- "authenticated. Skip this step.\r\n",
- "\r\n",
+ "### Authenticate your GCP account\n",
+ "\n",
+ "**If you are using Google Cloud Notebooks**, your environment is already\n",
+ "authenticated. Skip this step.\n",
+ "\n",
 "*Note: If you are on an Vertex notebook and run the cell, the cell knows to skip executing the authentication steps.*"
 ]
 },
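As an aside, the project setup and authentication steps referenced in these two hunks usually reduce to a notebook cell like the following minimal sketch; PROJECT_ID is a placeholder, not a value from this notebook.

PROJECT_ID = "your-project-id"  # placeholder; replace with your GCP project ID

# Point the Cloud SDK at the project (Jupyter runs `!` lines as shell commands).
! gcloud config set project $PROJECT_ID

# On Colab, authenticate the runtime; Vertex AI Workbench is already authenticated.
import sys
if "google.colab" in sys.modules:
    from google.colab import auth
    auth.authenticate_user()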
@@ -1459,8 +1459,7 @@
 "id": "gM-YixlLmDy9"
 },
 "source": [
- "### Make a batch prediction file\r\n",
- "\r\n"
+ "### Make a batch prediction file\n"
 ]
 },
 {
@@ -1489,19 +1488,6 @@
 "! gsutil cat $gcs_test_item"
 ]
 },
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "sPupiwqN_jAB"
- },
- "source": [
- "*Example output*:\n",
- "```\n",
- "{\"content\": \"gs://migration-ucaip-trainingaip-20210301154552/test.txt\", \"mime_type\": \"text/plain\"}\n",
- "Molecular basis of hexosaminidase A deficiency and pseudodeficiency in the Berks County Pennsylvania Dutch.\\tFollowing the birth of two infants with Tay-Sachs disease ( TSD ) , a non-Jewish , Pennsylvania Dutch kindred was screened for TSD carriers using the biochemical assay . A high frequency of individuals who appeared to be TSD heterozygotes was detected ( Kelly et al . , 1975 ) . Clinical and biochemical evidence suggested that the increased carrier frequency was due to at least two altered alleles for the hexosaminidase A alpha-subunit . We now report two mutant alleles in this Pennsylvania Dutch kindred , and one polymorphism . One allele , reported originally in a French TSD patient ( Akli et al . , 1991 ) , is a GT-- > AT transition at the donor splice-site of intron 9 . The second , a C-- > T transition at nucleotide 739 ( Arg247Trp ) , has been shown by Triggs-Raine et al . ( 1992 ) to be a clinically benign \" pseudodeficient \" allele associated with reduced enzyme activity against artificial substrate . Finally , a polymorphism [ G-- > A ( 759 ) ] , which leaves valine at codon 253 unchanged , is described\n",
- "```\n"
- ]
- },
 {
 "cell_type": "markdown",
 "metadata": {
@@ -1557,45 +1543,6 @@
 ")"
 ]
 },
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "sPupiwqN_jAB"
- },
- "source": [
- "*Example output*:\n",
- "```\n",
- "{\n",
- " \"parent\": \"projects/migration-ucaip-training/locations/us-central1\",\n",
- " \"batchPredictionJob\": {\n",
- " \"displayName\": \"ten_20210301154552\",\n",
- " \"model\": \"projects/116273516712/locations/us-central1/models/4400738115568795648\",\n",
- " \"inputConfig\": {\n",
- " \"instancesFormat\": \"jsonl\",\n",
- " \"gcsSource\": {\n",
- " \"uris\": [\n",
- " \"gs://migration-ucaip-trainingaip-20210301154552/test.jsonl\"\n",
- " ]\n",
- " }\n",
- " },\n",
- " \"outputConfig\": {\n",
- " \"predictionsFormat\": \"jsonl\",\n",
- " \"gcsDestination\": {\n",
- " \"outputUriPrefix\": \"gs://migration-ucaip-trainingaip-20210301154552/batch_output/\"\n",
- " }\n",
- " },\n",
- " \"dedicatedResources\": {\n",
- " \"machineSpec\": {\n",
- " \"machineType\": \"n1-standard-2\"\n",
- " },\n",
- " \"startingReplicaCount\": 1,\n",
- " \"maxReplicaCount\": 1\n",
- " }\n",
- " }\n",
- "}\n",
- "```\n"
- ]
- },
 {
 "cell_type": "markdown",
 "metadata": {
@@ -1638,42 +1585,6 @@
 "print(MessageToJson(request.__dict__[\"_pb\"]))"
 ]
 },
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "sPupiwqN_jAB"
- },
- "source": [
- "*Example output*:\n",
- "```\n",
- "{\n",
- " \"name\": \"projects/116273516712/locations/us-central1/batchPredictionJobs/3588251799200464896\",\n",
- " \"displayName\": \"ten_20210301154552\",\n",
- " \"model\": \"projects/116273516712/locations/us-central1/models/4400738115568795648\",\n",
- " \"inputConfig\": {\n",
- " \"instancesFormat\": \"jsonl\",\n",
- " \"gcsSource\": {\n",
- " \"uris\": [\n",
- " \"gs://migration-ucaip-trainingaip-20210301154552/test.jsonl\"\n",
- " ]\n",
- " }\n",
- " },\n",
- " \"outputConfig\": {\n",
- " \"predictionsFormat\": \"jsonl\",\n",
- " \"gcsDestination\": {\n",
- " \"outputUriPrefix\": \"gs://migration-ucaip-trainingaip-20210301154552/batch_output/\"\n",
- " }\n",
- " },\n",
- " \"state\": \"JOB_STATE_PENDING\",\n",
- " \"completionStats\": {\n",
- " \"incompleteCount\": \"-1\"\n",
- " },\n",
- " \"createTime\": \"2021-03-01T17:59:42.777083Z\",\n",
- " \"updateTime\": \"2021-03-01T17:59:42.777083Z\"\n",
- "}\n",
- "```\n"
- ]
- },
 {
 "cell_type": "code",
 "execution_count": null,
@@ -1739,42 +1650,6 @@
 "print(MessageToJson(request.__dict__[\"_pb\"]))"
 ]
 },
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "sPupiwqN_jAB"
- },
- "source": [
- "*Example output*:\n",
- "```\n",
- "{\n",
- " \"name\": \"projects/116273516712/locations/us-central1/batchPredictionJobs/3588251799200464896\",\n",
- " \"displayName\": \"ten_20210301154552\",\n",
- " \"model\": \"projects/116273516712/locations/us-central1/models/4400738115568795648\",\n",
- " \"inputConfig\": {\n",
- " \"instancesFormat\": \"jsonl\",\n",
- " \"gcsSource\": {\n",
- " \"uris\": [\n",
- " \"gs://migration-ucaip-trainingaip-20210301154552/test.jsonl\"\n",
- " ]\n",
- " }\n",
- " },\n",
- " \"outputConfig\": {\n",
- " \"predictionsFormat\": \"jsonl\",\n",
- " \"gcsDestination\": {\n",
- " \"outputUriPrefix\": \"gs://migration-ucaip-trainingaip-20210301154552/batch_output/\"\n",
- " }\n",
- " },\n",
- " \"state\": \"JOB_STATE_PENDING\",\n",
- " \"completionStats\": {\n",
- " \"incompleteCount\": \"-1\"\n",
- " },\n",
- " \"createTime\": \"2021-03-01T17:59:42.777083Z\",\n",
- " \"updateTime\": \"2021-03-01T17:59:42.777083Z\"\n",
- "}\n",
- "```\n"
- ]
- },
 {
 "cell_type": "code",
 "execution_count": null,
@@ -1798,19 +1673,6 @@
 " time.sleep(60)"
 ]
 },
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "trainingpipelines_create:migration,new,response,icn"
- },
- "source": [
- "*Example output*:\n",
- "```\n",
- "gs://migration-ucaip-trainingaip-20210301154552/batch_output/prediction-ten_20210301154552-2021-03-01T17:59:42.638222Z/predictions_00001.jsonl\n",
- "{\"instance\":{\"content\":\"gs://migration-ucaip-trainingaip-20210301154552/test.txt\",\"mimeType\":\"text/plain\"},\"prediction\":{\"ids\":[\"7806436899697983488\",\"7806436899697983488\",\"7806436899697983488\",\"4347672385877442560\",\"4347672385877442560\",\"4347672385877442560\"],\"displayNames\":[\"SpecificDisease\",\"SpecificDisease\",\"SpecificDisease\",\"Modifier\",\"Modifier\",\"Modifier\"],\"textSegmentStartOffsets\":[\"149\",\"19\",\"169\",\"236\",\"688\",\"330\"],\"textSegmentEndOffsets\":[\"165\",\"45\",\"171\",\"238\",\"690\",\"332\"],\"confidences\":[0.99957836,0.9995628,0.9995044,0.9993287,0.9993144,0.99927235]}}\n",
- "```\n"
- ]
- },
 {
 "cell_type": "markdown",
 "metadata": {
@@ -2338,11 +2200,11 @@
 "id": "bQ-VVaSxJjkd"
 },
 "source": [
- "# Cleaning up\r\n",
- "\r\n",
- "To clean up all GCP resources used in this project, you can [delete the GCP\r\n",
- "project](https://cloud.google.com/resource-manager/docs/creating-managing-projects#shutting_down_projects) you used for the tutorial.\r\n",
- "\r\n",
+ "# Cleaning up\n",
+ "\n",
+ "To clean up all GCP resources used in this project, you can [delete the GCP\n",
+ "project](https://cloud.google.com/resource-manager/docs/creating-managing-projects#shutting_down_projects) you used for the tutorial.\n",
+ "\n",
 "Otherwise, you can delete the individual resources you created in this tutorial."
 ]
 },
@@ -2404,7 +2266,7 @@
 ],
 "metadata": {
 "colab": {
- "name": "UJ7 unified AutoML for natural language with Vertex AI Text Entity Extraction.ipynb",
+ "name": "UJ7 AutoML for natural language with Vertex AI Text Entity Extraction.ipynb",
 "toc_visible": true
 },
 "kernelspec": {

notebooks/community/model_garden/model_garden_codegemma_deployment_on_vertex.ipynb

Lines changed: 4 additions & 0 deletions
@@ -556,6 +556,7 @@
 " enable_trust_remote_code: bool = False,\n",
 " enforce_eager: bool = False,\n",
 " enable_lora: bool = False,\n",
+ " enable_chunked_prefill: bool = False,\n",
 " max_loras: int = 1,\n",
 " max_cpu_loras: int = 8,\n",
 " use_dedicated_endpoint: bool = False,\n",
@@ -599,6 +600,9 @@
 " if enable_lora:\n",
 " vllm_args.append(\"--enable-lora\")\n",
 "\n",
+ " if enable_chunked_prefill:\n",
+ " vllm_args.append(\"--enable-chunked-prefill\")\n",
+ "\n",
 " if model_type:\n",
 " vllm_args.append(f\"--model-type={model_type}\")\n",
 "\n",

notebooks/community/model_garden/model_garden_gemma_deployment_on_vertex.ipynb

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -653,6 +653,7 @@
653653
" enable_trust_remote_code: bool = False,\n",
654654
" enforce_eager: bool = False,\n",
655655
" enable_lora: bool = False,\n",
656+
" enable_chunked_prefill: bool = False,\n",
656657
" max_loras: int = 1,\n",
657658
" max_cpu_loras: int = 8,\n",
658659
" use_dedicated_endpoint: bool = False,\n",
@@ -696,6 +697,9 @@
696697
" if enable_lora:\n",
697698
" vllm_args.append(\"--enable-lora\")\n",
698699
"\n",
700+
" if enable_chunked_prefill:\n",
701+
" vllm_args.append(\"--enable-chunked-prefill\")\n",
702+
"\n",
699703
" if model_type:\n",
700704
" vllm_args.append(f\"--model-type={model_type}\")\n",
701705
"\n",

0 commit comments