LLaVA Deployment notebook

vertex-mg-bot · copybara-github · commit 0ad4665c1b61 · 2024-11-22T11:11:47.000-08:00
PiperOrigin-RevId: 699223982
diff --git a/notebooks/community/model_garden/model_garden_pytorch_llava.ipynb b/notebooks/community/model_garden/model_garden_pytorch_llava.ipynb
@@ -102,6 +102,15 @@
         "\n",
         "REGION = \"\"  # @param {type:\"string\"}\n",
         "\n",
+        "# @markdown 4. If you want to run predictions with A100 80GB or H100 GPUs, we recommend using the regions listed below. **NOTE:** Make sure you have the associated quota in the selected regions. Click the links to see your current quota for each GPU type: [Nvidia A100 80GB](https://console.cloud.google.com/iam-admin/quotas?metric=aiplatform.googleapis.com%2Fcustom_model_serving_nvidia_a100_80gb_gpus), [Nvidia H100 80GB](https://console.cloud.google.com/iam-admin/quotas?metric=aiplatform.googleapis.com%2Fcustom_model_serving_nvidia_h100_gpus).\n",
+        "\n",
+        "# @markdown > | Machine Type | Accelerator Type | Recommended Regions |\n",
+        "# @markdown | ----------- | ----------- | ----------- |\n",
+        "# @markdown | a2-ultragpu-1g | 1 NVIDIA_A100_80GB | us-central1, us-east4, europe-west4, asia-southeast1 |\n",
+        "# @markdown | a3-highgpu-2g | 2 NVIDIA_H100_80GB | us-west1, asia-southeast1, europe-west4 |\n",
+        "# @markdown | a3-highgpu-4g | 4 NVIDIA_H100_80GB | us-west1, asia-southeast1, europe-west4 |\n",
+        "# @markdown | a3-highgpu-8g | 8 NVIDIA_H100_80GB | us-central1, us-east5, europe-west4, us-west1, asia-southeast1 |\n",
+        "\n",
         "import datetime\n",
         "# Import the necessary packages\n",
         "import importlib\n",
@@ -355,11 +364,6 @@
         "image_url = \"https://llava-vl.github.io/static/images/view.jpg\"  # @param {type:\"string\"}\n",
         "# fmt: on\n",
         "\n",
-        "image = common_util.download_image(image_url)\n",
-        "resized_image = common_util.resize_image(image, 800)\n",
-        "image_base64 = common_util.image_to_base64(resized_image)\n",
-        "display(resized_image)\n",
-        "\n",
         "# Loads an existing endpoint instance using the endpoint name:\n",
         "# - Using `endpoint_name = endpoint.name` allows us to get the\n",
         "#   endpoint name of the endpoint `endpoint` created in the cell\n",
@@ -385,7 +389,7 @@
         "instances = [\n",
         "    {\n",
         "        \"prompt\": prompt,\n",
-        "        \"multi_modal_data\": {\"image\": image_base64},\n",
+        "        \"multi_modal_data\": {\"image\": image_url},\n",
         "        \"max_tokens\": max_tokens,\n",
         "        \"temperature\": temperature,\n",
         "        \"top_p\": top_p,\n",