From c24fac01635f3e7f009baa267579b5294f2cf3f8 Mon Sep 17 00:00:00 2001 From: Margubur Rahman Date: Thu, 16 Oct 2025 11:22:35 +0000 Subject: [PATCH 1/4] Migrate gsutil usage to gcloud storage --- .../prediction_featurestore_integration.ipynb | 18 +++++-------- .../get_started_with_model_registry.ipynb | 9 +++---- .../model_garden_openai_api_llama3_1.ipynb | 6 ++--- ...el_garden_pytorch_mixtral_deployment.ipynb | 14 +++++----- ...object_detection_on_vertex_endpoints.ipynb | 15 ++++------- .../sdk-feature-store-pandas.ipynb | 11 +++----- ..._model_training_and_batch_prediction.ipynb | 27 +++++++------------ ...tarted_with_vertex_private_endpoints.ipynb | 9 +++---- .../wide_and_deep_on_vertex_pipelines.ipynb | 6 ++--- ...ertex_training_with_custom_container.ipynb | 12 +++------ 10 files changed, 44 insertions(+), 83 deletions(-) diff --git a/community-content/prediction_featurestore_integration/prediction_featurestore_integration.ipynb b/community-content/prediction_featurestore_integration/prediction_featurestore_integration.ipynb index c6960b254..d38530fba 100644 --- a/community-content/prediction_featurestore_integration/prediction_featurestore_integration.ipynb +++ b/community-content/prediction_featurestore_integration/prediction_featurestore_integration.ipynb @@ -293,8 +293,7 @@ }, "outputs": [], "source": [ - "! gsutil mb -l $REGION -p $PROJECT_ID $BUCKET_URI" - ] + "! gcloud storage buckets create --location=$REGION --project=$PROJECT_ID $BUCKET_URI" ] }, { "cell_type": "code", @@ -304,8 +303,7 @@ }, "outputs": [], "source": [ - "! gsutil ls -al $BUCKET_URI" - ] + "! gcloud storage ls --all-versions --long $BUCKET_URI" ] }, { "cell_type": "markdown", @@ -324,8 +322,7 @@ }, "outputs": [], "source": [ - "! gsutil cp -r gs://mco-mm/churn/* $BUCKET_URI" - ] + "! 
gcloud storage cp --recursive gs://mco-mm/churn/* $BUCKET_URI" ] }, { "cell_type": "markdown", @@ -1063,8 +1060,7 @@ "outputs": [], "source": [ "# Remove if the file already exists\n", - "!gsutil rm $BUCKET_URI/prediction_featuresstore_fetch_config.yaml" - ] + "!gcloud storage rm $BUCKET_URI/prediction_featuresstore_fetch_config.yaml" ] }, { "cell_type": "code", @@ -1074,8 +1070,7 @@ }, "outputs": [], "source": [ - "!gsutil cp prediction_featuresstore_fetch_config.yaml $BUCKET_URI" - ] + "!gcloud storage cp prediction_featuresstore_fetch_config.yaml $BUCKET_URI" ] }, { "cell_type": "markdown", @@ -1475,8 +1470,7 @@ "outputs": [], "source": [ "# Delete bucket\n", - "!gsutil -m rm -r $BUCKET_URI" - ] + "!gcloud storage rm --recursive $BUCKET_URI" ] } ], "metadata": { diff --git a/notebooks/community/ml_ops/stage3/get_started_with_model_registry.ipynb b/notebooks/community/ml_ops/stage3/get_started_with_model_registry.ipynb index 224f0d143..0cf458bf4 100644 --- a/notebooks/community/ml_ops/stage3/get_started_with_model_registry.ipynb +++ b/notebooks/community/ml_ops/stage3/get_started_with_model_registry.ipynb @@ -441,8 +441,7 @@ }, "outputs": [], "source": [ - "! gsutil mb -l $REGION $BUCKET_URI" - ] + "! gcloud storage buckets create --location=$REGION $BUCKET_URI" ] }, { "cell_type": "markdown", @@ -461,8 +460,7 @@ }, "outputs": [], "source": [ - "! gsutil ls -al $BUCKET_URI" - ] + "! gcloud storage ls --all-versions --long $BUCKET_URI" ] }, { "cell_type": "markdown", @@ -1176,8 +1174,7 @@ " print(e)\n", "\n", "if delete_bucket or os.getenv(\"IS_TESTING\"):\n", - " ! gsutil rm -rf {BUCKET_URI}" - ] + " ! 
gcloud storage rm --recursive --continue-on-error {BUCKET_URI}" ] } ], "metadata": { diff --git a/notebooks/community/model_garden/model_garden_openai_api_llama3_1.ipynb b/notebooks/community/model_garden/model_garden_openai_api_llama3_1.ipynb index 6bf4011f3..8a7392102 100644 --- a/notebooks/community/model_garden/model_garden_openai_api_llama3_1.ipynb +++ b/notebooks/community/model_garden/model_garden_openai_api_llama3_1.ipynb @@ -253,8 +253,7 @@ }, "outputs": [], "source": [ - "! gsutil mb -l {LOCATION} -p {PROJECT_ID} {BUCKET_URI}" - ] + "! gcloud storage buckets create --location={LOCATION} --project={PROJECT_ID} {BUCKET_URI}" ] }, { "cell_type": "markdown", @@ -1237,8 +1236,7 @@ " rag.delete_corpus(name=rag_corpus.name)\n", "\n", "if delete_bucket:\n", - " ! gsutil -m rm -r $BUCKET_NAME" - ] + " ! gcloud storage rm --recursive $BUCKET_NAME" ] } ], "metadata": { diff --git a/notebooks/community/model_garden/model_garden_pytorch_mixtral_deployment.ipynb b/notebooks/community/model_garden/model_garden_pytorch_mixtral_deployment.ipynb index c0484bbfd..02ae0d477 100644 --- a/notebooks/community/model_garden/model_garden_pytorch_mixtral_deployment.ipynb +++ b/notebooks/community/model_garden/model_garden_pytorch_mixtral_deployment.ipynb @@ -174,11 +174,10 @@ "if BUCKET_URI is None or BUCKET_URI.strip() == \"\" or BUCKET_URI == \"gs://\":\n", " BUCKET_URI = f\"gs://{PROJECT_ID}-tmp-{now}-{str(uuid.uuid4())[:4]}\"\n", " BUCKET_NAME = \"/\".join(BUCKET_URI.split(\"/\")[:3])\n", - " ! gsutil mb -l {REGION} {BUCKET_URI}\n", - "else:\n", + " ! gcloud storage buckets create --location {REGION} {BUCKET_URI}\n", "else:\n", " assert BUCKET_URI.startswith(\"gs://\"), \"BUCKET_URI must start with `gs://`.\"\n", - " shell_output = ! gsutil ls -Lb {BUCKET_NAME} | grep \"Location constraint:\" | sed \"s/Location constraint://\"\n", - " bucket_region = shell_output[0].strip().lower()\n", + " # Note: The format of the full listing output is different. 
gcloud storage uses a title case for keys and will not display a field if its value is \"None\".\n", + " shell_output = ! gcloud storage ls --full --buckets {BUCKET_NAME} | grep \"Location constraint:\" | sed \"s/Location constraint://\"\n", " bucket_region = shell_output[0].strip().lower()\n", " if bucket_region != REGION:\n", " raise ValueError(\n", " \"Bucket region %s is different from notebook region %s\"\n", @@ -202,8 +201,8 @@ "\n", "\n", "# Provision permissions to the SERVICE_ACCOUNT with the GCS bucket\n", - "! gsutil iam ch serviceAccount:{SERVICE_ACCOUNT}:roles/storage.admin $BUCKET_NAME\n", - "\n", + "# Note: Migrating scripts using gsutil iam ch is more complex than get or set. You need to replace the single iam ch command with a series of gcloud storage bucket add-iam-policy-binding and/or gcloud storage bucket remove-iam-policy-binding commands, or replicate the read-modify-write loop.\n", + "! gcloud storage buckets add-iam-policy-binding $BUCKET_NAME --member=serviceAccount:{SERVICE_ACCOUNT} --role=roles/storage.admin\n", "\n", "! gcloud config set project $PROJECT_ID\n", "! gcloud projects add-iam-policy-binding --no-user-output-enabled {PROJECT_ID} --member=serviceAccount:{SERVICE_ACCOUNT} --role=\"roles/storage.admin\"\n", "! gcloud projects add-iam-policy-binding --no-user-output-enabled {PROJECT_ID} --member=serviceAccount:{SERVICE_ACCOUNT} --role=\"roles/aiplatform.user\"" @@ -826,8 +825,7 @@ "\n", "delete_bucket = False # @param {type:\"boolean\"}\n", "if delete_bucket:\n", - " ! gsutil -m rm -r $BUCKET_NAME" - ] + " ! 
gcloud storage rm --recursive $BUCKET_NAME" ] } ], "metadata": { diff --git a/notebooks/community/vertex_endpoints/tf_hub_obj_detection/deploy_tfhub_object_detection_on_vertex_endpoints.ipynb b/notebooks/community/vertex_endpoints/tf_hub_obj_detection/deploy_tfhub_object_detection_on_vertex_endpoints.ipynb index 188ba2124..5fef39e60 100644 --- a/notebooks/community/vertex_endpoints/tf_hub_obj_detection/deploy_tfhub_object_detection_on_vertex_endpoints.ipynb +++ b/notebooks/community/vertex_endpoints/tf_hub_obj_detection/deploy_tfhub_object_detection_on_vertex_endpoints.ipynb @@ -445,8 +445,7 @@ }, "outputs": [], "source": [ - "! gsutil mb -p $PROJECT_ID -l $REGION $BUCKET_NAME" - ] + "! gcloud storage buckets create --project=$PROJECT_ID --location=$REGION $BUCKET_NAME" ] }, { "cell_type": "markdown", @@ -465,8 +464,7 @@ }, "outputs": [], "source": [ - "! gsutil ls -al $BUCKET_NAME" - ] + "! gcloud storage ls --all-versions --long $BUCKET_NAME" ] }, { "cell_type": "markdown", @@ -950,8 +948,7 @@ }, "outputs": [], "source": [ - "!gsutil cp -r $VERTEX_MODEL_PATH $BUCKET_NAME/obj_detection_model_vertex" - ] + "!gcloud storage cp --recursive $VERTEX_MODEL_PATH $BUCKET_NAME/obj_detection_model_vertex" ] }, { "cell_type": "code", @@ -961,8 +958,7 @@ }, "outputs": [], "source": [ - "!gsutil ls $BUCKET_NAME" - ] + "!gcloud storage ls $BUCKET_NAME" ] }, { "cell_type": "markdown", @@ -1297,8 +1293,7 @@ "--quiet\n", "\n", "# Delete Cloud Storage objects that were created\n", - "#! gsutil -m rm -r $BUCKET_NAME" - ] + "#! gcloud storage rm --recursive $BUCKET_NAME" ] } ], "metadata": { diff --git a/notebooks/official/feature_store_legacy/sdk-feature-store-pandas.ipynb b/notebooks/official/feature_store_legacy/sdk-feature-store-pandas.ipynb index 1a3cb2f36..8d72c5d4f 100644 --- a/notebooks/official/feature_store_legacy/sdk-feature-store-pandas.ipynb +++ b/notebooks/official/feature_store_legacy/sdk-feature-store-pandas.ipynb @@ -303,8 +303,7 @@ }, "outputs": [], "source": [ - "! 
gsutil mb -l {LOCATION} -p {PROJECT_ID} {BUCKET_URI}" - ] + "! gcloud storage buckets create --location={LOCATION} --project={PROJECT_ID} {BUCKET_URI}" ] }, { "cell_type": "markdown", @@ -564,10 +563,7 @@ "# set the local file names\n", "USERS_AVRO_FN = \"users.avro\"\n", "MOVIES_AVRO_FN = \"movies.avro\"\n", - "# copy the files using gsutil\n", - "! gsutil cp $GCS_USERS_AVRO_URI $USERS_AVRO_FN\n", - "! gsutil cp $GCS_MOVIES_AVRO_URI $MOVIES_AVRO_FN" - ] + "# copy the files using gcloud storage\n", "! gcloud storage cp $GCS_USERS_AVRO_URI $USERS_AVRO_FN\n", "! gcloud storage cp $GCS_MOVIES_AVRO_URI $MOVIES_AVRO_FN" ] }, { "cell_type": "markdown", @@ -1127,8 +1123,7 @@ "# Delete Cloud Storage objects that were created\n", "delete_bucket = False # Set True for deletion\n", "if delete_bucket:\n", - " ! gsutil -m rm -r $BUCKET_URI" - ] + " ! gcloud storage rm --recursive $BUCKET_URI" ] } ], "metadata": { diff --git a/notebooks/official/pipelines/custom_model_training_and_batch_prediction.ipynb b/notebooks/official/pipelines/custom_model_training_and_batch_prediction.ipynb index 651b441fa..7a2135100 100644 --- a/notebooks/official/pipelines/custom_model_training_and_batch_prediction.ipynb +++ b/notebooks/official/pipelines/custom_model_training_and_batch_prediction.ipynb @@ -296,8 +296,7 @@ }, "outputs": [], "source": [ - "! gsutil mb -l {LOCATION} -p {PROJECT_ID} {BUCKET_URI}" - ] + "! gcloud storage buckets create --location={LOCATION} --project={PROJECT_ID} {BUCKET_URI}" ] }, { "cell_type": "markdown", @@ -369,10 +368,10 @@ }, "outputs": [], "source": [ - "! gsutil iam ch serviceAccount:{SERVICE_ACCOUNT}:roles/storage.objectCreator $BUCKET_URI\n", + "# Note: Migrating scripts using gsutil iam ch is more complex than get or set. You need to replace the single iam ch command with a series of gcloud storage bucket add-iam-policy-binding and/or gcloud storage bucket remove-iam-policy-binding commands, or replicate the read-modify-write loop.\n! 
gcloud storage buckets add-iam-policy-binding $BUCKET_URI --member=serviceAccount:{SERVICE_ACCOUNT} --role=roles/storage.objectCreator\n", "\n", - "! gsutil iam ch serviceAccount:{SERVICE_ACCOUNT}:roles/storage.objectViewer $BUCKET_URI" - ] + # Note: Migrating scripts using gsutil iam ch is more complex than get or set. You need to replace the single iam ch command with a series of gcloud storage bucket add-iam-policy-binding and/or gcloud storage bucket remove-iam-policy-binding commands, or replicate the read-modify-write loop. + ! gcloud storage buckets add-iam-policy-binding $BUCKET_URI --member=serviceAccount:{SERVICE_ACCOUNT} --role=roles/storage.objectViewer ] }, { "cell_type": "markdown", @@ -1068,14 +1067,11 @@ " + \"/evaluation_metrics\"\n", " )\n", " if tf.io.gfile.exists(EXECUTE_OUTPUT):\n", - " ! gsutil cat $EXECUTE_OUTPUT\n", - " return EXECUTE_OUTPUT\n", + " ! gcloud storage cat $EXECUTE_OUTPUT\n", " return EXECUTE_OUTPUT\n", " elif tf.io.gfile.exists(GCP_RESOURCES):\n", - " ! gsutil cat $GCP_RESOURCES\n", - " return GCP_RESOURCES\n", + " ! gcloud storage cat $GCP_RESOURCES\n", " return GCP_RESOURCES\n", " elif tf.io.gfile.exists(EVAL_METRICS):\n", - " ! gsutil cat $EVAL_METRICS\n", - " return EVAL_METRICS\n", + " ! 
gcloud storage cat $EVAL_METRICS\n", " return EVAL_METRICS\n", "\n", " return None\n", "\n", @@ -1083,15 +1079,13 @@ "print(\"model-upload\")\n", "artifacts = print_pipeline_output(job, \"model-upload\")\n", "print(\"\\n\")\n", - "output = !gsutil cat $artifacts\n", - "print(output)\n", + "output = !gcloud storage cat $artifacts\n", "print(output)\n", "output = json.loads(output[0])\n", "model_id = output[\"artifacts\"][\"model\"][\"artifacts\"][0][\"metadata\"][\"resourceName\"]\n", "print(\"model-batch-predict\")\n", "artifacts = print_pipeline_output(job, \"model-batch-predict\")\n", "print(\"\\n\")\n", - "output = !gsutil cat $artifacts\n", - "output = json.loads(output[0])\n", + "output = !gcloud storage cat $artifacts\n", "output = json.loads(output[0])\n", "batch_job_id = output[\"artifacts\"][\"batchpredictionjob\"][\"artifacts\"][0][\"metadata\"][\n", " \"resourceName\"\n", "]" @@ -1133,8 +1127,7 @@ "# Delete the Cloud Storage bucket\n", "delete_bucket = False # Set True for deletion\n", "if delete_bucket:\n", - " ! gsutil rm -r $BUCKET_URI\n", - "\n", + " ! gcloud storage rm --recursive $BUCKET_URI\n", "\n", "# Remove the locally generated files\n", "! rm custom_model_training_spec.yaml\n", "! rm -rf custom" diff --git a/notebooks/official/prediction/get_started_with_vertex_private_endpoints.ipynb b/notebooks/official/prediction/get_started_with_vertex_private_endpoints.ipynb index 52c4324c5..b4564bb80 100644 --- a/notebooks/official/prediction/get_started_with_vertex_private_endpoints.ipynb +++ b/notebooks/official/prediction/get_started_with_vertex_private_endpoints.ipynb @@ -307,8 +307,7 @@ }, "outputs": [], "source": [ - "! gsutil mb -l {LOCATION} -p {PROJECT_ID} {BUCKET_URI}" - ] + "! gcloud storage buckets create --location={LOCATION} --project={PROJECT_ID} {BUCKET_URI}" ] }, { "cell_type": "markdown", @@ -975,8 +974,7 @@ }, "outputs": [], "source": [ - "! 
gsutil cp gs://cloud-ml-data/img/flower_photos/daisy/100080576_f52e8ee070_n.jpg test.jpg" - ] + "! gcloud storage cp gs://cloud-ml-data/img/flower_photos/daisy/100080576_f52e8ee070_n.jpg test.jpg" ] }, { "cell_type": "code", @@ -1171,8 +1169,7 @@ " print(e)\n", "\n", "if delete_bucket:\n", - " ! gsutil rm -rf {BUCKET_URI}\n", - "\n", + " ! gcloud storage rm --recursive --continue-on-error {BUCKET_URI}\n", "\n", "if delete_generated_files:\n", " ! rm -rf \"test.jpg\" \"instances.json\"" ] diff --git a/notebooks/official/tabular_workflows/wide_and_deep_on_vertex_pipelines.ipynb b/notebooks/official/tabular_workflows/wide_and_deep_on_vertex_pipelines.ipynb index 27887bace..372d2f913 100644 --- a/notebooks/official/tabular_workflows/wide_and_deep_on_vertex_pipelines.ipynb +++ b/notebooks/official/tabular_workflows/wide_and_deep_on_vertex_pipelines.ipynb @@ -297,8 +297,7 @@ }, "outputs": [], "source": [ - "! gsutil mb -l $LOCATION -p $PROJECT_ID $BUCKET_URI" - ] + "! gcloud storage buckets create --location $LOCATION --project $PROJECT_ID $BUCKET_URI" ] }, { "cell_type": "markdown", @@ -957,8 +956,7 @@ "# Delete bucket\n", "delete_bucket = False\n", "if delete_bucket or os.getenv(\"IS_TESTING\"):\n", - " ! gsutil -m rm -r $BUCKET_URI" - ] + " ! gcloud storage rm --recursive $BUCKET_URI" ] } ], "metadata": { diff --git a/notebooks/official/training/multi_node_ddp_gloo_vertex_training_with_custom_container.ipynb b/notebooks/official/training/multi_node_ddp_gloo_vertex_training_with_custom_container.ipynb index bc32cfe78..0c4e176d1 100644 --- a/notebooks/official/training/multi_node_ddp_gloo_vertex_training_with_custom_container.ipynb +++ b/notebooks/official/training/multi_node_ddp_gloo_vertex_training_with_custom_container.ipynb @@ -282,8 +282,7 @@ }, "outputs": [], "source": [ - "! gsutil mb -l $LOCATION -p $PROJECT_ID $BUCKET_URI" - ] + "! 
gcloud storage buckets create --location=$LOCATION --project=$PROJECT_ID $BUCKET_URI" ] }, { "cell_type": "markdown", @@ -1203,8 +1202,7 @@ }, "outputs": [], "source": [ - "! gsutil ls $gcs_output_uri_prefix" - ] + "! gcloud storage ls $gcs_output_uri_prefix" ] }, { "cell_type": "markdown", @@ -1242,16 +1240,14 @@ "# Set this to true only if you'd like to delete your artifact repository\n", "delete_artifact_repository = False\n", "\n", - "! gsutil rm -rf $gcs_output_uri_prefix\n", - "\n", + "! gcloud storage rm --recursive --continue-on-error $gcs_output_uri_prefix\n", "\n", "! rm -rf ./trainer\n", "\n", "if delete_artifact_repository:\n", " !gcloud artifacts repositories delete {PRIVATE_REPO} --location={LOCATION} --quiet\n", "\n", "if delete_bucket:\n", - " ! gsutil rm -r $BUCKET_URI\n", - "\n", + " ! gcloud storage rm --recursive $BUCKET_URI\n", "\n", "if delete_tensorboard:\n", " tensorboard.delete()" ] From 1115222924e56f2f59fb6f202bcbc4509d00cc2b Mon Sep 17 00:00:00 2001 From: gurusai-voleti Date: Fri, 12 Dec 2025 14:09:56 +0000 Subject: [PATCH 2/4] changes for 4315 --- .../model_garden_pytorch_mixtral_deployment.ipynb | 14 ++++++++------ ...ustom_model_training_and_batch_prediction.ipynb | 5 ++--- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/notebooks/community/model_garden/model_garden_pytorch_mixtral_deployment.ipynb b/notebooks/community/model_garden/model_garden_pytorch_mixtral_deployment.ipynb index 02ae0d477..5428f2e86 100644 --- a/notebooks/community/model_garden/model_garden_pytorch_mixtral_deployment.ipynb +++ b/notebooks/community/model_garden/model_garden_pytorch_mixtral_deployment.ipynb @@ -174,10 +174,11 @@ "if BUCKET_URI is None or BUCKET_URI.strip() == \"\" or BUCKET_URI == \"gs://\":\n", " BUCKET_URI = f\"gs://{PROJECT_ID}-tmp-{now}-{str(uuid.uuid4())[:4]}\"\n", " BUCKET_NAME = \"/\".join(BUCKET_URI.split(\"/\")[:3])\n", - " ! gcloud storage buckets create --location {REGION} {BUCKET_URI}\n", "else:\n", + " ! 
gcloud storage buckets create --location {REGION} {BUCKET_URI}\n", + "else:\n", " assert BUCKET_URI.startswith(\"gs://\"), \"BUCKET_URI must start with `gs://`.\"\n", - " # Note: The format of the full listing output is different. gcloud storage uses a title case for keys and will not display a field if its value is \"None\".\n", - " shell_output = ! gcloud storage ls --full --buckets {BUCKET_NAME} | grep \"Location constraint:\" | sed \"s/Location constraint://\"\n", " bucket_region = shell_output[0].strip().lower()\n", + " shell_output = ! gcloud storage ls --full --buckets {BUCKET_NAME} | grep \"Location Constraint:\" | sed \"s/Location Constraint://\"\n", + " bucket_region = shell_output[0].strip().lower()\n", " if bucket_region != REGION:\n", " raise ValueError(\n", " \"Bucket region %s is different from notebook region %s\"\n", @@ -201,8 +202,8 @@ "\n", "\n", "# Provision permissions to the SERVICE_ACCOUNT with the GCS bucket\n", - "# Note: Migrating scripts using gsutil iam ch is more complex than get or set. You need to replace the single iam ch command with a series of gcloud storage bucket add-iam-policy-binding and/or gcloud storage bucket remove-iam-policy-binding commands, or replicate the read-modify-write loop.\n", - "! gcloud storage buckets add-iam-policy-binding $BUCKET_NAME --member=serviceAccount:{SERVICE_ACCOUNT} --role=roles/storage.admin\n", "\n", + "! gcloud storage buckets add-iam-policy-binding $BUCKET_NAME --member=serviceAccount:{SERVICE_ACCOUNT} --role=roles/storage.admin\n", + "\n", "! gcloud config set project $PROJECT_ID\n", "! gcloud projects add-iam-policy-binding --no-user-output-enabled {PROJECT_ID} --member=serviceAccount:{SERVICE_ACCOUNT} --role=\"roles/storage.admin\"\n", "! 
gcloud projects add-iam-policy-binding --no-user-output-enabled {PROJECT_ID} --member=serviceAccount:{SERVICE_ACCOUNT} --role=\"roles/aiplatform.user\"" @@ -825,7 +826,8 @@ "\n", "delete_bucket = False # @param {type:\"boolean\"}\n", "if delete_bucket:\n", - " ! gcloud storage rm --recursive $BUCKET_NAME" ] + " ! gcloud storage rm --recursive $BUCKET_NAME" + ] } ], "metadata": { diff --git a/notebooks/official/pipelines/custom_model_training_and_batch_prediction.ipynb b/notebooks/official/pipelines/custom_model_training_and_batch_prediction.ipynb index 7a2135100..d56022ded 100644 --- a/notebooks/official/pipelines/custom_model_training_and_batch_prediction.ipynb +++ b/notebooks/official/pipelines/custom_model_training_and_batch_prediction.ipynb @@ -368,10 +368,9 @@ }, "outputs": [], "source": [ - "# Note: Migrating scripts using gsutil iam ch is more complex than get or set. You need to replace the single iam ch command with a series of gcloud storage bucket add-iam-policy-binding and/or gcloud storage bucket remove-iam-policy-binding commands, or replicate the read-modify-write loop.\n! gcloud storage buckets add-iam-policy-binding $BUCKET_URI --member=serviceAccount:{SERVICE_ACCOUNT} --role=roles/storage.objectCreator\n", + "! gcloud storage buckets add-iam-policy-binding $BUCKET_URI --member=serviceAccount:{SERVICE_ACCOUNT} --role=roles/storage.objectCreator\n", "\n", - # Note: Migrating scripts using gsutil iam ch is more complex than get or set. You need to replace the single iam ch command with a series of gcloud storage bucket add-iam-policy-binding and/or gcloud storage bucket remove-iam-policy-binding commands, or replicate the read-modify-write loop. - ! gcloud storage buckets add-iam-policy-binding $BUCKET_URI --member=serviceAccount:{SERVICE_ACCOUNT} --role=roles/storage.objectViewer ] + "! 
gcloud storage buckets add-iam-policy-binding $BUCKET_URI --member=serviceAccount:{SERVICE_ACCOUNT} --role=roles/storage.objectViewer" ] }, { "cell_type": "markdown", From 5506a3c6c089b95d17b89140980c6049b7a94348 Mon Sep 17 00:00:00 2001 From: gurusai-voleti Date: Tue, 16 Dec 2025 12:43:11 +0000 Subject: [PATCH 3/4] Apply automated linter fixes --- .../prediction_featurestore_integration.ipynb | 2991 ++++++++--------- .../get_started_with_model_registry.ipynb | 9 +- .../model_garden_openai_api_llama3_1.ipynb | 6 +- ...el_garden_pytorch_mixtral_deployment.ipynb | 4 +- ...object_detection_on_vertex_endpoints.ipynb | 20 +- .../sdk-feature-store-pandas.ipynb | 11 +- ..._model_training_and_batch_prediction.ipynb | 24 +- ...tarted_with_vertex_private_endpoints.ipynb | 9 +- .../wide_and_deep_on_vertex_pipelines.ipynb | 6 +- ...ertex_training_with_custom_container.ipynb | 12 +- 10 files changed, 1560 insertions(+), 1532 deletions(-) diff --git a/community-content/prediction_featurestore_integration/prediction_featurestore_integration.ipynb b/community-content/prediction_featurestore_integration/prediction_featurestore_integration.ipynb index d38530fba..21e9cc472 100644 --- a/community-content/prediction_featurestore_integration/prediction_featurestore_integration.ipynb +++ b/community-content/prediction_featurestore_integration/prediction_featurestore_integration.ipynb @@ -1,1501 +1,1494 @@ { - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "id": "7aa1a6f5db32" - }, - "source": [ - "# Overview" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "0a2a426f71c0" - }, - "source": [ - "This notebook is based on this [Prediction and Feature Store Online Serving](https://github.com/GoogleCloudPlatform/vertex-ai-samples/blob/main/notebooks/community/feature_store/mobile_gaming/mobile_gaming_feature_store.ipynb) notebook and [this blog 
post](https://cloud.google.com/blog/topics/developers-practitioners/churn-prediction-game-developers-using-google-analytics-4-ga4-and-bigquery-ml)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "bb819819a5c9" - }, - "source": [ - "### Dataset\n", - "\n", - "The dataset is the public sample export data from an actual mobile game app called \"Flood It!\" (Android, iOS)\n", - "\n", - "### Model\n", - "\n", - "The model you use in this notebook is based on [this blog post](https://cloud.google.com/blog/topics/developers-practitioners/churn-prediction-game-developers-using-google-analytics-4-ga4-and-bigquery-ml). The idea behind this model is that your company has extensive log data describing how your game users have interacted with the site. The raw data contains the following categories of information:\n", - "\n", - "- identity - unique player identitity numbers\n", - "- demographic features - information about the player, such as the geographic region in which a player is located\n", - "- behavioral features - counts of the number of times a player has triggered certain game events, such as reaching a new level\n", - "- churn propensity - this is the label or target feature, it provides an estimated probability that this player will churn, i.e. stop being an active player.\n", - "\n", - "The blog article referenced above explains how to use BigQuery to store the raw data, pre-process the data for machine learning, and train the corresponding model. 
Because this notebook focuses on model monitoring, rather than training models, you're going to reuse a pre-trained version of this model, which has been exported to Cloud Storage, which is stored in `gs://mco-mm/churn`" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "0e87d0008c86" - }, - "source": [ - "# Basic set up" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "2ced96588e13" - }, - "source": [ - "## Install packages" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "4811add5cd1c" - }, - "outputs": [], - "source": [ - "! pip install google-cloud-aiplatform" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "c7947573e336" - }, - "source": [ - "## Set up project" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "94f36c0f4a00" - }, - "outputs": [], - "source": [ - "import os\n", - "\n", - "PROJECT_ID = \"\"\n", - "\n", - "# Get your Google Cloud project ID from gcloud\n", - "if not os.getenv(\"IS_TESTING\"):\n", - " shell_output = !gcloud config list --format 'value(core.project)' 2>/dev/null\n", - " PROJECT_ID = shell_output[0]\n", - " print(\"Project ID: \", PROJECT_ID)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "7d439758446b" - }, - "outputs": [], - "source": [ - "if PROJECT_ID == \"\" or PROJECT_ID is None:\n", - " PROJECT_ID = \"[your-project-id]\" # @param {type:\"string\"}" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "56a76e9ebc4d" - }, - "outputs": [], - "source": [ - "shell_output = ! gcloud projects list --filter=\"PROJECT_ID:'{PROJECT_ID}'\" --format='value(PROJECT_NUMBER)'\n", - "PROJECT_NUMBER = shell_output[0]\n", - "print(\"Project Number:\", PROJECT_NUMBER)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "a74685b6ae29" - }, - "outputs": [], - "source": [ - "! 
gcloud config set project $PROJECT_ID" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "7fdcbacfa8ad" - }, - "outputs": [], - "source": [ - "REGION = \"\"\n", - "\n", - "if REGION == \"\" or REGION is None:\n", - " REGION = \"us-central1\"" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "17c464cec341" - }, - "source": [ - "## Timestamp" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "9fa11b3e7936" - }, - "outputs": [], - "source": [ - "from datetime import datetime\n", - "\n", - "TIMESTAMP = datetime.now().strftime(\"%Y%m%d%H%M%S\")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "c03afe801c7b" - }, - "source": [ - "## Authenticate your Google Cloud account" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "254b58f37761" - }, - "source": [ - "If you are using Vertex AI Workbench Notebooks, your environment is already authenticated. Skip this step.\n", - "\n", - "If you are using Colab, run the cell below and follow the instructions when prompted to authenticate your account via oAuth.\n", - "\n", - "Otherwise, follow these steps:\n", - "\n", - "1. In the Cloud Console, go to the Create service account key page.\n", - "\n", - "2. Click Create service account.\n", - "\n", - "3. In the Service account name field, enter a name, and click Create.\n", - "\n", - "4. In the Grant this service account access to project section, click the Role drop-down list and add the following roles:\n", - "\n", - "- BigQuery Admin\n", - "- Storage Admin\n", - "- Storage Object Admin\n", - "- Vertex AI Administrator\n", - "- Vertex AI Feature Store Admin\n", - "\n", - "5. Click Create. A JSON file that contains your key downloads to your local environment.\n", - "\n", - "6. Enter the path to your service account key as the `GOOGLE_APPLICATION_CREDENTIALS` variable in the cell below and run the cell." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "6a08e2a6c84f" - }, - "outputs": [], - "source": [ - "# If you are running this notebook in Colab, run this cell and follow the\n", - "# instructions to authenticate your GCP account. This provides access to your\n", - "# Cloud Storage bucket and lets you submit training jobs and prediction\n", - "# requests.\n", - "\n", - "import os\n", - "import sys\n", - "\n", - "# If on Vertex AI Workbench, then don't execute this code\n", - "IS_COLAB = \"google.colab\" in sys.modules\n", - "if not os.path.exists(\"/opt/deeplearning/metadata/env_version\") and not os.getenv(\n", - " \"DL_ANACONDA_HOME\"\n", - "):\n", - " if \"google.colab\" in sys.modules:\n", - " from google.colab import auth as google_auth\n", - "\n", - " google_auth.authenticate_user()\n", - "\n", - " # If you are running this notebook locally, replace the string below with the\n", - " # path to your service account key and run this cell to authenticate your GCP\n", - " # account.\n", - " elif not os.getenv(\"IS_TESTING\"):\n", - " %env GOOGLE_APPLICATION_CREDENTIALS ''" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "2f52dced4061" - }, - "source": [ - "Create a bucket and copy the exported model to it" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "12c9381edd2a" - }, - "outputs": [], - "source": [ - "BUCKET_NAME = \"[your-bucket-name]\" # @param {type:\"string\"}\n", - "BUCKET_URI = f\"gs://{BUCKET_NAME}\"" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "50c6cda053e1" - }, - "outputs": [], - "source": [ - "if BUCKET_NAME == \"\" or BUCKET_NAME is None or BUCKET_NAME == \"[your-bucket-name]\":\n", - " BUCKET_NAME = PROJECT_ID + \"-aip-\" + TIMESTAMP\n", - " BUCKET_URI = f\"gs://{BUCKET_NAME}\"" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "197c22fada47" - }, - "source": [ - "Only if your bucket doesn't 
already exist: Run the following cell to create your Cloud Storage bucket." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "2de44a514cd1" - }, - "outputs": [], - "source": [ - "! gcloud storage buckets create --location=$REGION --project=$PROJECT_ID $BUCKET_URI" ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "ca57dc423195" - }, - "outputs": [], - "source": [ - "! gcloud storage ls --all-versions --long $BUCKET_URI" ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "0a6202578b94" - }, - "source": [ - "Copy the trained model to your bucket" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "6b2501fc7f01" - }, - "outputs": [], - "source": [ - "! gcloud storage cp --recursive gs://mco-mm/churn/* $BUCKET_URI" ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "c672b6901814" - }, - "source": [ - "### Create a service account\n", - "\n", - "We need a service account for this new feature because the prediction workload's credential does not have access to Feature Store. Create one and grant the role `roles/aiplatform.serviceAgent` to it, which will grant it the access to most prediction's resources and Feature Store." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "534b78bcc16d" - }, - "outputs": [], - "source": [ - "SA_NAME = \"prediction-and-fs-testing\"\n", - "SA_DESCRIPTION = '\"SA to test Prediction and Feature Store integration\"'\n", - "DISPLAY_NAME = \"prediction-and-fs-testing\"" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "3cf1c055b80e" - }, - "outputs": [], - "source": [ - "!gcloud iam service-accounts create $SA_NAME \\\n", - " --description=$SA_DESCRIPTION \\\n", - " --display-name=$DISPLAY_NAME" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "fef2f4529b22" - }, - "outputs": [], - "source": [ - "# SERVICE_ACCOUNT = \"prediction-and-fs@bon-test-0.iam.gserviceaccount.com\"\n", - "SERVICE_ACCOUNT = (\n", - " f\"{SA_NAME}@{PROJECT_ID}.iam.gserviceaccount.com\" # @param {type:\"string\"}\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "8d7d03ced198" - }, - "source": [ - "Grant the Service account the `Storage Admin` and `Vertex AI Feature Store Data Viewer` role to access the artifacts in GCS and data in Feature Store" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "b5e3ad275dfe" - }, - "outputs": [], - "source": [ - "!gcloud projects add-iam-policy-binding $PROJECT_ID \\\n", - " --member=serviceAccount:$SERVICE_ACCOUNT \\\n", - " --role=roles/storage.objectAdmin;\n", - "\n", - "!gcloud projects add-iam-policy-binding $PROJECT_ID \\\n", - " --member=serviceAccount:$SERVICE_ACCOUNT \\\n", - " --role=roles/aiplatform.featurestoreDataViewer;" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "d3938f6d37a1" - }, - "source": [ - "## Import libraries" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "7759fe720d45" - }, - "outputs": [], - "source": [ - "# General\n", - "import os\n", - "import sys\n", - "\n", - "# Vertex AI and its Feature 
Store\n", - "from google.cloud import aiplatform\n", - "from google.cloud.aiplatform import Featurestore" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "2eb5493cf2ac" - }, - "source": [ - "**NOTE:** The feature is only available in `autopush` now" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "9b6b5c58669d" - }, - "outputs": [], - "source": [ - "aiplatform.constants.base.API_BASE_PATH = \"autopush-aiplatform.sandbox.googleapis.com\"\n", - "aiplatform.constants.base.PREDICTION_API_BASE_PATH = (\n", - " \"autopush-prediction-aiplatform.sandbox.googleapis.com\"\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "7def96de8098" - }, - "outputs": [], - "source": [ - "aiplatform.init(project=PROJECT_ID, location=REGION)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "56a185de336f" - }, - "source": [ - "# Create Feature Store" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "363c77a9e9e3" - }, - "outputs": [], - "source": [ - "FEATURESTORE_ID = \"mobile_gaming_\" + TIMESTAMP # @param {type:\"string\"}\n", - "\n", - "# Vertex AI Feature store\n", - "ONLINE_STORE_NODES_COUNT = 5\n", - "DEMOGRAPHIC_ENTITY_ID = \"demographic\"\n", - "BEHAVIOR_ENTITY_ID = \"behavior\"\n", - "FEATURE_TIME = \"timestamp\"\n", - "ENTITY_ID_FIELD = \"user_pseudo_id\"" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "bcaa5c3644f0" - }, - "source": [ - "We will use the exported sample data of the dataset used in this [this blog post](https://cloud.google.com/blog/topics/developers-practitioners/churn-prediction-game-developers-using-google-analytics-4-ga4-and-bigquery-ml)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "72a36025d162" - }, - "outputs": [], - "source": [ - "SOURCE_URI = \"gs://featurestore_prediction_integration/data/mobile_gaming_dataset.csv\"" - ] - }, - { - 
"cell_type": "markdown", - "metadata": { - "id": "85f09125566f" - }, - "source": [ - "## Create Feature Store" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "764aa2b60016" - }, - "outputs": [], - "source": [ - "try:\n", - " mobile_gaming_feature_store = Featurestore.create(\n", - " featurestore_id=FEATURESTORE_ID,\n", - " online_store_fixed_node_count=ONLINE_STORE_NODES_COUNT,\n", - " sync=True,\n", - " )\n", - "except RuntimeError as error:\n", - " print(error)\n", - "else:\n", - " FEATURESTORE_RESOURCE_NAME = mobile_gaming_feature_store.resource_name\n", - " print(f\"Feature store created: {FEATURESTORE_RESOURCE_NAME}\")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "7d4ff784a9f7" - }, - "source": [ - "### Create Entities" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "51e62e67d4e8" - }, - "outputs": [], - "source": [ - "try:\n", - " demographic_entity_type = mobile_gaming_feature_store.create_entity_type(\n", - " entity_type_id=DEMOGRAPHIC_ENTITY_ID,\n", - " description=\"User demographic Entity\",\n", - " sync=True,\n", - " )\n", - "except RuntimeError as error:\n", - " print(error)\n", - "else:\n", - " DEMOGRAPHIC_ENTITY_RESOURCE_NAME = demographic_entity_type.resource_name\n", - " print(\"Entity type name is\", DEMOGRAPHIC_ENTITY_RESOURCE_NAME)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "6ae913f7e9cc" - }, - "outputs": [], - "source": [ - "try:\n", - " behavior_entity_type = mobile_gaming_feature_store.create_entity_type(\n", - " entity_type_id=BEHAVIOR_ENTITY_ID, description=\"User behavior Entity\", sync=True\n", - " )\n", - "except RuntimeError as error:\n", - " print(error)\n", - "else:\n", - " BEHAVIOR_ENTITY_RESOURCE_NAME = behavior_entity_type.resource_name\n", - " print(\"Entity type name is\", BEHAVIOR_ENTITY_RESOURCE_NAME)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "ae2b5655049b" - }, 
- "source": [ - "### Create Features" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "809f781e797a" - }, - "source": [ - "#### Feature Config" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "22d855f118ea" - }, - "outputs": [], - "source": [ - "demographic_feature_configs = {\n", - " \"country\": {\n", - " \"value_type\": \"STRING\",\n", - " \"description\": \"The country of customer\",\n", - " \"labels\": {\"status\": \"passed\"},\n", - " },\n", - " \"operating_system\": {\n", - " \"value_type\": \"STRING\",\n", - " \"description\": \"The operating system of device\",\n", - " \"labels\": {\"status\": \"passed\"},\n", - " },\n", - " \"language\": {\n", - " \"value_type\": \"STRING\",\n", - " \"description\": \"The language of device\",\n", - " \"labels\": {\"status\": \"passed\"},\n", - " },\n", - " \"user_pseudo_id\": {\n", - " \"value_type\": \"STRING\",\n", - " \"description\": \"User pseudo id\",\n", - " \"labels\": {\"status\": \"passed\"},\n", - " },\n", - "}\n", - "\n", - "behavior_feature_configs = {\n", - " \"cnt_user_engagement\": {\n", - " \"value_type\": \"DOUBLE\",\n", - " \"description\": \"A variable of user engagement level\",\n", - " \"labels\": {\"status\": \"passed\"},\n", - " },\n", - " \"cnt_level_start_quickplay\": {\n", - " \"value_type\": \"DOUBLE\",\n", - " \"description\": \"A variable of user engagement with start level\",\n", - " \"labels\": {\"status\": \"passed\"},\n", - " },\n", - " \"cnt_level_end_quickplay\": {\n", - " \"value_type\": \"DOUBLE\",\n", - " \"description\": \"A variable of user engagement with end level\",\n", - " \"labels\": {\"status\": \"passed\"},\n", - " },\n", - " \"cnt_level_complete_quickplay\": {\n", - " \"value_type\": \"DOUBLE\",\n", - " \"description\": \"A variable of user engagement with complete status\",\n", - " \"labels\": {\"status\": \"passed\"},\n", - " },\n", - " \"cnt_level_reset_quickplay\": {\n", - " \"value_type\": \"DOUBLE\",\n", - " 
\"description\": \"A variable of user engagement with reset status\",\n", - " \"labels\": {\"status\": \"passed\"},\n", - " },\n", - " \"cnt_post_score\": {\n", - " \"value_type\": \"DOUBLE\",\n", - " \"description\": \"A variable of user score\",\n", - " \"labels\": {\"status\": \"passed\"},\n", - " },\n", - " \"cnt_spend_virtual_currency\": {\n", - " \"value_type\": \"DOUBLE\",\n", - " \"description\": \"A variable of user virtual amount\",\n", - " \"labels\": {\"status\": \"passed\"},\n", - " },\n", - " \"cnt_ad_reward\": {\n", - " \"value_type\": \"DOUBLE\",\n", - " \"description\": \"A variable of user reward\",\n", - " \"labels\": {\"status\": \"passed\"},\n", - " },\n", - " \"cnt_challenge_a_friend\": {\n", - " \"value_type\": \"DOUBLE\",\n", - " \"description\": \"A variable of user challenges with friends\",\n", - " \"labels\": {\"status\": \"passed\"},\n", - " },\n", - " \"cnt_completed_5_levels\": {\n", - " \"value_type\": \"DOUBLE\",\n", - " \"description\": \"A variable of user level 5 completed\",\n", - " \"labels\": {\"status\": \"passed\"},\n", - " },\n", - " \"cnt_use_extra_steps\": {\n", - " \"value_type\": \"DOUBLE\",\n", - " \"description\": \"A variable of user extra steps\",\n", - " \"labels\": {\"status\": \"passed\"},\n", - " },\n", - " \"month\": {\n", - " \"value_type\": \"INT64\",\n", - " \"description\": \"First touch month\",\n", - " \"labels\": {\"status\": \"passed\"},\n", - " },\n", - " \"julianday\": {\n", - " \"value_type\": \"INT64\",\n", - " \"description\": \"First touch julian day\",\n", - " \"labels\": {\"status\": \"passed\"},\n", - " },\n", - " \"dayofweek\": {\n", - " \"value_type\": \"INT64\",\n", - " \"description\": \"First touch day of week\",\n", - " \"labels\": {\"status\": \"passed\"},\n", - " },\n", - "}" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "1b72bf136987" - }, - "source": [ - "#### Create features using `batch_create_features` method" - ] - }, - { - "cell_type": "code", - 
"execution_count": null, - "metadata": { - "id": "9c2769834702" - }, - "outputs": [], - "source": [ - "try:\n", - " demographic_entity_type.batch_create_features(\n", - " feature_configs=demographic_feature_configs, sync=True\n", - " )\n", - "except RuntimeError as error:\n", - " print(error)\n", - "else:\n", - " for feature in demographic_entity_type.list_features():\n", - " print(\"\")\n", - " print(f\"The resource name of {feature.name} feature is\", feature.resource_name)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "68b07d3b2bf0" - }, - "outputs": [], - "source": [ - "try:\n", - " behavior_entity_type.batch_create_features(\n", - " feature_configs=behavior_feature_configs, sync=True\n", - " )\n", - "except RuntimeError as error:\n", - " print(error)\n", - "else:\n", - " for feature in behavior_entity_type.list_features():\n", - " print(\"\")\n", - " print(f\"The resource name of {feature.name} feature is\", feature.resource_name)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "34bf9fc11ae3" - }, - "source": [ - "### Ingest features " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "92e92c854028" - }, - "outputs": [], - "source": [ - "DEMOGRAPHIC_FEATURES_IDS = [\n", - " feature.name for feature in demographic_entity_type.list_features()\n", - "]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "50a7e935a870" - }, - "outputs": [], - "source": [ - "try:\n", - " demographic_entity_type.ingest_from_gcs(\n", - " feature_ids=DEMOGRAPHIC_FEATURES_IDS,\n", - " feature_time=FEATURE_TIME,\n", - " gcs_source_uris=SOURCE_URI,\n", - " gcs_source_type=\"csv\",\n", - " entity_id_field=ENTITY_ID_FIELD,\n", - " disable_online_serving=False,\n", - " worker_count=10,\n", - " sync=True,\n", - " )\n", - "except RuntimeError as error:\n", - " print(error)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": 
"dcbb4fa32e40" - }, - "outputs": [], - "source": [ - "BEHAVIOR_FEATURES_IDS = [\n", - " feature.name for feature in behavior_entity_type.list_features()\n", - "]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "a36facc52807" - }, - "outputs": [], - "source": [ - "try:\n", - " behavior_entity_type.ingest_from_gcs(\n", - " feature_ids=BEHAVIOR_FEATURES_IDS,\n", - " feature_time=FEATURE_TIME,\n", - " gcs_source_uris=SOURCE_URI,\n", - " gcs_source_type=\"csv\",\n", - " entity_id_field=ENTITY_ID_FIELD,\n", - " disable_online_serving=False,\n", - " worker_count=10,\n", - " sync=True,\n", - " )\n", - "except RuntimeError as error:\n", - " print(error)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "bec74a816daf" - }, - "source": [ - "# Create Feature Fetch Config" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "b1131367aeec" - }, - "source": [ - "## Feature fetch config proto:\n", - "\n", - "```protobuf\n", - "message FeatureFetchConfig {\n", - " // The format of the internal prediction request auto-created after features\n", - " // are fetched. Prediction currently supports XGBoost, TensorFlow and\n", - " // scikit-learn, and will soon start to support Pytorch. Among these\n", - " // frameworks, XGBoost supports array input format only (i.e. 
input\n", - " // features are in the form of an array), whereas the other three frameworks\n", - " // can allow both dictionary format and array format inputs.\n", - " ModelInputFormat model_input_format = 3;\n", - " enum ModelInputFormat {\n", - " MODEL_INPUT_FORMAT_UNSPECIFIED = 0;\n", - " ARRAY = 1;\n", - " DICT = 2;\n", - " }\n", - "\n", - " // Specifying details of the prediction input\n", - " repeated Feature features = 4;\n", - " message Feature {\n", - " // When internal_request_format = DICT, this value_key is used\n", - " // for the Internal Prediction Request as the key to the feature value.\n", - " // In the FeatureFetchConfig, a pass-through feature can be represented by\n", - " // a Feature message with just a value_key.\n", - " string value_key = 1;\n", - "\n", - " // Defines where from featurestore(s) does each feature comes.\n", - " FeatureSource feature_source = 2;\n", - " message FeatureSource {\n", - " // From a high level, there are two fields in FeatureSource, where\n", - " // entity_id_key is about the \"row\" from where a value is fetched, and\n", - " // feature_resource_path is about the \"column\" from where a value is\n", - " // fetched.\n", - "\n", - " // Specifies which key holds the entity ID in the external request sent by\n", - " // the user to the prediction service.\n", - " string entity_id_key = 1;\n", - "\n", - " // The resource path in URL format, to identify the entity type.\n", - " // The format should be\n", - " // projects/PROJECT/locations/LOCATION/featurestores/FEATURESTORE_ID/entityTypes/ENTITY_TYPE_ID/\n", - " // e.g.\n", - " // \"projects/my-feature-store-project/locations/us-central1/featurestores/movie_predictions/entityTypes/movies/\"\n", - " string entity_type = 2;\n", - "\n", - " // The feature ID defined in feature store.\n", - " string feature_id = 3;\n", - " }\n", - " }\n", - "}\n", - "```" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "092d012dfd27" - }, - "source": [ - "## Generate Feature 
fetch config" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "7ee405070fe4" - }, - "outputs": [], - "source": [ - "FEATURE_FETCH_CONFIG_TEMPLATE = \"\"\"modelInputFormat: DICT\n", - "features:\n", - "- valueKey: user_pseudo_id\n", - "- valueKey: country\n", - " featureSource:\n", - " entityIdKey: demographic\n", - " entityType: projects/{PROJECT_NUMBER}/locations/{REGION}/featurestores/{FEATURESTORE_ID}/entityTypes/demographic\n", - " featureId: country\n", - "- valueKey: operating_system\n", - " featureSource:\n", - " entityIdKey: demographic\n", - " entityType: projects/{PROJECT_NUMBER}/locations/{REGION}/featurestores/{FEATURESTORE_ID}/entityTypes/demographic\n", - " featureId: operating_system\n", - "- valueKey: language\n", - " featureSource:\n", - " entityIdKey: demographic\n", - " entityType: projects/{PROJECT_NUMBER}/locations/{REGION}/featurestores/{FEATURESTORE_ID}/entityTypes/demographic\n", - " featureId: language\n", - "- valueKey: cnt_user_engagement\n", - " featureSource:\n", - " entityIdKey: behavior\n", - " entityType: projects/{PROJECT_NUMBER}/locations/{REGION}/featurestores/{FEATURESTORE_ID}/entityTypes/behavior\n", - " featureId: cnt_user_engagement\n", - "- valueKey: cnt_level_start_quickplay\n", - " featureSource:\n", - " entityIdKey: behavior\n", - " entityType: projects/{PROJECT_NUMBER}/locations/{REGION}/featurestores/{FEATURESTORE_ID}/entityTypes/behavior\n", - " featureId: cnt_level_start_quickplay\n", - "- valueKey: cnt_level_end_quickplay\n", - " featureSource:\n", - " entityIdKey: behavior\n", - " entityType: projects/{PROJECT_NUMBER}/locations/{REGION}/featurestores/{FEATURESTORE_ID}/entityTypes/behavior\n", - " featureId: cnt_level_end_quickplay\n", - "- valueKey: cnt_level_complete_quickplay\n", - " featureSource:\n", - " entityIdKey: behavior\n", - " entityType: projects/{PROJECT_NUMBER}/locations/{REGION}/featurestores/{FEATURESTORE_ID}/entityTypes/behavior\n", - " featureId: 
cnt_level_complete_quickplay\n", - "- valueKey: cnt_level_reset_quickplay\n", - " featureSource:\n", - " entityIdKey: behavior\n", - " entityType: projects/{PROJECT_NUMBER}/locations/{REGION}/featurestores/{FEATURESTORE_ID}/entityTypes/behavior\n", - " featureId: cnt_level_reset_quickplay\n", - "- valueKey: cnt_post_score\n", - " featureSource:\n", - " entityIdKey: behavior\n", - " entityType: projects/{PROJECT_NUMBER}/locations/{REGION}/featurestores/{FEATURESTORE_ID}/entityTypes/behavior\n", - " featureId: cnt_post_score\n", - "- valueKey: cnt_spend_virtual_currency\n", - " featureSource:\n", - " entityIdKey: behavior\n", - " entityType: projects/{PROJECT_NUMBER}/locations/{REGION}/featurestores/{FEATURESTORE_ID}/entityTypes/behavior\n", - " featureId: cnt_spend_virtual_currency\n", - "- valueKey: cnt_ad_reward\n", - " featureSource:\n", - " entityIdKey: behavior\n", - " entityType: projects/{PROJECT_NUMBER}/locations/{REGION}/featurestores/{FEATURESTORE_ID}/entityTypes/behavior\n", - " featureId: cnt_ad_reward\n", - "- valueKey: cnt_challenge_a_friend\n", - " featureSource:\n", - " entityIdKey: behavior\n", - " entityType: projects/{PROJECT_NUMBER}/locations/{REGION}/featurestores/{FEATURESTORE_ID}/entityTypes/behavior\n", - " featureId: cnt_challenge_a_friend\n", - "- valueKey: cnt_completed_5_levels\n", - " featureSource:\n", - " entityIdKey: behavior\n", - " entityType: projects/{PROJECT_NUMBER}/locations/{REGION}/featurestores/{FEATURESTORE_ID}/entityTypes/behavior\n", - " featureId: cnt_completed_5_levels\n", - "- valueKey: cnt_use_extra_steps\n", - " featureSource:\n", - " entityIdKey: behavior\n", - " entityType: projects/{PROJECT_NUMBER}/locations/{REGION}/featurestores/{FEATURESTORE_ID}/entityTypes/behavior\n", - " featureId: cnt_use_extra_steps\n", - "- valueKey: month\n", - " featureSource:\n", - " entityIdKey: behavior\n", - " entityType: projects/{PROJECT_NUMBER}/locations/{REGION}/featurestores/{FEATURESTORE_ID}/entityTypes/behavior\n", - " 
featureId: month\n", - "- valueKey: julianday\n", - " featureSource:\n", - " entityIdKey: behavior\n", - " entityType: projects/{PROJECT_NUMBER}/locations/{REGION}/featurestores/{FEATURESTORE_ID}/entityTypes/behavior\n", - " featureId: julianday\n", - "- valueKey: dayofweek\n", - " featureSource:\n", - " entityIdKey: behavior\n", - " entityType: projects/{PROJECT_NUMBER}/locations/{REGION}/featurestores/{FEATURESTORE_ID}/entityTypes/behavior\n", - " featureId: dayofweek\"\"\"" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "767d211e77ff" - }, - "outputs": [], - "source": [ - "feature_fetch_config = FEATURE_FETCH_CONFIG_TEMPLATE.format(\n", - " PROJECT_NUMBER=PROJECT_NUMBER, REGION=REGION, FEATURESTORE_ID=FEATURESTORE_ID\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "be646e50a3aa" - }, - "outputs": [], - "source": [ - "print(feature_fetch_config)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "f4b685628924" - }, - "outputs": [], - "source": [ - "with open(\"prediction_featuresstore_fetch_config.yaml\", \"w\") as f:\n", - " f.write(feature_fetch_config)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "9156a38ac8b4" - }, - "outputs": [], - "source": [ - "# Remove if the file already exists\n", - "!gcloud storage rm $BUCKET_URI/prediction_featuresstore_fetch_config.yaml" ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "9ac494223985" - }, - "outputs": [], - "source": [ - "!gcloud storage cp prediction_featuresstore_fetch_config.yaml $BUCKET_URI" ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "5e58334c94b6" - }, - "source": [ - "# Integrate with Vertex Prediction" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "b4dfbffa88e0" - }, - "source": [ - "## Upload Model" - ] - }, - { - "cell_type": "code", - "execution_count": null, - 
"metadata": { - "id": "43b28a15eae9" - }, - "outputs": [], - "source": [ - "DEPLOY_IMAGE = \"us-docker.pkg.dev/vertex-ai/prediction/tf2-cpu.2-7:latest\"\n", - "DISPLAY_NAME = \"mobile_gaming_featureStore_integration_\"" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "03e6cf99f099" - }, - "outputs": [], - "source": [ - "model = aiplatform.Model.upload(\n", - " display_name=DISPLAY_NAME + TIMESTAMP,\n", - " artifact_uri=BUCKET_URI,\n", - " serving_container_image_uri=DEPLOY_IMAGE,\n", - " sync=False,\n", - ")\n", - "\n", - "model.wait()" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "9bb077790432" - }, - "source": [ - "## Online Prediction" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "eaaf2d7adf0d" - }, - "source": [ - "### Deploy Model" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "dc5406d7e46c" - }, - "outputs": [], - "source": [ - "DEPLOYED_NAME = DISPLAY_NAME + TIMESTAMP\n", - "\n", - "TRAFFIC_SPLIT = {\"0\": 100}\n", - "\n", - "MACHINE_TYPE = \"n1-standard-4\"\n", - "\n", - "MIN_NODES = 1\n", - "MAX_NODES = 1\n", - "\n", - "endpoint = model.deploy(\n", - " deployed_model_display_name=DEPLOYED_NAME,\n", - " traffic_split=TRAFFIC_SPLIT,\n", - " machine_type=MACHINE_TYPE,\n", - " min_replica_count=MIN_NODES,\n", - " max_replica_count=MAX_NODES,\n", - " service_account=SERVICE_ACCOUNT,\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "fc1e1f273ed2" - }, - "source": [ - "### Predict" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "c83e8207bf74" - }, - "outputs": [], - "source": [ - "default_pred_request = [\n", - " {\n", - " \"user_pseudo_id\": \"AB0F2EE5F9F401763BE1E9FA55410312\",\n", - " \"country\": \"Australia\",\n", - " \"operating_system\": \"IOS\",\n", - " \"language\": \"en-au\",\n", - " \"cnt_user_engagement\": 3.0,\n", - " \"cnt_level_start_quickplay\": 1.0,\n", - " 
\"cnt_level_end_quickplay\": 0.0,\n", - " \"cnt_level_complete_quickplay\": 0.0,\n", - " \"cnt_level_reset_quickplay\": 0.0,\n", - " \"cnt_post_score\": 0.0,\n", - " \"cnt_spend_virtual_currency\": 0.0,\n", - " \"cnt_ad_reward\": 0.0,\n", - " \"cnt_challenge_a_friend\": 0.0,\n", - " \"cnt_completed_5_levels\": 0.0,\n", - " \"cnt_use_extra_steps\": 0.0,\n", - " \"month\": 7,\n", - " \"julianday\": 194,\n", - " \"dayofweek\": 6,\n", - " },\n", - " {\n", - " \"user_pseudo_id\": \"E5D16173400729E05DFFB4883FA6EF1C\",\n", - " \"country\": \"United States\",\n", - " \"operating_system\": \"IOS\",\n", - " \"language\": \"en-us\",\n", - " \"cnt_user_engagement\": 1.0,\n", - " \"cnt_level_start_quickplay\": 1.0,\n", - " \"cnt_level_end_quickplay\": 0.0,\n", - " \"cnt_level_complete_quickplay\": 0.0,\n", - " \"cnt_level_reset_quickplay\": 0.0,\n", - " \"cnt_post_score\": 0.0,\n", - " \"cnt_spend_virtual_currency\": 0.0,\n", - " \"cnt_ad_reward\": 0.0,\n", - " \"cnt_challenge_a_friend\": 0.0,\n", - " \"cnt_completed_5_levels\": 0.0,\n", - " \"cnt_use_extra_steps\": 0.0,\n", - " \"month\": 6,\n", - " \"julianday\": 173,\n", - " \"dayofweek\": 6,\n", - " },\n", - "]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "430fe30f0878" - }, - "outputs": [], - "source": [ - "fs_pred_request = [\n", - " {\n", - " \"user_pseudo_id\": \"AB0F2EE5F9F401763BE1E9FA55410312\",\n", - " \"demographic\": \"AB0F2EE5F9F401763BE1E9FA55410312\",\n", - " \"behavior\": \"AB0F2EE5F9F401763BE1E9FA55410312\",\n", - " },\n", - " {\n", - " \"user_pseudo_id\": \"E5D16173400729E05DFFB4883FA6EF1C\",\n", - " \"demographic\": \"E5D16173400729E05DFFB4883FA6EF1C\",\n", - " \"behavior\": \"E5D16173400729E05DFFB4883FA6EF1C\",\n", - " },\n", - "]" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "496710f4bb97" - }, - "source": [ - "Features fetched from Feature Store can be overriden" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - 
"id": "a8c4c871a305" - }, - "outputs": [], - "source": [ - "fs_pred_request_with_overridden_features = [\n", - " {\n", - " \"user_pseudo_id\": \"AB0F2EE5F9F401763BE1E9FA55410312\",\n", - " \"demographic\": \"AB0F2EE5F9F401763BE1E9FA55410312\",\n", - " \"behavior\": \"AB0F2EE5F9F401763BE1E9FA55410312\",\n", - " \"cnt_ad_reward\": 10.0,\n", - " \"cnt_challenge_a_friend\": 10.0,\n", - " \"cnt_completed_5_levels\": 10.0,\n", - " \"cnt_use_extra_steps\": 10.0,\n", - " },\n", - " {\n", - " \"user_pseudo_id\": \"E5D16173400729E05DFFB4883FA6EF1C\",\n", - " \"demographic\": \"E5D16173400729E05DFFB4883FA6EF1C\",\n", - " \"behavior\": \"E5D16173400729E05DFFB4883FA6EF1C\",\n", - " \"cnt_ad_reward\": 10.0,\n", - " \"cnt_challenge_a_friend\": 10.0,\n", - " \"cnt_completed_5_levels\": 10.0,\n", - " \"cnt_use_extra_steps\": 10.0,\n", - " },\n", - "]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "7bd959dc40c6" - }, - "outputs": [], - "source": [ - "endpoint.predict([default_pred_request[0]])" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "bd0a77f5f49f" - }, - "outputs": [], - "source": [ - "default_response = endpoint.predict(default_pred_request)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "ddf8a41c4844" - }, - "outputs": [], - "source": [ - "print(default_response)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "36ddbb811f47" - }, - "source": [ - "### Single instance" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "d1c5c7ae4015" - }, - "outputs": [], - "source": [ - "endpoint.predict([fs_pred_request[0]])" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "78d40546ec32" - }, - "outputs": [], - "source": [ - "endpoint.predict([fs_pred_request_with_overridden_features[0]])" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "81e5fde2d430" - }, - 
"source": [ - "### Multiple instances" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "64e4735f3f6a" - }, - "outputs": [], - "source": [ - "fs_response = endpoint.predict(fs_pred_request)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "12ff29927bf9" - }, - "outputs": [], - "source": [ - "print(fs_response)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "8f842f1b6b32" - }, - "outputs": [], - "source": [ - "fs_with_overridden_features_response = endpoint.predict(\n", - " fs_pred_request_with_overridden_features\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "705ebe0b6cdf" - }, - "outputs": [], - "source": [ - "print(fs_with_overridden_features_response)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "68db6bc5e8b2" - }, - "source": [ - "### Compare response" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "912daf9b976b" - }, - "outputs": [], - "source": [ - "print(default_response.predictions == fs_response.predictions)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "3ce09d9bc5a5" - }, - "source": [ - "# Clean Up" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "c5f566238d9a" - }, - "outputs": [], - "source": [ - "# delete feature store\n", - "mobile_gaming_feature_store.delete(sync=True, force=True)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "540b19cca2bb" - }, - "outputs": [], - "source": [ - "# delete Vertex AI resources\n", - "endpoint.undeploy_all()\n", - "endpoint.delete()\n", - "model.delete" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "42172d5bc91b" - }, - "outputs": [], - "source": [ - "# Delete bucket\n", - "!gcloud storage rm --recursive $BUCKET_URI" ] - } - ], - "metadata": { - "colab": { 
- "name": "prediction_featurestore_integration.ipynb", - "toc_visible": true - }, - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.10.9" - } - }, - "nbformat": 4, - "nbformat_minor": 4 + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "7aa1a6f5db32" + }, + "source": [ + "# Overview" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "0a2a426f71c0" + }, + "source": [ + "This notebook is based on this [Prediction and Feature Store Online Serving](https://github.com/GoogleCloudPlatform/vertex-ai-samples/blob/main/notebooks/community/feature_store/mobile_gaming/mobile_gaming_feature_store.ipynb) notebook and [this blog post](https://cloud.google.com/blog/topics/developers-practitioners/churn-prediction-game-developers-using-google-analytics-4-ga4-and-bigquery-ml)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "bb819819a5c9" + }, + "source": [ + "### Dataset\n", + "\n", + "The dataset is the public sample export data from an actual mobile game app called \"Flood It!\" (Android, iOS)\n", + "\n", + "### Model\n", + "\n", + "The model you use in this notebook is based on [this blog post](https://cloud.google.com/blog/topics/developers-practitioners/churn-prediction-game-developers-using-google-analytics-4-ga4-and-bigquery-ml). The idea behind this model is that your company has extensive log data describing how your game users have interacted with the site. 
The raw data contains the following categories of information:\n", + "\n", + "- identity - unique player identity numbers\n", + "- demographic features - information about the player, such as the geographic region in which a player is located\n", + "- behavioral features - counts of the number of times a player has triggered certain game events, such as reaching a new level\n", + "- churn propensity - this is the label or target feature; it provides an estimated probability that this player will churn, i.e. stop being an active player.\n", + "\n", + "The blog article referenced above explains how to use BigQuery to store the raw data, pre-process the data for machine learning, and train the corresponding model. Because this notebook focuses on serving predictions with Feature Store, rather than training models, you're going to reuse a pre-trained version of this model, which has been exported to Cloud Storage at `gs://mco-mm/churn`." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "0e87d0008c86" + }, + "source": [ + "# Basic set up" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "2ced96588e13" + }, + "source": [ + "## Install packages" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "4811add5cd1c" + }, + "outputs": [], + "source": [ + "! 
pip install google-cloud-aiplatform" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "c7947573e336" + }, + "source": [ + "## Set up project" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "94f36c0f4a00" + }, + "outputs": [], + "source": [ + "import os\n", + "\n", + "PROJECT_ID = \"\"\n", + "\n", + "# Get your Google Cloud project ID from gcloud\n", + "if not os.getenv(\"IS_TESTING\"):\n", + " shell_output = !gcloud config list --format 'value(core.project)' 2>/dev/null\n", + " PROJECT_ID = shell_output[0]\n", + " print(\"Project ID: \", PROJECT_ID)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "7d439758446b" + }, + "outputs": [], + "source": [ + "if PROJECT_ID == \"\" or PROJECT_ID is None:\n", + " PROJECT_ID = \"[your-project-id]\" # @param {type:\"string\"}" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "56a76e9ebc4d" + }, + "outputs": [], + "source": [ + "shell_output = ! gcloud projects list --filter=\"PROJECT_ID:'{PROJECT_ID}'\" --format='value(PROJECT_NUMBER)'\n", + "PROJECT_NUMBER = shell_output[0]\n", + "print(\"Project Number:\", PROJECT_NUMBER)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "a74685b6ae29" + }, + "outputs": [], + "source": [ + "! 
gcloud config set project $PROJECT_ID" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "7fdcbacfa8ad" + }, + "outputs": [], + "source": [ + "REGION = \"\"\n", + "\n", + "if REGION == \"\" or REGION is None:\n", + " REGION = \"us-central1\"" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "17c464cec341" + }, + "source": [ + "## Timestamp" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "9fa11b3e7936" + }, + "outputs": [], + "source": [ + "from datetime import datetime\n", + "\n", + "TIMESTAMP = datetime.now().strftime(\"%Y%m%d%H%M%S\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "c03afe801c7b" + }, + "source": [ + "## Authenticate your Google Cloud account" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "254b58f37761" + }, + "source": [ + "If you are using Vertex AI Workbench Notebooks, your environment is already authenticated. Skip this step.\n", + "\n", + "If you are using Colab, run the cell below and follow the instructions when prompted to authenticate your account via oAuth.\n", + "\n", + "Otherwise, follow these steps:\n", + "\n", + "1. In the Cloud Console, go to the Create service account key page.\n", + "\n", + "2. Click Create service account.\n", + "\n", + "3. In the Service account name field, enter a name, and click Create.\n", + "\n", + "4. In the Grant this service account access to project section, click the Role drop-down list and add the following roles:\n", + "\n", + "- BigQuery Admin\n", + "- Storage Admin\n", + "- Storage Object Admin\n", + "- Vertex AI Administrator\n", + "- Vertex AI Feature Store Admin\n", + "\n", + "5. Click Create. A JSON file that contains your key downloads to your local environment.\n", + "\n", + "6. Enter the path to your service account key as the `GOOGLE_APPLICATION_CREDENTIALS` variable in the cell below and run the cell." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "6a08e2a6c84f" + }, + "outputs": [], + "source": [ + "# If you are running this notebook in Colab, run this cell and follow the\n", + "# instructions to authenticate your GCP account. This provides access to your\n", + "# Cloud Storage bucket and lets you submit training jobs and prediction\n", + "# requests.\n", + "\n", + "import os\n", + "import sys\n", + "\n", + "# If on Vertex AI Workbench, then don't execute this code\n", + "IS_COLAB = \"google.colab\" in sys.modules\n", + "if not os.path.exists(\"/opt/deeplearning/metadata/env_version\") and not os.getenv(\n", + " \"DL_ANACONDA_HOME\"\n", + "):\n", + " if \"google.colab\" in sys.modules:\n", + " from google.colab import auth as google_auth\n", + "\n", + " google_auth.authenticate_user()\n", + "\n", + " # If you are running this notebook locally, replace the string below with the\n", + " # path to your service account key and run this cell to authenticate your GCP\n", + " # account.\n", + " elif not os.getenv(\"IS_TESTING\"):\n", + " %env GOOGLE_APPLICATION_CREDENTIALS ''" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "2f52dced4061" + }, + "source": [ + "Create a bucket and copy the exported model to it" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "12c9381edd2a" + }, + "outputs": [], + "source": [ + "BUCKET_NAME = \"[your-bucket-name]\" # @param {type:\"string\"}\n", + "BUCKET_URI = f\"gs://{BUCKET_NAME}\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "50c6cda053e1" + }, + "outputs": [], + "source": [ + "if BUCKET_NAME == \"\" or BUCKET_NAME is None or BUCKET_NAME == \"[your-bucket-name]\":\n", + " BUCKET_NAME = PROJECT_ID + \"-aip-\" + TIMESTAMP\n", + " BUCKET_URI = f\"gs://{BUCKET_NAME}\"" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "197c22fada47" + }, + "source": [ + "Only if your bucket doesn't 
already exist: Run the following cell to create your Cloud Storage bucket." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "2de44a514cd1" + }, + "outputs": [], + "source": [ + "! gcloud storage buckets create --location=$REGION --project=$PROJECT_ID $BUCKET_URI" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "ca57dc423195" + }, + "outputs": [], + "source": [ + "! gcloud storage ls --all-versions --long $BUCKET_URI" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "0a6202578b94" + }, + "source": [ + "Copy the trained model to your bucket" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "6b2501fc7f01" + }, + "outputs": [], + "source": [ + "! gcloud storage cp --recursive gs://mco-mm/churn/* $BUCKET_URI" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "c672b6901814" + }, + "source": [ + "### Create a service account\n", + "\n", + "We need a service account for this new feature because the prediction workload's credential does not have access to Feature Store. Create one and grant the role `roles/aiplatform.serviceAgent` to it, which will grant it the access to most prediction's resources and Feature Store." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "534b78bcc16d" + }, + "outputs": [], + "source": [ + "SA_NAME = \"prediction-and-fs-testing\"\n", + "SA_DESCRIPTION = '\"SA to test Prediction and Feature Store integration\"'\n", + "DISPLAY_NAME = \"prediction-and-fs-testing\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "3cf1c055b80e" + }, + "outputs": [], + "source": [ + "!gcloud iam service-accounts create $SA_NAME \\\n", + " --description=$SA_DESCRIPTION \\\n", + " --display-name=$DISPLAY_NAME" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "fef2f4529b22" + }, + "outputs": [], + "source": [ + "# SERVICE_ACCOUNT = \"prediction-and-fs@bon-test-0.iam.gserviceaccount.com\"\n", + "SERVICE_ACCOUNT = (\n", + " f\"{SA_NAME}@{PROJECT_ID}.iam.gserviceaccount.com\" # @param {type:\"string\"}\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "8d7d03ced198" + }, + "source": [ + "Grant the Service account the `Storage Object Admin` and `Vertex AI Feature Store Data Viewer` roles to access the artifacts in GCS and data in Feature Store" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "b5e3ad275dfe" + }, + "outputs": [], + "source": [ + "!gcloud projects add-iam-policy-binding $PROJECT_ID \\\n", + " --member=serviceAccount:$SERVICE_ACCOUNT \\\n", + " --role=roles/storage.objectAdmin;\n", + "\n", + "!gcloud projects add-iam-policy-binding $PROJECT_ID \\\n", + " --member=serviceAccount:$SERVICE_ACCOUNT \\\n", + " --role=roles/aiplatform.featurestoreDataViewer;" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "d3938f6d37a1" + }, + "source": [ + "## Import libraries" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "7759fe720d45" + }, + "outputs": [], + "source": [ + "# General\n", + "import os\n", + "import sys\n", + "\n", + "# Vertex AI and its Feature
Store\n", + "from google.cloud import aiplatform\n", + "from google.cloud.aiplatform import Featurestore" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "2eb5493cf2ac" + }, + "source": [ + "**NOTE:** The feature is only available in `autopush` now" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "9b6b5c58669d" + }, + "outputs": [], + "source": [ + "aiplatform.constants.base.API_BASE_PATH = \"autopush-aiplatform.sandbox.googleapis.com\"\n", + "aiplatform.constants.base.PREDICTION_API_BASE_PATH = (\n", + " \"autopush-prediction-aiplatform.sandbox.googleapis.com\"\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "7def96de8098" + }, + "outputs": [], + "source": [ + "aiplatform.init(project=PROJECT_ID, location=REGION)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "56a185de336f" + }, + "source": [ + "# Create Feature Store" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "363c77a9e9e3" + }, + "outputs": [], + "source": [ + "FEATURESTORE_ID = \"mobile_gaming_\" + TIMESTAMP # @param {type:\"string\"}\n", + "\n", + "# Vertex AI Feature store\n", + "ONLINE_STORE_NODES_COUNT = 5\n", + "DEMOGRAPHIC_ENTITY_ID = \"demographic\"\n", + "BEHAVIOR_ENTITY_ID = \"behavior\"\n", + "FEATURE_TIME = \"timestamp\"\n", + "ENTITY_ID_FIELD = \"user_pseudo_id\"" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "bcaa5c3644f0" + }, + "source": [ + "We will use the exported sample data of the dataset used in [this blog post](https://cloud.google.com/blog/topics/developers-practitioners/churn-prediction-game-developers-using-google-analytics-4-ga4-and-bigquery-ml)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "72a36025d162" + }, + "outputs": [], + "source": [ + "SOURCE_URI = \"gs://featurestore_prediction_integration/data/mobile_gaming_dataset.csv\"" + ] + }, + { +
"cell_type": "markdown", + "metadata": { + "id": "85f09125566f" + }, + "source": [ + "## Create Feature Store" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "764aa2b60016" + }, + "outputs": [], + "source": [ + "try:\n", + " mobile_gaming_feature_store = Featurestore.create(\n", + " featurestore_id=FEATURESTORE_ID,\n", + " online_store_fixed_node_count=ONLINE_STORE_NODES_COUNT,\n", + " sync=True,\n", + " )\n", + "except RuntimeError as error:\n", + " print(error)\n", + "else:\n", + " FEATURESTORE_RESOURCE_NAME = mobile_gaming_feature_store.resource_name\n", + " print(f\"Feature store created: {FEATURESTORE_RESOURCE_NAME}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "7d4ff784a9f7" + }, + "source": [ + "### Create Entities" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "51e62e67d4e8" + }, + "outputs": [], + "source": [ + "try:\n", + " demographic_entity_type = mobile_gaming_feature_store.create_entity_type(\n", + " entity_type_id=DEMOGRAPHIC_ENTITY_ID,\n", + " description=\"User demographic Entity\",\n", + " sync=True,\n", + " )\n", + "except RuntimeError as error:\n", + " print(error)\n", + "else:\n", + " DEMOGRAPHIC_ENTITY_RESOURCE_NAME = demographic_entity_type.resource_name\n", + " print(\"Entity type name is\", DEMOGRAPHIC_ENTITY_RESOURCE_NAME)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "6ae913f7e9cc" + }, + "outputs": [], + "source": [ + "try:\n", + " behavior_entity_type = mobile_gaming_feature_store.create_entity_type(\n", + " entity_type_id=BEHAVIOR_ENTITY_ID, description=\"User behavior Entity\", sync=True\n", + " )\n", + "except RuntimeError as error:\n", + " print(error)\n", + "else:\n", + " BEHAVIOR_ENTITY_RESOURCE_NAME = behavior_entity_type.resource_name\n", + " print(\"Entity type name is\", BEHAVIOR_ENTITY_RESOURCE_NAME)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ae2b5655049b" + }, 
+ "source": [ + "### Create Features" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "809f781e797a" + }, + "source": [ + "#### Feature Config" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "22d855f118ea" + }, + "outputs": [], + "source": [ + "demographic_feature_configs = {\n", + " \"country\": {\n", + " \"value_type\": \"STRING\",\n", + " \"description\": \"The country of customer\",\n", + " \"labels\": {\"status\": \"passed\"},\n", + " },\n", + " \"operating_system\": {\n", + " \"value_type\": \"STRING\",\n", + " \"description\": \"The operating system of device\",\n", + " \"labels\": {\"status\": \"passed\"},\n", + " },\n", + " \"language\": {\n", + " \"value_type\": \"STRING\",\n", + " \"description\": \"The language of device\",\n", + " \"labels\": {\"status\": \"passed\"},\n", + " },\n", + " \"user_pseudo_id\": {\n", + " \"value_type\": \"STRING\",\n", + " \"description\": \"User pseudo id\",\n", + " \"labels\": {\"status\": \"passed\"},\n", + " },\n", + "}\n", + "\n", + "behavior_feature_configs = {\n", + " \"cnt_user_engagement\": {\n", + " \"value_type\": \"DOUBLE\",\n", + " \"description\": \"A variable of user engagement level\",\n", + " \"labels\": {\"status\": \"passed\"},\n", + " },\n", + " \"cnt_level_start_quickplay\": {\n", + " \"value_type\": \"DOUBLE\",\n", + " \"description\": \"A variable of user engagement with start level\",\n", + " \"labels\": {\"status\": \"passed\"},\n", + " },\n", + " \"cnt_level_end_quickplay\": {\n", + " \"value_type\": \"DOUBLE\",\n", + " \"description\": \"A variable of user engagement with end level\",\n", + " \"labels\": {\"status\": \"passed\"},\n", + " },\n", + " \"cnt_level_complete_quickplay\": {\n", + " \"value_type\": \"DOUBLE\",\n", + " \"description\": \"A variable of user engagement with complete status\",\n", + " \"labels\": {\"status\": \"passed\"},\n", + " },\n", + " \"cnt_level_reset_quickplay\": {\n", + " \"value_type\": \"DOUBLE\",\n", + " 
\"description\": \"A variable of user engagement with reset status\",\n", + " \"labels\": {\"status\": \"passed\"},\n", + " },\n", + " \"cnt_post_score\": {\n", + " \"value_type\": \"DOUBLE\",\n", + " \"description\": \"A variable of user score\",\n", + " \"labels\": {\"status\": \"passed\"},\n", + " },\n", + " \"cnt_spend_virtual_currency\": {\n", + " \"value_type\": \"DOUBLE\",\n", + " \"description\": \"A variable of user virtual amount\",\n", + " \"labels\": {\"status\": \"passed\"},\n", + " },\n", + " \"cnt_ad_reward\": {\n", + " \"value_type\": \"DOUBLE\",\n", + " \"description\": \"A variable of user reward\",\n", + " \"labels\": {\"status\": \"passed\"},\n", + " },\n", + " \"cnt_challenge_a_friend\": {\n", + " \"value_type\": \"DOUBLE\",\n", + " \"description\": \"A variable of user challenges with friends\",\n", + " \"labels\": {\"status\": \"passed\"},\n", + " },\n", + " \"cnt_completed_5_levels\": {\n", + " \"value_type\": \"DOUBLE\",\n", + " \"description\": \"A variable of user level 5 completed\",\n", + " \"labels\": {\"status\": \"passed\"},\n", + " },\n", + " \"cnt_use_extra_steps\": {\n", + " \"value_type\": \"DOUBLE\",\n", + " \"description\": \"A variable of user extra steps\",\n", + " \"labels\": {\"status\": \"passed\"},\n", + " },\n", + " \"month\": {\n", + " \"value_type\": \"INT64\",\n", + " \"description\": \"First touch month\",\n", + " \"labels\": {\"status\": \"passed\"},\n", + " },\n", + " \"julianday\": {\n", + " \"value_type\": \"INT64\",\n", + " \"description\": \"First touch julian day\",\n", + " \"labels\": {\"status\": \"passed\"},\n", + " },\n", + " \"dayofweek\": {\n", + " \"value_type\": \"INT64\",\n", + " \"description\": \"First touch day of week\",\n", + " \"labels\": {\"status\": \"passed\"},\n", + " },\n", + "}" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "1b72bf136987" + }, + "source": [ + "#### Create features using `batch_create_features` method" + ] + }, + { + "cell_type": "code", + 
"execution_count": null, + "metadata": { + "id": "9c2769834702" + }, + "outputs": [], + "source": [ + "try:\n", + " demographic_entity_type.batch_create_features(\n", + " feature_configs=demographic_feature_configs, sync=True\n", + " )\n", + "except RuntimeError as error:\n", + " print(error)\n", + "else:\n", + " for feature in demographic_entity_type.list_features():\n", + " print(\"\")\n", + " print(f\"The resource name of {feature.name} feature is\", feature.resource_name)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "68b07d3b2bf0" + }, + "outputs": [], + "source": [ + "try:\n", + " behavior_entity_type.batch_create_features(\n", + " feature_configs=behavior_feature_configs, sync=True\n", + " )\n", + "except RuntimeError as error:\n", + " print(error)\n", + "else:\n", + " for feature in behavior_entity_type.list_features():\n", + " print(\"\")\n", + " print(f\"The resource name of {feature.name} feature is\", feature.resource_name)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "34bf9fc11ae3" + }, + "source": [ + "### Ingest features " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "92e92c854028" + }, + "outputs": [], + "source": [ + "DEMOGRAPHIC_FEATURES_IDS = [\n", + " feature.name for feature in demographic_entity_type.list_features()\n", + "]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "50a7e935a870" + }, + "outputs": [], + "source": [ + "try:\n", + " demographic_entity_type.ingest_from_gcs(\n", + " feature_ids=DEMOGRAPHIC_FEATURES_IDS,\n", + " feature_time=FEATURE_TIME,\n", + " gcs_source_uris=SOURCE_URI,\n", + " gcs_source_type=\"csv\",\n", + " entity_id_field=ENTITY_ID_FIELD,\n", + " disable_online_serving=False,\n", + " worker_count=10,\n", + " sync=True,\n", + " )\n", + "except RuntimeError as error:\n", + " print(error)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": 
"dcbb4fa32e40" + }, + "outputs": [], + "source": [ + "BEHAVIOR_FEATURES_IDS = [\n", + " feature.name for feature in behavior_entity_type.list_features()\n", + "]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "a36facc52807" + }, + "outputs": [], + "source": [ + "try:\n", + " behavior_entity_type.ingest_from_gcs(\n", + " feature_ids=BEHAVIOR_FEATURES_IDS,\n", + " feature_time=FEATURE_TIME,\n", + " gcs_source_uris=SOURCE_URI,\n", + " gcs_source_type=\"csv\",\n", + " entity_id_field=ENTITY_ID_FIELD,\n", + " disable_online_serving=False,\n", + " worker_count=10,\n", + " sync=True,\n", + " )\n", + "except RuntimeError as error:\n", + " print(error)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "bec74a816daf" + }, + "source": [ + "# Create Feature Fetch Config" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "b1131367aeec" + }, + "source": [ + "## Feature fetch config proto:\n", + "\n", + "```protobuf\n", + "message FeatureFetchConfig {\n", + " // The format of the internal prediction request auto-created after features\n", + " // are fetched. Prediction currently supports XGBoost, TensorFlow and\n", + " // scikit-learn, and will soon start to support Pytorch. Among these\n", + " // frameworks, XGBoost supports array input format only (i.e. 
input\n", + " // features are in the form of an array), whereas the other three frameworks\n", + " // can allow both dictionary format and array format inputs.\n", + " ModelInputFormat model_input_format = 3;\n", + " enum ModelInputFormat {\n", + " MODEL_INPUT_FORMAT_UNSPECIFIED = 0;\n", + " ARRAY = 1;\n", + " DICT = 2;\n", + " }\n", + "\n", + " // Specifying details of the prediction input\n", + " repeated Feature features = 4;\n", + " message Feature {\n", + " // When internal_request_format = DICT, this value_key is used\n", + " // for the Internal Prediction Request as the key to the feature value.\n", + " // In the FeatureFetchConfig, a pass-through feature can be represented by\n", + " // a Feature message with just a value_key.\n", + " string value_key = 1;\n", + "\n", + " // Defines where from featurestore(s) does each feature comes.\n", + " FeatureSource feature_source = 2;\n", + " message FeatureSource {\n", + " // From a high level, there are two fields in FeatureSource, where\n", + " // entity_id_key is about the \"row\" from where a value is fetched, and\n", + " // feature_resource_path is about the \"column\" from where a value is\n", + " // fetched.\n", + "\n", + " // Specifies which key holds the entity ID in the external request sent by\n", + " // the user to the prediction service.\n", + " string entity_id_key = 1;\n", + "\n", + " // The resource path in URL format, to identify the entity type.\n", + " // The format should be\n", + " // projects/PROJECT/locations/LOCATION/featurestores/FEATURESTORE_ID/entityTypes/ENTITY_TYPE_ID/\n", + " // e.g.\n", + " // \"projects/my-feature-store-project/locations/us-central1/featurestores/movie_predictions/entityTypes/movies/\"\n", + " string entity_type = 2;\n", + "\n", + " // The feature ID defined in feature store.\n", + " string feature_id = 3;\n", + " }\n", + " }\n", + "}\n", + "```" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "092d012dfd27" + }, + "source": [ + "## Generate Feature 
fetch config" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "7ee405070fe4" + }, + "outputs": [], + "source": [ + "FEATURE_FETCH_CONFIG_TEMPLATE = \"\"\"modelInputFormat: DICT\n", + "features:\n", + "- valueKey: user_pseudo_id\n", + "- valueKey: country\n", + " featureSource:\n", + " entityIdKey: demographic\n", + " entityType: projects/{PROJECT_NUMBER}/locations/{REGION}/featurestores/{FEATURESTORE_ID}/entityTypes/demographic\n", + " featureId: country\n", + "- valueKey: operating_system\n", + " featureSource:\n", + " entityIdKey: demographic\n", + " entityType: projects/{PROJECT_NUMBER}/locations/{REGION}/featurestores/{FEATURESTORE_ID}/entityTypes/demographic\n", + " featureId: operating_system\n", + "- valueKey: language\n", + " featureSource:\n", + " entityIdKey: demographic\n", + " entityType: projects/{PROJECT_NUMBER}/locations/{REGION}/featurestores/{FEATURESTORE_ID}/entityTypes/demographic\n", + " featureId: language\n", + "- valueKey: cnt_user_engagement\n", + " featureSource:\n", + " entityIdKey: behavior\n", + " entityType: projects/{PROJECT_NUMBER}/locations/{REGION}/featurestores/{FEATURESTORE_ID}/entityTypes/behavior\n", + " featureId: cnt_user_engagement\n", + "- valueKey: cnt_level_start_quickplay\n", + " featureSource:\n", + " entityIdKey: behavior\n", + " entityType: projects/{PROJECT_NUMBER}/locations/{REGION}/featurestores/{FEATURESTORE_ID}/entityTypes/behavior\n", + " featureId: cnt_level_start_quickplay\n", + "- valueKey: cnt_level_end_quickplay\n", + " featureSource:\n", + " entityIdKey: behavior\n", + " entityType: projects/{PROJECT_NUMBER}/locations/{REGION}/featurestores/{FEATURESTORE_ID}/entityTypes/behavior\n", + " featureId: cnt_level_end_quickplay\n", + "- valueKey: cnt_level_complete_quickplay\n", + " featureSource:\n", + " entityIdKey: behavior\n", + " entityType: projects/{PROJECT_NUMBER}/locations/{REGION}/featurestores/{FEATURESTORE_ID}/entityTypes/behavior\n", + " featureId: 
cnt_level_complete_quickplay\n", + "- valueKey: cnt_level_reset_quickplay\n", + " featureSource:\n", + " entityIdKey: behavior\n", + " entityType: projects/{PROJECT_NUMBER}/locations/{REGION}/featurestores/{FEATURESTORE_ID}/entityTypes/behavior\n", + " featureId: cnt_level_reset_quickplay\n", + "- valueKey: cnt_post_score\n", + " featureSource:\n", + " entityIdKey: behavior\n", + " entityType: projects/{PROJECT_NUMBER}/locations/{REGION}/featurestores/{FEATURESTORE_ID}/entityTypes/behavior\n", + " featureId: cnt_post_score\n", + "- valueKey: cnt_spend_virtual_currency\n", + " featureSource:\n", + " entityIdKey: behavior\n", + " entityType: projects/{PROJECT_NUMBER}/locations/{REGION}/featurestores/{FEATURESTORE_ID}/entityTypes/behavior\n", + " featureId: cnt_spend_virtual_currency\n", + "- valueKey: cnt_ad_reward\n", + " featureSource:\n", + " entityIdKey: behavior\n", + " entityType: projects/{PROJECT_NUMBER}/locations/{REGION}/featurestores/{FEATURESTORE_ID}/entityTypes/behavior\n", + " featureId: cnt_ad_reward\n", + "- valueKey: cnt_challenge_a_friend\n", + " featureSource:\n", + " entityIdKey: behavior\n", + " entityType: projects/{PROJECT_NUMBER}/locations/{REGION}/featurestores/{FEATURESTORE_ID}/entityTypes/behavior\n", + " featureId: cnt_challenge_a_friend\n", + "- valueKey: cnt_completed_5_levels\n", + " featureSource:\n", + " entityIdKey: behavior\n", + " entityType: projects/{PROJECT_NUMBER}/locations/{REGION}/featurestores/{FEATURESTORE_ID}/entityTypes/behavior\n", + " featureId: cnt_completed_5_levels\n", + "- valueKey: cnt_use_extra_steps\n", + " featureSource:\n", + " entityIdKey: behavior\n", + " entityType: projects/{PROJECT_NUMBER}/locations/{REGION}/featurestores/{FEATURESTORE_ID}/entityTypes/behavior\n", + " featureId: cnt_use_extra_steps\n", + "- valueKey: month\n", + " featureSource:\n", + " entityIdKey: behavior\n", + " entityType: projects/{PROJECT_NUMBER}/locations/{REGION}/featurestores/{FEATURESTORE_ID}/entityTypes/behavior\n", + " 
featureId: month\n", + "- valueKey: julianday\n", + " featureSource:\n", + " entityIdKey: behavior\n", + " entityType: projects/{PROJECT_NUMBER}/locations/{REGION}/featurestores/{FEATURESTORE_ID}/entityTypes/behavior\n", + " featureId: julianday\n", + "- valueKey: dayofweek\n", + " featureSource:\n", + " entityIdKey: behavior\n", + " entityType: projects/{PROJECT_NUMBER}/locations/{REGION}/featurestores/{FEATURESTORE_ID}/entityTypes/behavior\n", + " featureId: dayofweek\"\"\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "767d211e77ff" + }, + "outputs": [], + "source": [ + "feature_fetch_config = FEATURE_FETCH_CONFIG_TEMPLATE.format(\n", + " PROJECT_NUMBER=PROJECT_NUMBER, REGION=REGION, FEATURESTORE_ID=FEATURESTORE_ID\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "be646e50a3aa" + }, + "outputs": [], + "source": [ + "print(feature_fetch_config)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "f4b685628924" + }, + "outputs": [], + "source": [ + "with open(\"prediction_featuresstore_fetch_config.yaml\", \"w\") as f:\n", + " f.write(feature_fetch_config)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "9156a38ac8b4" + }, + "outputs": [], + "source": [ + "# Remove if the file already exists\n", + "!gcloud storage rm $BUCKET_URI/prediction_featuresstore_fetch_config.yaml" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "9ac494223985" + }, + "outputs": [], + "source": [ + "!gcloud storage cp prediction_featuresstore_fetch_config.yaml $BUCKET_URI" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "5e58334c94b6" + }, + "source": [ + "# Integrate with Vertex Prediction" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "b4dfbffa88e0" + }, + "source": [ + "## Upload Model" + ] + }, + { + "cell_type": "code", + "execution_count": null, + 
"metadata": { + "id": "43b28a15eae9" + }, + "outputs": [], + "source": [ + "DEPLOY_IMAGE = \"us-docker.pkg.dev/vertex-ai/prediction/tf2-cpu.2-7:latest\"\n", + "DISPLAY_NAME = \"mobile_gaming_featureStore_integration_\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "03e6cf99f099" + }, + "outputs": [], + "source": [ + "model = aiplatform.Model.upload(\n", + " display_name=DISPLAY_NAME + TIMESTAMP,\n", + " artifact_uri=BUCKET_URI,\n", + " serving_container_image_uri=DEPLOY_IMAGE,\n", + " sync=False,\n", + ")\n", + "\n", + "model.wait()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "9bb077790432" + }, + "source": [ + "## Online Prediction" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "eaaf2d7adf0d" + }, + "source": [ + "### Deploy Model" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "dc5406d7e46c" + }, + "outputs": [], + "source": [ + "DEPLOYED_NAME = DISPLAY_NAME + TIMESTAMP\n", + "\n", + "TRAFFIC_SPLIT = {\"0\": 100}\n", + "\n", + "MACHINE_TYPE = \"n1-standard-4\"\n", + "\n", + "MIN_NODES = 1\n", + "MAX_NODES = 1\n", + "\n", + "endpoint = model.deploy(\n", + " deployed_model_display_name=DEPLOYED_NAME,\n", + " traffic_split=TRAFFIC_SPLIT,\n", + " machine_type=MACHINE_TYPE,\n", + " min_replica_count=MIN_NODES,\n", + " max_replica_count=MAX_NODES,\n", + " service_account=SERVICE_ACCOUNT,\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "fc1e1f273ed2" + }, + "source": [ + "### Predict" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "c83e8207bf74" + }, + "outputs": [], + "source": [ + "default_pred_request = [\n", + " {\n", + " \"user_pseudo_id\": \"AB0F2EE5F9F401763BE1E9FA55410312\",\n", + " \"country\": \"Australia\",\n", + " \"operating_system\": \"IOS\",\n", + " \"language\": \"en-au\",\n", + " \"cnt_user_engagement\": 3.0,\n", + " \"cnt_level_start_quickplay\": 1.0,\n", + " 
\"cnt_level_end_quickplay\": 0.0,\n", + " \"cnt_level_complete_quickplay\": 0.0,\n", + " \"cnt_level_reset_quickplay\": 0.0,\n", + " \"cnt_post_score\": 0.0,\n", + " \"cnt_spend_virtual_currency\": 0.0,\n", + " \"cnt_ad_reward\": 0.0,\n", + " \"cnt_challenge_a_friend\": 0.0,\n", + " \"cnt_completed_5_levels\": 0.0,\n", + " \"cnt_use_extra_steps\": 0.0,\n", + " \"month\": 7,\n", + " \"julianday\": 194,\n", + " \"dayofweek\": 6,\n", + " },\n", + " {\n", + " \"user_pseudo_id\": \"E5D16173400729E05DFFB4883FA6EF1C\",\n", + " \"country\": \"United States\",\n", + " \"operating_system\": \"IOS\",\n", + " \"language\": \"en-us\",\n", + " \"cnt_user_engagement\": 1.0,\n", + " \"cnt_level_start_quickplay\": 1.0,\n", + " \"cnt_level_end_quickplay\": 0.0,\n", + " \"cnt_level_complete_quickplay\": 0.0,\n", + " \"cnt_level_reset_quickplay\": 0.0,\n", + " \"cnt_post_score\": 0.0,\n", + " \"cnt_spend_virtual_currency\": 0.0,\n", + " \"cnt_ad_reward\": 0.0,\n", + " \"cnt_challenge_a_friend\": 0.0,\n", + " \"cnt_completed_5_levels\": 0.0,\n", + " \"cnt_use_extra_steps\": 0.0,\n", + " \"month\": 6,\n", + " \"julianday\": 173,\n", + " \"dayofweek\": 6,\n", + " },\n", + "]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "430fe30f0878" + }, + "outputs": [], + "source": [ + "fs_pred_request = [\n", + " {\n", + " \"user_pseudo_id\": \"AB0F2EE5F9F401763BE1E9FA55410312\",\n", + " \"demographic\": \"AB0F2EE5F9F401763BE1E9FA55410312\",\n", + " \"behavior\": \"AB0F2EE5F9F401763BE1E9FA55410312\",\n", + " },\n", + " {\n", + " \"user_pseudo_id\": \"E5D16173400729E05DFFB4883FA6EF1C\",\n", + " \"demographic\": \"E5D16173400729E05DFFB4883FA6EF1C\",\n", + " \"behavior\": \"E5D16173400729E05DFFB4883FA6EF1C\",\n", + " },\n", + "]" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "496710f4bb97" + }, + "source": [ + "Features fetched from Feature Store can be overriden" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + 
"id": "a8c4c871a305" + }, + "outputs": [], + "source": [ + "fs_pred_request_with_overridden_features = [\n", + " {\n", + " \"user_pseudo_id\": \"AB0F2EE5F9F401763BE1E9FA55410312\",\n", + " \"demographic\": \"AB0F2EE5F9F401763BE1E9FA55410312\",\n", + " \"behavior\": \"AB0F2EE5F9F401763BE1E9FA55410312\",\n", + " \"cnt_ad_reward\": 10.0,\n", + " \"cnt_challenge_a_friend\": 10.0,\n", + " \"cnt_completed_5_levels\": 10.0,\n", + " \"cnt_use_extra_steps\": 10.0,\n", + " },\n", + " {\n", + " \"user_pseudo_id\": \"E5D16173400729E05DFFB4883FA6EF1C\",\n", + " \"demographic\": \"E5D16173400729E05DFFB4883FA6EF1C\",\n", + " \"behavior\": \"E5D16173400729E05DFFB4883FA6EF1C\",\n", + " \"cnt_ad_reward\": 10.0,\n", + " \"cnt_challenge_a_friend\": 10.0,\n", + " \"cnt_completed_5_levels\": 10.0,\n", + " \"cnt_use_extra_steps\": 10.0,\n", + " },\n", + "]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "7bd959dc40c6" + }, + "outputs": [], + "source": [ + "endpoint.predict([default_pred_request[0]])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "bd0a77f5f49f" + }, + "outputs": [], + "source": [ + "default_response = endpoint.predict(default_pred_request)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "ddf8a41c4844" + }, + "outputs": [], + "source": [ + "print(default_response)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "36ddbb811f47" + }, + "source": [ + "### Single instance" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "d1c5c7ae4015" + }, + "outputs": [], + "source": [ + "endpoint.predict([fs_pred_request[0]])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "78d40546ec32" + }, + "outputs": [], + "source": [ + "endpoint.predict([fs_pred_request_with_overridden_features[0]])" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "81e5fde2d430" + }, + 
"source": [ + "### Multiple instances" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "64e4735f3f6a" + }, + "outputs": [], + "source": [ + "fs_response = endpoint.predict(fs_pred_request)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "12ff29927bf9" + }, + "outputs": [], + "source": [ + "print(fs_response)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "8f842f1b6b32" + }, + "outputs": [], + "source": [ + "fs_with_overridden_features_response = endpoint.predict(\n", + " fs_pred_request_with_overridden_features\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "705ebe0b6cdf" + }, + "outputs": [], + "source": [ + "print(fs_with_overridden_features_response)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "68db6bc5e8b2" + }, + "source": [ + "### Compare response" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "912daf9b976b" + }, + "outputs": [], + "source": [ + "print(default_response.predictions == fs_response.predictions)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "3ce09d9bc5a5" + }, + "source": [ + "# Clean Up" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "c5f566238d9a" + }, + "outputs": [], + "source": [ + "# delete feature store\n", + "mobile_gaming_feature_store.delete(sync=True, force=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "540b19cca2bb" + }, + "outputs": [], + "source": [ + "# delete Vertex AI resources\n", + "endpoint.undeploy_all()\n", + "endpoint.delete()\n", + "model.delete" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "42172d5bc91b" + }, + "outputs": [], + "source": [ + "# Delete bucket\n", + "!gcloud storage rm --recursive $BUCKET_URI" + ] + } + ], + "metadata": { + "colab": 
{ + "name": "prediction_featurestore_integration.ipynb", + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 } diff --git a/notebooks/community/ml_ops/stage3/get_started_with_model_registry.ipynb b/notebooks/community/ml_ops/stage3/get_started_with_model_registry.ipynb index 0cf458bf4..45c092190 100644 --- a/notebooks/community/ml_ops/stage3/get_started_with_model_registry.ipynb +++ b/notebooks/community/ml_ops/stage3/get_started_with_model_registry.ipynb @@ -441,7 +441,8 @@ }, "outputs": [], "source": [ - "! gcloud storage buckets create --location=$REGION $BUCKET_URI" ] + "! gcloud storage buckets create --location=$REGION $BUCKET_URI" + ] }, { "cell_type": "markdown", @@ -460,7 +461,8 @@ }, "outputs": [], "source": [ - "! gcloud storage ls --all-versions --long $BUCKET_URI" ] + "! gcloud storage ls --all-versions --long $BUCKET_URI" + ] }, { "cell_type": "markdown", @@ -1174,7 +1176,8 @@ " print(e)\n", "\n", "if delete_bucket or os.getenv(\"IS_TESTING\"):\n", - " ! gcloud storage rm --recursive --continue-on-error {BUCKET_URI}" ] + " ! gcloud storage rm --recursive --continue-on-error {BUCKET_URI}" + ] } ], "metadata": { diff --git a/notebooks/community/model_garden/model_garden_openai_api_llama3_1.ipynb b/notebooks/community/model_garden/model_garden_openai_api_llama3_1.ipynb index 8a7392102..654a39dec 100644 --- a/notebooks/community/model_garden/model_garden_openai_api_llama3_1.ipynb +++ b/notebooks/community/model_garden/model_garden_openai_api_llama3_1.ipynb @@ -253,7 +253,8 @@ }, "outputs": [], "source": [ - "! gcloud storage buckets create --location={LOCATION} --project={PROJECT_ID} {BUCKET_URI}" ] + "! gcloud storage buckets create --location={LOCATION} --project={PROJECT_ID} {BUCKET_URI}" + ] }, { "cell_type": "markdown", @@ -1236,7 +1237,8 @@ " rag.delete_corpus(name=rag_corpus.name)\n", "\n", "if delete_bucket:\n", - " ! 
gcloud storage rm --recursive $BUCKET_NAME" ] + " ! gcloud storage rm --recursive $BUCKET_NAME" + ] } ], "metadata": { diff --git a/notebooks/community/model_garden/model_garden_pytorch_mixtral_deployment.ipynb b/notebooks/community/model_garden/model_garden_pytorch_mixtral_deployment.ipynb index 5428f2e86..7234a2ddb 100644 --- a/notebooks/community/model_garden/model_garden_pytorch_mixtral_deployment.ipynb +++ b/notebooks/community/model_garden/model_garden_pytorch_mixtral_deployment.ipynb @@ -236,7 +236,9 @@ "# https://cloud.google.com/vertex-ai/docs/predictions/configure-compute.\n", "\n", "# @markdown L4 GPUs are good serving solutions and are more cost effective than V100s for 8x7B models. The 8x22B models only works with A100/H100 GPUs now.\n", - "accelerator_type = \"NVIDIA_L4\" # @param [\"NVIDIA_L4\", \"NVIDIA_TESLA_V100\", \"NVIDIA_H100_80GB\"]\n", + "accelerator_type = (\n", + " \"NVIDIA_L4\" # @param [\"NVIDIA_L4\", \"NVIDIA_TESLA_V100\", \"NVIDIA_H100_80GB\"]\n", + ")\n", "\n", "if accelerator_type == \"NVIDIA_L4\":\n", " machine_type = \"g2-standard-96\"\n", diff --git a/notebooks/community/vertex_endpoints/tf_hub_obj_detection/deploy_tfhub_object_detection_on_vertex_endpoints.ipynb b/notebooks/community/vertex_endpoints/tf_hub_obj_detection/deploy_tfhub_object_detection_on_vertex_endpoints.ipynb index 5fef39e60..ccaec2db5 100644 --- a/notebooks/community/vertex_endpoints/tf_hub_obj_detection/deploy_tfhub_object_detection_on_vertex_endpoints.ipynb +++ b/notebooks/community/vertex_endpoints/tf_hub_obj_detection/deploy_tfhub_object_detection_on_vertex_endpoints.ipynb @@ -445,7 +445,8 @@ }, "outputs": [], "source": [ - "! gcloud storage buckets create --project=$PROJECT_ID --location=$REGION $BUCKET_NAME" ] + "! gcloud storage buckets create --project=$PROJECT_ID --location=$REGION $BUCKET_NAME" + ] }, { "cell_type": "markdown", @@ -464,7 +465,8 @@ }, "outputs": [], "source": [ - "! gcloud storage ls --all-versions --long $BUCKET_NAME" ] + "! 
gcloud storage ls --all-versions --long $BUCKET_NAME" + ] }, { "cell_type": "markdown", @@ -510,11 +512,12 @@ }, "outputs": [], "source": [ + "from io import BytesIO\n", + "\n", "import matplotlib.pyplot as plt\n", "import numpy as np\n", "import tensorflow as tf\n", - "from PIL import Image\n", - "from six import BytesIO" + "from PIL import Image" ] }, { @@ -948,7 +951,8 @@ }, "outputs": [], "source": [ - "!gcloud storage cp --recursive $VERTEX_MODEL_PATH $BUCKET_NAME/obj_detection_model_vertex" ] + "!gcloud storage cp --recursive $VERTEX_MODEL_PATH $BUCKET_NAME/obj_detection_model_vertex" + ] }, { "cell_type": "code", @@ -958,7 +962,8 @@ }, "outputs": [], "source": [ - "!gcloud storage ls $BUCKET_NAME" ] + "!gcloud storage ls $BUCKET_NAME" + ] }, { "cell_type": "markdown", @@ -1293,7 +1298,8 @@ "--quiet\n", "\n", "# Delete Cloud Storage objects that were created\n", - "#! gcloud storage rm --recursive $BUCKET_NAME" ] + "#! gcloud storage rm --recursive $BUCKET_NAME" + ] } ], "metadata": { diff --git a/notebooks/official/feature_store_legacy/sdk-feature-store-pandas.ipynb b/notebooks/official/feature_store_legacy/sdk-feature-store-pandas.ipynb index 8d72c5d4f..ae118f5ba 100644 --- a/notebooks/official/feature_store_legacy/sdk-feature-store-pandas.ipynb +++ b/notebooks/official/feature_store_legacy/sdk-feature-store-pandas.ipynb @@ -303,7 +303,8 @@ }, "outputs": [], "source": [ - "! gcloud storage buckets create --location={LOCATION} --project={PROJECT_ID} {BUCKET_URI}" ] + "! gcloud storage buckets create --location={LOCATION} --project={PROJECT_ID} {BUCKET_URI}" + ] }, { "cell_type": "markdown", @@ -563,7 +564,10 @@ "# set the local file names\n", "USERS_AVRO_FN = \"users.avro\"\n", "MOVIES_AVRO_FN = \"movies.avro\"\n", - "# copy the files using gcloud storage\n", "! gcloud storage cp $GCS_USERS_AVRO_URI $USERS_AVRO_FN\n", "! gcloud storage cp $GCS_MOVIES_AVRO_URI $MOVIES_AVRO_FN" ] + "# copy the files using gcloud storage\n", + "! 
gcloud storage cp $GCS_USERS_AVRO_URI $USERS_AVRO_FN\n", + "! gcloud storage cp $GCS_MOVIES_AVRO_URI $MOVIES_AVRO_FN" + ] }, { "cell_type": "markdown", @@ -1123,7 +1127,8 @@ "# Delete Cloud Storage objects that were created\n", "delete_bucket = False # Set True for deletion\n", "if delete_bucket:\n", - " ! gcloud storage rm --recursive $BUCKET_URI" ] + " ! gcloud storage rm --recursive $BUCKET_URI" + ] } ], "metadata": { diff --git a/notebooks/official/pipelines/custom_model_training_and_batch_prediction.ipynb b/notebooks/official/pipelines/custom_model_training_and_batch_prediction.ipynb index d56022ded..0ca89fe50 100644 --- a/notebooks/official/pipelines/custom_model_training_and_batch_prediction.ipynb +++ b/notebooks/official/pipelines/custom_model_training_and_batch_prediction.ipynb @@ -296,7 +296,8 @@ }, "outputs": [], "source": [ - "! gcloud storage buckets create --location={LOCATION} --project={PROJECT_ID} {BUCKET_URI}" ] + "! gcloud storage buckets create --location={LOCATION} --project={PROJECT_ID} {BUCKET_URI}" + ] }, { "cell_type": "markdown", @@ -370,7 +371,8 @@ "source": [ "! gcloud storage buckets add-iam-policy-binding $BUCKET_URI --member=serviceAccount:{SERVICE_ACCOUNT} --role=roles/storage.objectCreator\n", "\n", - "! gcloud storage buckets add-iam-policy-binding $BUCKET_URI --member=serviceAccount:{SERVICE_ACCOUNT} --role=roles/storage.objectViewer" ] + "! gcloud storage buckets add-iam-policy-binding $BUCKET_URI --member=serviceAccount:{SERVICE_ACCOUNT} --role=roles/storage.objectViewer" + ] }, { "cell_type": "markdown", @@ -1066,11 +1068,14 @@ " + \"/evaluation_metrics\"\n", " )\n", " if tf.io.gfile.exists(EXECUTE_OUTPUT):\n", - " ! gcloud storage cat $EXECUTE_OUTPUT\n", " return EXECUTE_OUTPUT\n", + " ! gcloud storage cat $EXECUTE_OUTPUT\n", + " return EXECUTE_OUTPUT\n", " elif tf.io.gfile.exists(GCP_RESOURCES):\n", - " ! gcloud storage cat $GCP_RESOURCES\n", " return GCP_RESOURCES\n", + " ! 
gcloud storage cat $GCP_RESOURCES\n", + " return GCP_RESOURCES\n", " elif tf.io.gfile.exists(EVAL_METRICS):\n", - " ! gcloud storage cat $EVAL_METRICS\n", " return EVAL_METRICS\n", + " ! gcloud storage cat $EVAL_METRICS\n", + " return EVAL_METRICS\n", "\n", " return None\n", "\n", @@ -1078,13 +1083,15 @@ "print(\"model-upload\")\n", "artifacts = print_pipeline_output(job, \"model-upload\")\n", "print(\"\\n\")\n", - "output = !gcloud storage cat $artifacts\n", "print(output)\n", + "output = !gcloud storage cat $artifacts\n", + "print(output)\n", "output = json.loads(output[0])\n", "model_id = output[\"artifacts\"][\"model\"][\"artifacts\"][0][\"metadata\"][\"resourceName\"]\n", "print(\"model-batch-predict\")\n", "artifacts = print_pipeline_output(job, \"model-batch-predict\")\n", "print(\"\\n\")\n", - "output = !gcloud storage cat $artifacts\n", "output = json.loads(output[0])\n", + "output = !gcloud storage cat $artifacts\n", + "output = json.loads(output[0])\n", "batch_job_id = output[\"artifacts\"][\"batchpredictionjob\"][\"artifacts\"][0][\"metadata\"][\n", " \"resourceName\"\n", "]" @@ -1126,7 +1133,8 @@ "# Delete the Cloud Storage bucket\n", "delete_bucket = False # Set True for deletion\n", "if delete_bucket:\n", - " ! gcloud storage rm --recursive $BUCKET_URI\n", "\n", + " ! gcloud storage rm --recursive $BUCKET_URI\n", + "\n", "# Remove the locally generated files\n", "! rm custom_model_training_spec.yaml\n", "! rm -rf custom" diff --git a/notebooks/official/prediction/get_started_with_vertex_private_endpoints.ipynb b/notebooks/official/prediction/get_started_with_vertex_private_endpoints.ipynb index b4564bb80..ea78771af 100644 --- a/notebooks/official/prediction/get_started_with_vertex_private_endpoints.ipynb +++ b/notebooks/official/prediction/get_started_with_vertex_private_endpoints.ipynb @@ -307,7 +307,8 @@ }, "outputs": [], "source": [ - "! gcloud storage buckets create --location={LOCATION} --project={PROJECT_ID} {BUCKET_URI}" ] + "! 
gcloud storage buckets create --location={LOCATION} --project={PROJECT_ID} {BUCKET_URI}" + ] }, { "cell_type": "markdown", @@ -974,7 +975,8 @@ }, "outputs": [], "source": [ - "! gcloud storage cp gs://cloud-ml-data/img/flower_photos/daisy/100080576_f52e8ee070_n.jpg test.jpg" ] + "! gcloud storage cp gs://cloud-ml-data/img/flower_photos/daisy/100080576_f52e8ee070_n.jpg test.jpg" + ] }, { "cell_type": "code", @@ -1169,7 +1171,8 @@ " print(e)\n", "\n", "if delete_bucket:\n", - " ! gcloud storage rm --recursive --continue-on-error {BUCKET_URI}\n", "\n", + " ! gcloud storage rm --recursive --continue-on-error {BUCKET_URI}\n", + "\n", "if delete_generated_files:\n", " ! rm -rf \"test.jpg\" \"instances.json\"" ] diff --git a/notebooks/official/tabular_workflows/wide_and_deep_on_vertex_pipelines.ipynb b/notebooks/official/tabular_workflows/wide_and_deep_on_vertex_pipelines.ipynb index 372d2f913..8df1c161f 100644 --- a/notebooks/official/tabular_workflows/wide_and_deep_on_vertex_pipelines.ipynb +++ b/notebooks/official/tabular_workflows/wide_and_deep_on_vertex_pipelines.ipynb @@ -297,7 +297,8 @@ }, "outputs": [], "source": [ - "! gcloud storage buckets create --location $LOCATION --project $PROJECT_ID $BUCKET_URI" ] + "! gcloud storage buckets create --location $LOCATION --project $PROJECT_ID $BUCKET_URI" + ] }, { "cell_type": "markdown", @@ -956,7 +957,8 @@ "# Delete bucket\n", "delete_bucket = False\n", "if delete_bucket or os.getenv(\"IS_TESTING\"):\n", - " ! gcloud storage rm --recursive $BUCKET_URI" ] + " ! 
gcloud storage rm --recursive $BUCKET_URI" + ] } ], "metadata": { diff --git a/notebooks/official/training/multi_node_ddp_gloo_vertex_training_with_custom_container.ipynb b/notebooks/official/training/multi_node_ddp_gloo_vertex_training_with_custom_container.ipynb index 0c4e176d1..5403e356b 100644 --- a/notebooks/official/training/multi_node_ddp_gloo_vertex_training_with_custom_container.ipynb +++ b/notebooks/official/training/multi_node_ddp_gloo_vertex_training_with_custom_container.ipynb @@ -282,7 +282,8 @@ }, "outputs": [], "source": [ - "! gcloud storage buckets create --location=$LOCATION --project=$PROJECT_ID $BUCKET_URI" ] + "! gcloud storage buckets create --location=$LOCATION --project=$PROJECT_ID $BUCKET_URI" + ] }, { "cell_type": "markdown", @@ -1202,7 +1203,8 @@ }, "outputs": [], "source": [ - "! gcloud storage ls $gcs_output_uri_prefix" ] + "! gcloud storage ls $gcs_output_uri_prefix" + ] }, { "cell_type": "markdown", @@ -1240,14 +1242,16 @@ "# Set this to true only if you'd like to delete your artifact repository\n", "delete_artifact_repository = False\n", "\n", - "! gcloud storage rm --recursive --continue-on-error $gcs_output_uri_prefix\n", "\n", + "! gcloud storage rm --recursive --continue-on-error $gcs_output_uri_prefix\n", + "\n", "! rm -rf ./trainer\n", "\n", "if delete_artifact_repository:\n", " !gcloud artifacts repositories delete {PRIVATE_REPO} --location={LOCATION} --quiet\n", "\n", "if delete_bucket:\n", - " ! gcloud storage rm --recursive $BUCKET_URI\n", "\n", + " ! 
gcloud storage rm --recursive $BUCKET_URI\n", + "\n", "if delete_tensorboard:\n", " tensorboard.delete()" ] From c1378af1c71bc0e9af3ba4af1bf9ce0b8d14957b Mon Sep 17 00:00:00 2001 From: gurusai-voleti Date: Mon, 22 Dec 2025 12:46:51 +0000 Subject: [PATCH 4/4] removed model_garden folder changes --- .../model_garden_openai_api_llama3_1.ipynb | 4 ++-- .../model_garden_pytorch_mixtral_deployment.ipynb | 12 +++++------- 2 files changed, 7 insertions(+), 9 deletions(-) diff --git a/notebooks/community/model_garden/model_garden_openai_api_llama3_1.ipynb b/notebooks/community/model_garden/model_garden_openai_api_llama3_1.ipynb index 654a39dec..6bf4011f3 100644 --- a/notebooks/community/model_garden/model_garden_openai_api_llama3_1.ipynb +++ b/notebooks/community/model_garden/model_garden_openai_api_llama3_1.ipynb @@ -253,7 +253,7 @@ }, "outputs": [], "source": [ - "! gcloud storage buckets create --location={LOCATION} --project={PROJECT_ID} {BUCKET_URI}" + "! gsutil mb -l {LOCATION} -p {PROJECT_ID} {BUCKET_URI}" ] }, { @@ -1237,7 +1237,7 @@ " rag.delete_corpus(name=rag_corpus.name)\n", "\n", "if delete_bucket:\n", - " ! gcloud storage rm --recursive $BUCKET_NAME" + " ! gsutil -m rm -r $BUCKET_NAME" ] } ], diff --git a/notebooks/community/model_garden/model_garden_pytorch_mixtral_deployment.ipynb b/notebooks/community/model_garden/model_garden_pytorch_mixtral_deployment.ipynb index 7234a2ddb..c0484bbfd 100644 --- a/notebooks/community/model_garden/model_garden_pytorch_mixtral_deployment.ipynb +++ b/notebooks/community/model_garden/model_garden_pytorch_mixtral_deployment.ipynb @@ -174,10 +174,10 @@ "if BUCKET_URI is None or BUCKET_URI.strip() == \"\" or BUCKET_URI == \"gs://\":\n", " BUCKET_URI = f\"gs://{PROJECT_ID}-tmp-{now}-{str(uuid.uuid4())[:4]}\"\n", " BUCKET_NAME = \"/\".join(BUCKET_URI.split(\"/\")[:3])\n", - " ! gcloud storage buckets create --location {REGION} {BUCKET_URI}\n", + " ! 
gsutil mb -l {REGION} {BUCKET_URI}\n", "else:\n", " assert BUCKET_URI.startswith(\"gs://\"), \"BUCKET_URI must start with `gs://`.\"\n", - " shell_output = ! gcloud storage ls --full --buckets {BUCKET_NAME} | grep \"Location Constraint:\" | sed \"s/Location Constraint://\"\n", + " shell_output = ! gsutil ls -Lb {BUCKET_NAME} | grep \"Location constraint:\" | sed \"s/Location constraint://\"\n", " bucket_region = shell_output[0].strip().lower()\n", " if bucket_region != REGION:\n", " raise ValueError(\n", @@ -202,7 +202,7 @@ "\n", "\n", "# Provision permissions to the SERVICE_ACCOUNT with the GCS bucket\n", - "! gcloud storage buckets add-iam-policy-binding $BUCKET_NAME --member=serviceAccount:{SERVICE_ACCOUNT} --role=roles/storage.admin\n", + "! gsutil iam ch serviceAccount:{SERVICE_ACCOUNT}:roles/storage.admin $BUCKET_NAME\n", "\n", "! gcloud config set project $PROJECT_ID\n", "! gcloud projects add-iam-policy-binding --no-user-output-enabled {PROJECT_ID} --member=serviceAccount:{SERVICE_ACCOUNT} --role=\"roles/storage.admin\"\n", @@ -236,9 +236,7 @@ "# https://cloud.google.com/vertex-ai/docs/predictions/configure-compute.\n", "\n", "# @markdown L4 GPUs are good serving solutions and are more cost effective than V100s for 8x7B models. The 8x22B models only works with A100/H100 GPUs now.\n", - "accelerator_type = (\n", - " \"NVIDIA_L4\" # @param [\"NVIDIA_L4\", \"NVIDIA_TESLA_V100\", \"NVIDIA_H100_80GB\"]\n", - ")\n", + "accelerator_type = \"NVIDIA_L4\" # @param [\"NVIDIA_L4\", \"NVIDIA_TESLA_V100\", \"NVIDIA_H100_80GB\"]\n", "\n", "if accelerator_type == \"NVIDIA_L4\":\n", " machine_type = \"g2-standard-96\"\n", @@ -828,7 +826,7 @@ "\n", "delete_bucket = False # @param {type:\"boolean\"}\n", "if delete_bucket:\n", - " ! gcloud storage rm --recursive $BUCKET_NAME" + " ! gsutil -m rm -r $BUCKET_NAME" ] } ],