Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
d84ae4f
Migrate gsutil usage to gcloud storage
googlyrahman Oct 17, 2025
5d897fd
Manual Changes
bhandarivijay-png Dec 5, 2025
71c777d
Manual Changes
bhandarivijay-png Dec 5, 2025
eed921b
Fix: Updated gcloud storage command without formatting
bhandarivijay-png Dec 5, 2025
fd8bb16
Manual Changes
bhandarivijay-png Dec 9, 2025
a7a7bda
Manual Changes
bhandarivijay-png Dec 9, 2025
3f68933
Revert "Manual Changes"
bhandarivijay-png Dec 9, 2025
3959c2b
Manual Changes
bhandarivijay-png Dec 9, 2025
5c2c3f9
Revert "Manual Changes"
bhandarivijay-png Dec 9, 2025
8bd0d77
Manual Changes
bhandarivijay-png Dec 9, 2025
501a589
Revert "Manual Changes"
bhandarivijay-png Dec 9, 2025
d44e3b1
Manual Changes
bhandarivijay-png Dec 9, 2025
46fb663
Manual changes
bhandarivijay-png Dec 9, 2025
ce16436
Changes for 4339
bhandarivijay-png Dec 15, 2025
e86ab07
Changes for 4339
bhandarivijay-png Dec 15, 2025
9e18216
Changes for 4339
bhandarivijay-png Dec 15, 2025
3908d9e
Fix: Applied linter formatting and resolved style issues
bhandarivijay-png Dec 16, 2025
54ac7e9
Merge pull request #16 from bhandarivijay-png/ai-gsutil-migration-c51…
gurusai-voleti Dec 22, 2025
45891a6
gcloud to gsutilchanges for 4339
bhandarivijay-png Dec 22, 2025
75be161
Merge pull request #80 from bhandarivijay-png/ai-gsutil-migration-c51…
gurusai-voleti Dec 22, 2025
8aba254
removed gsutil to gcloud migration
bhandarivijay-png Dec 22, 2025
b949266
Manual changes
bhandarivijay-png Dec 22, 2025
36d6835
Merge pull request #83 from bhandarivijay-png/ai-gsutil-migration-c51…
gurusai-voleti Dec 22, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -398,6 +398,7 @@
"if not IS_GOOGLE_CLOUD_NOTEBOOK:\n",
" if \"google.colab\" in sys.modules:\n",
" from google.colab import auth as google_auth\n",
"\n",
" google_auth.authenticate_user()\n",
"\n",
" # If you are running this notebook locally, replace the string below with the\n",
Expand Down Expand Up @@ -472,7 +473,7 @@
},
"outputs": [],
"source": [
"! gsutil mb -l $REGION $BUCKET_NAME"
"! gcloud storage buckets create --location $REGION $BUCKET_NAME"
]
},
{
Expand All @@ -492,7 +493,7 @@
},
"outputs": [],
"source": [
"! gsutil ls -al $BUCKET_NAME"
"! gcloud storage ls --all-versions --long $BUCKET_NAME"
]
},
{
Expand Down Expand Up @@ -565,7 +566,7 @@
"outputs": [],
"source": [
"# Copy the sample data into your DATA_PATH\n",
"! gsutil cp \"gs://cloud-samples-data/vertex-ai/community-content/tf_agents_bandits_movie_recommendation_with_kfp_and_vertex_sdk/u.data\" $DATA_PATH"
"! gcloud storage cp \"gs://cloud-samples-data/vertex-ai/community-content/tf_agents_bandits_movie_recommendation_with_kfp_and_vertex_sdk/u.data\" $DATA_PATH"
]
},
{
Expand All @@ -579,11 +580,15 @@
"# Set hyperparameters.\n",
"BATCH_SIZE = 8 # @param {type:\"integer\"} Training and prediction batch size.\n",
"TRAINING_LOOPS = 5 # @param {type:\"integer\"} Number of training iterations.\n",
"STEPS_PER_LOOP = 2 # @param {type:\"integer\"} Number of driver steps per training iteration.\n",
"STEPS_PER_LOOP = (\n",
" 2 # @param {type:\"integer\"} Number of driver steps per training iteration.\n",
")\n",
"\n",
"# Set MovieLens simulation environment parameters.\n",
"RANK_K = 20 # @param {type:\"integer\"} Rank for matrix factorization in the MovieLens environment; also the observation dimension.\n",
"NUM_ACTIONS = 20 # @param {type:\"integer\"} Number of actions (movie items) to choose from.\n",
"NUM_ACTIONS = (\n",
" 20 # @param {type:\"integer\"} Number of actions (movie items) to choose from.\n",
")\n",
"PER_ARM = False # Use the non-per-arm version of the MovieLens environment.\n",
"\n",
"# Set agent parameters.\n",
Expand Down Expand Up @@ -621,7 +626,8 @@
"source": [
"# Define RL environment.\n",
"env = movielens_py_environment.MovieLensPyEnvironment(\n",
" DATA_PATH, RANK_K, BATCH_SIZE, num_movies=NUM_ACTIONS, csv_delimiter=\"\\t\")\n",
" DATA_PATH, RANK_K, BATCH_SIZE, num_movies=NUM_ACTIONS, csv_delimiter=\"\\t\"\n",
")\n",
"environment = tf_py_environment.TFPyEnvironment(env)\n",
"\n",
"# Define RL agent/algorithm.\n",
Expand All @@ -631,15 +637,17 @@
" tikhonov_weight=TIKHONOV_WEIGHT,\n",
" alpha=AGENT_ALPHA,\n",
" dtype=tf.float32,\n",
" accepts_per_arm_features=PER_ARM)\n",
" accepts_per_arm_features=PER_ARM,\n",
")\n",
"print(\"TimeStep Spec (for each batch):\\n\", agent.time_step_spec, \"\\n\")\n",
"print(\"Action Spec (for each batch):\\n\", agent.action_spec, \"\\n\")\n",
"print(\"Reward Spec (for each batch):\\n\", environment.reward_spec(), \"\\n\")\n",
"\n",
"# Define RL metric.\n",
"optimal_reward_fn = functools.partial(\n",
" environment_utilities.compute_optimal_reward_with_movielens_environment,\n",
" environment=environment)\n",
" environment=environment,\n",
")\n",
"regret_metric = tf_bandit_metrics.RegretMetric(optimal_reward_fn)\n",
"metrics = [regret_metric]"
]
Expand Down Expand Up @@ -704,35 +712,38 @@
" if training_data_spec_transformation_fn is None:\n",
" data_spec = agent.policy.trajectory_spec\n",
" else:\n",
" data_spec = training_data_spec_transformation_fn(\n",
" agent.policy.trajectory_spec)\n",
" replay_buffer = trainer.get_replay_buffer(data_spec, environment.batch_size,\n",
" steps_per_loop)\n",
" data_spec = training_data_spec_transformation_fn(agent.policy.trajectory_spec)\n",
" replay_buffer = trainer.get_replay_buffer(\n",
" data_spec, environment.batch_size, steps_per_loop\n",
" )\n",
"\n",
" # `step_metric` records the number of individual rounds of bandit interaction;\n",
" # that is, (number of trajectories) * batch_size.\n",
" step_metric = tf_metrics.EnvironmentSteps()\n",
" metrics = [\n",
" tf_metrics.NumberOfEpisodes(),\n",
" tf_metrics.AverageEpisodeLengthMetric(batch_size=environment.batch_size)\n",
" tf_metrics.AverageEpisodeLengthMetric(batch_size=environment.batch_size),\n",
" ]\n",
" if additional_metrics:\n",
" metrics += additional_metrics\n",
"\n",
" if isinstance(environment.reward_spec(), dict):\n",
" metrics += [tf_metrics.AverageReturnMultiMetric(\n",
" reward_spec=environment.reward_spec(),\n",
" batch_size=environment.batch_size)]\n",
" else:\n",
" metrics += [\n",
" tf_metrics.AverageReturnMetric(batch_size=environment.batch_size)]\n",
" tf_metrics.AverageReturnMultiMetric(\n",
" reward_spec=environment.reward_spec(), batch_size=environment.batch_size\n",
" )\n",
" ]\n",
" else:\n",
" metrics += [tf_metrics.AverageReturnMetric(batch_size=environment.batch_size)]\n",
"\n",
" # Store intermediate metric results, indexed by metric names.\n",
" metric_results = defaultdict(list)\n",
"\n",
" if training_data_spec_transformation_fn is not None:\n",
" def add_batch_fn(data): return replay_buffer.add_batch(training_data_spec_transformation_fn(data)) \n",
" \n",
"\n",
" def add_batch_fn(data):\n",
" return replay_buffer.add_batch(training_data_spec_transformation_fn(data))\n",
"\n",
" else:\n",
" add_batch_fn = replay_buffer.add_batch\n",
"\n",
Expand All @@ -742,10 +753,12 @@
" env=environment,\n",
" policy=agent.collect_policy,\n",
" num_steps=steps_per_loop * environment.batch_size,\n",
" observers=observers)\n",
" observers=observers,\n",
" )\n",
"\n",
" training_loop = trainer.get_training_loop_fn(\n",
" driver, replay_buffer, agent, steps_per_loop)\n",
" driver, replay_buffer, agent, steps_per_loop\n",
" )\n",
" saver = policy_saver.PolicySaver(agent.policy)\n",
"\n",
" for _ in range(training_loops):\n",
Expand Down Expand Up @@ -783,7 +796,8 @@
" environment=environment,\n",
" training_loops=TRAINING_LOOPS,\n",
" steps_per_loop=STEPS_PER_LOOP,\n",
" additional_metrics=metrics)\n",
" additional_metrics=metrics,\n",
")\n",
"\n",
"tf.profiler.experimental.stop()"
]
Expand Down Expand Up @@ -1092,11 +1106,15 @@
},
"outputs": [],
"source": [
"RUN_HYPERPARAMETER_TUNING = True # Execute hyperparameter tuning instead of regular training.\n",
"RUN_HYPERPARAMETER_TUNING = (\n",
" True # Execute hyperparameter tuning instead of regular training.\n",
")\n",
"TRAIN_WITH_BEST_HYPERPARAMETERS = False # Do not train.\n",
"\n",
"HPTUNING_RESULT_DIR = \"hptuning/\" # @param {type: \"string\"} Directory to store the best hyperparameter(s) in `BUCKET_NAME` and locally (temporarily).\n",
"HPTUNING_RESULT_PATH = os.path.join(HPTUNING_RESULT_DIR, \"result.json\") # @param {type: \"string\"} Path to the file containing the best hyperparameter(s)."
"HPTUNING_RESULT_PATH = os.path.join(\n",
" HPTUNING_RESULT_DIR, \"result.json\"\n",
") # @param {type: \"string\"} Path to the file containing the best hyperparameter(s)."
]
},
{
Expand Down Expand Up @@ -1124,7 +1142,7 @@
" image_uri: str,\n",
" args: List[str],\n",
" location: str = \"us-central1\",\n",
" api_endpoint: str = \"us-central1-aiplatform.googleapis.com\"\n",
" api_endpoint: str = \"us-central1-aiplatform.googleapis.com\",\n",
") -> None:\n",
" \"\"\"Creates a hyperparameter tuning job using a custom container.\n",
"\n",
Expand Down Expand Up @@ -1197,8 +1215,8 @@
"\n",
" # Create job\n",
" response = client.create_hyperparameter_tuning_job(\n",
" parent=parent,\n",
" hyperparameter_tuning_job=hyperparameter_tuning_job)\n",
" parent=parent, hyperparameter_tuning_job=hyperparameter_tuning_job\n",
" )\n",
" job_id = response.name.split(\"/\")[-1]\n",
" print(\"Job ID:\", job_id)\n",
" print(\"Job config:\", response)\n",
Expand Down Expand Up @@ -1242,7 +1260,8 @@
" image_uri=f\"gcr.io/{PROJECT_ID}/{HPTUNING_TRAINING_CONTAINER}:latest\",\n",
" args=args,\n",
" location=REGION,\n",
" api_endpoint=f\"{REGION}-aiplatform.googleapis.com\")"
" api_endpoint=f\"{REGION}-aiplatform.googleapis.com\",\n",
")"
]
},
{
Expand Down Expand Up @@ -1292,7 +1311,8 @@
" name = client.hyperparameter_tuning_job_path(\n",
" project=project,\n",
" location=location,\n",
" hyperparameter_tuning_job=hyperparameter_tuning_job_id)\n",
" hyperparameter_tuning_job=hyperparameter_tuning_job_id,\n",
" )\n",
" response = client.get_hyperparameter_tuning_job(name=name)\n",
" return response"
]
Expand All @@ -1313,7 +1333,8 @@
" location=REGION,\n",
" api_endpoint=f\"{REGION}-aiplatform.googleapis.com\")\n",
" if response.state.name == 'JOB_STATE_SUCCEEDED':\n",
"        # Keep the newline as an escape sequence: a raw line break inside a\n",
"        # quoted string literal leaves it unterminated (SyntaxError).\n",
"        print(\"Job succeeded.\\nJob Time:\", response.update_time - response.create_time)\n",
" trials = response.trials\n",
" print(\"Trials:\", trials)\n",
" break\n",
Expand Down Expand Up @@ -1348,8 +1369,8 @@
"if trials:\n",
" # Dict mapping from metric names to the best metric values seen so far\n",
" best_objective_values = dict.fromkeys(\n",
" [metric.metric_id for metric in trials[0].final_measurement.metrics],\n",
" -np.inf)\n",
" [metric.metric_id for metric in trials[0].final_measurement.metrics], -np.inf\n",
" )\n",
" # Dict mapping from metric names to a list of the best combination(s) of\n",
" # hyperparameter(s). Each combination is a dict mapping from hyperparameter\n",
" # names to their values.\n",
Expand All @@ -1358,12 +1379,13 @@
" # `final_measurement` and `parameters` are `RepeatedComposite` objects.\n",
" # Reference the structure above to extract the value of your interest.\n",
"        for metric in trial.final_measurement.metrics:\n",
"            params = {param.parameter_id: param.value for param in trial.parameters}\n",
"            if metric.value > best_objective_values[metric.metric_id]:\n",
"                # Strictly better: record the new best value so later trials are\n",
"                # compared against it, and restart the list of winning combinations.\n",
"                best_objective_values[metric.metric_id] = metric.value\n",
"                best_params[metric.metric_id] = [params]\n",
"            elif metric.value == best_objective_values[metric.metric_id]:\n",
"                # Handle cases where multiple hyperparameter values lead to the same performance.\n",
"                # Ties are filed under the metric's id, the same key the branch above uses.\n",
"                best_params[metric.metric_id].append(params)\n",
"    print(\"Best hyperparameter value(s):\")\n",
"    for metric, params in best_params.items():\n",
"        # Dicts are not orderable, so sort tied combinations by their repr.\n",
"        print(f\"Metric={metric}: {sorted(params, key=repr)}\")\n",
Expand Down Expand Up @@ -1443,7 +1465,9 @@
},
"outputs": [],
"source": [
"PREDICTION_CONTAINER = \"prediction-custom-container\" # @param {type:\"string\"} Name of the container image."
"PREDICTION_CONTAINER = (\n",
" \"prediction-custom-container\" # @param {type:\"string\"} Name of the container image.\n",
")"
]
},
{
Expand Down Expand Up @@ -1475,7 +1499,7 @@
" machineType: 'E2_HIGHCPU_8'\"\"\".format(\n",
" PROJECT_ID=PROJECT_ID,\n",
" PREDICTION_CONTAINER=PREDICTION_CONTAINER,\n",
" ARTIFACTS_DIR=ARTIFACTS_DIR\n",
" ARTIFACTS_DIR=ARTIFACTS_DIR,\n",
")\n",
"\n",
"with open(\"cloudbuild.yaml\", \"w\") as fp:\n",
Expand Down Expand Up @@ -1592,8 +1616,12 @@
},
"outputs": [],
"source": [
"RUN_HYPERPARAMETER_TUNING = False # Execute regular training instead of hyperparameter tuning.\n",
"TRAIN_WITH_BEST_HYPERPARAMETERS = True # @param {type:\"bool\"} Whether to use learned hyperparameters in training."
"RUN_HYPERPARAMETER_TUNING = (\n",
" False # Execute regular training instead of hyperparameter tuning.\n",
")\n",
"TRAIN_WITH_BEST_HYPERPARAMETERS = (\n",
" True # @param {type:\"bool\"} Whether to use learned hyperparameters in training.\n",
")"
]
},
{
Expand Down Expand Up @@ -1633,10 +1661,12 @@
"job = aiplatform.CustomContainerTrainingJob(\n",
" display_name=\"train-movielens\",\n",
" container_uri=f\"gcr.io/{PROJECT_ID}/{HPTUNING_TRAINING_CONTAINER}:latest\",\n",
" command=[\"python3\", \"-m\", \"src.training.task\"] + args, # Pass in training arguments, including hyperparameters.\n",
" command=[\"python3\", \"-m\", \"src.training.task\"]\n",
" + args, # Pass in training arguments, including hyperparameters.\n",
" model_serving_container_image_uri=f\"gcr.io/{PROJECT_ID}/{PREDICTION_CONTAINER}:latest\",\n",
" model_serving_container_predict_route=\"/predict\",\n",
" model_serving_container_health_route=\"/health\")\n",
" model_serving_container_health_route=\"/health\",\n",
")\n",
"\n",
"print(\"Training Spec:\", job._managed_model)\n",
"\n",
Expand All @@ -1645,7 +1675,8 @@
" replica_count=1,\n",
" machine_type=\"n1-standard-4\",\n",
" accelerator_type=\"ACCELERATOR_TYPE_UNSPECIFIED\",\n",
" accelerator_count=0)"
" accelerator_count=0,\n",
")"
]
},
{
Expand Down Expand Up @@ -1784,7 +1815,7 @@
"! gcloud ai models delete $model.name --quiet\n",
"\n",
"# Delete Cloud Storage objects that were created\n",
"! gsutil -m rm -r $ARTIFACTS_DIR"
"! gcloud storage rm --recursive $ARTIFACTS_DIR"
]
}
],
Expand Down
Loading