@@ -398,6 +398,7 @@
"if not IS_GOOGLE_CLOUD_NOTEBOOK:\n",
" if \"google.colab\" in sys.modules:\n",
" from google.colab import auth as google_auth\n",
"\n",
" google_auth.authenticate_user()\n",
"\n",
" # If you are running this notebook locally, replace the string below with the\n",
@@ -472,7 +473,7 @@
},
"outputs": [],
"source": [
"! gsutil mb -l $REGION $BUCKET_NAME"
"! gcloud storage buckets create --location $REGION $BUCKET_NAME"

Review comment (severity: high):

The order of arguments for gcloud storage buckets create is incorrect. The bucket name, which is a positional argument, must be specified before flags like --location.

! gcloud storage buckets create $BUCKET_NAME --location $REGION
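Editorial note: gcloud generally accepts flags on either side of positional arguments, so the flag-first form in the diff also runs; the positional-first form simply matches the documented synopsis. A minimal sketch reusing the notebook's variables (the describe check is an editorial assumption, not part of the PR):

! gcloud storage buckets create $BUCKET_NAME --location=$REGION
! gcloud storage buckets describe $BUCKET_NAME --format="value(location)"  # verify the bucket location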

]
},
{
@@ -492,7 +493,7 @@
},
"outputs": [],
"source": [
"! gsutil ls -al $BUCKET_NAME"
"! gcloud storage ls --all-versions --long $BUCKET_NAME"
]
},
{
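Editorial note: for reference, the gsutil flags in the line above map as -a to --all-versions and -l to --long. A placeholder example (the bucket name is illustrative, not from the notebook):

! gcloud storage ls --all-versions --long gs://my-example-bucket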
@@ -565,7 +566,7 @@
"outputs": [],
"source": [
"# Copy the sample data into your DATA_PATH\n",
"! gsutil cp \"gs://cloud-samples-data/vertex-ai/community-content/tf_agents_bandits_movie_recommendation_with_kfp_and_vertex_sdk/u.data\" $DATA_PATH"
"! gcloud storage cp \"gs://cloud-samples-data/vertex-ai/community-content/tf_agents_bandits_movie_recommendation_with_kfp_and_vertex_sdk/u.data\" $DATA_PATH"
]
},
{
@@ -579,11 +580,15 @@
"# Set hyperparameters.\n",
"BATCH_SIZE = 8 # @param {type:\"integer\"} Training and prediction batch size.\n",
"TRAINING_LOOPS = 5 # @param {type:\"integer\"} Number of training iterations.\n",
"STEPS_PER_LOOP = 2 # @param {type:\"integer\"} Number of driver steps per training iteration.\n",
"STEPS_PER_LOOP = (\n",
" 2 # @param {type:\"integer\"} Number of driver steps per training iteration.\n",
")\n",
"\n",
"# Set MovieLens simulation environment parameters.\n",
"RANK_K = 20 # @param {type:\"integer\"} Rank for matrix factorization in the MovieLens environment; also the observation dimension.\n",
"NUM_ACTIONS = 20 # @param {type:\"integer\"} Number of actions (movie items) to choose from.\n",
"NUM_ACTIONS = (\n",
" 20 # @param {type:\"integer\"} Number of actions (movie items) to choose from.\n",
")\n",
"PER_ARM = False # Use the non-per-arm version of the MovieLens environment.\n",
"\n",
"# Set agent parameters.\n",
@@ -621,7 +626,8 @@
"source": [
"# Define RL environment.\n",
"env = movielens_py_environment.MovieLensPyEnvironment(\n",
" DATA_PATH, RANK_K, BATCH_SIZE, num_movies=NUM_ACTIONS, csv_delimiter=\"\\t\")\n",
" DATA_PATH, RANK_K, BATCH_SIZE, num_movies=NUM_ACTIONS, csv_delimiter=\"\\t\"\n",
")\n",
"environment = tf_py_environment.TFPyEnvironment(env)\n",
"\n",
"# Define RL agent/algorithm.\n",
@@ -631,15 +637,17 @@
" tikhonov_weight=TIKHONOV_WEIGHT,\n",
" alpha=AGENT_ALPHA,\n",
" dtype=tf.float32,\n",
" accepts_per_arm_features=PER_ARM)\n",
" accepts_per_arm_features=PER_ARM,\n",
")\n",
"print(\"TimeStep Spec (for each batch):\\n\", agent.time_step_spec, \"\\n\")\n",
"print(\"Action Spec (for each batch):\\n\", agent.action_spec, \"\\n\")\n",
"print(\"Reward Spec (for each batch):\\n\", environment.reward_spec(), \"\\n\")\n",
"\n",
"# Define RL metric.\n",
"optimal_reward_fn = functools.partial(\n",
" environment_utilities.compute_optimal_reward_with_movielens_environment,\n",
" environment=environment)\n",
" environment=environment,\n",
")\n",
"regret_metric = tf_bandit_metrics.RegretMetric(optimal_reward_fn)\n",
"metrics = [regret_metric]"
]
@@ -704,35 +712,38 @@
" if training_data_spec_transformation_fn is None:\n",
" data_spec = agent.policy.trajectory_spec\n",
" else:\n",
" data_spec = training_data_spec_transformation_fn(\n",
" agent.policy.trajectory_spec)\n",
" replay_buffer = trainer.get_replay_buffer(data_spec, environment.batch_size,\n",
" steps_per_loop)\n",
" data_spec = training_data_spec_transformation_fn(agent.policy.trajectory_spec)\n",
" replay_buffer = trainer.get_replay_buffer(\n",
" data_spec, environment.batch_size, steps_per_loop\n",
" )\n",
"\n",
" # `step_metric` records the number of individual rounds of bandit interaction;\n",
" # that is, (number of trajectories) * batch_size.\n",
" step_metric = tf_metrics.EnvironmentSteps()\n",
" metrics = [\n",
" tf_metrics.NumberOfEpisodes(),\n",
" tf_metrics.AverageEpisodeLengthMetric(batch_size=environment.batch_size)\n",
" tf_metrics.AverageEpisodeLengthMetric(batch_size=environment.batch_size),\n",
" ]\n",
" if additional_metrics:\n",
" metrics += additional_metrics\n",
"\n",
" if isinstance(environment.reward_spec(), dict):\n",
" metrics += [tf_metrics.AverageReturnMultiMetric(\n",
" reward_spec=environment.reward_spec(),\n",
" batch_size=environment.batch_size)]\n",
" else:\n",
" metrics += [\n",
" tf_metrics.AverageReturnMetric(batch_size=environment.batch_size)]\n",
" tf_metrics.AverageReturnMultiMetric(\n",
" reward_spec=environment.reward_spec(), batch_size=environment.batch_size\n",
" )\n",
" ]\n",
" else:\n",
" metrics += [tf_metrics.AverageReturnMetric(batch_size=environment.batch_size)]\n",
"\n",
" # Store intermediate metric results, indexed by metric names.\n",
" metric_results = defaultdict(list)\n",
"\n",
" if training_data_spec_transformation_fn is not None:\n",
" def add_batch_fn(data): return replay_buffer.add_batch(training_data_spec_transformation_fn(data)) \n",
" \n",
"\n",
" def add_batch_fn(data):\n",
" return replay_buffer.add_batch(training_data_spec_transformation_fn(data))\n",
"\n",
" else:\n",
" add_batch_fn = replay_buffer.add_batch\n",
"\n",
@@ -742,10 +753,12 @@
" env=environment,\n",
" policy=agent.collect_policy,\n",
" num_steps=steps_per_loop * environment.batch_size,\n",
" observers=observers)\n",
" observers=observers,\n",
" )\n",
"\n",
" training_loop = trainer.get_training_loop_fn(\n",
" driver, replay_buffer, agent, steps_per_loop)\n",
" driver, replay_buffer, agent, steps_per_loop\n",
" )\n",
" saver = policy_saver.PolicySaver(agent.policy)\n",
"\n",
" for _ in range(training_loops):\n",
@@ -783,7 +796,8 @@
" environment=environment,\n",
" training_loops=TRAINING_LOOPS,\n",
" steps_per_loop=STEPS_PER_LOOP,\n",
" additional_metrics=metrics)\n",
" additional_metrics=metrics,\n",
")\n",
"\n",
"tf.profiler.experimental.stop()"
]
@@ -1092,11 +1106,15 @@
},
"outputs": [],
"source": [
"RUN_HYPERPARAMETER_TUNING = True # Execute hyperparameter tuning instead of regular training.\n",
"RUN_HYPERPARAMETER_TUNING = (\n",
" True # Execute hyperparameter tuning instead of regular training.\n",
")\n",
"TRAIN_WITH_BEST_HYPERPARAMETERS = False # Do not train.\n",
"\n",
"HPTUNING_RESULT_DIR = \"hptuning/\" # @param {type: \"string\"} Directory to store the best hyperparameter(s) in `BUCKET_NAME` and locally (temporarily).\n",
"HPTUNING_RESULT_PATH = os.path.join(HPTUNING_RESULT_DIR, \"result.json\") # @param {type: \"string\"} Path to the file containing the best hyperparameter(s)."
"HPTUNING_RESULT_PATH = os.path.join(\n",
" HPTUNING_RESULT_DIR, \"result.json\"\n",
") # @param {type: \"string\"} Path to the file containing the best hyperparameter(s)."
]
},
{
@@ -1124,7 +1142,7 @@
" image_uri: str,\n",
" args: List[str],\n",
" location: str = \"us-central1\",\n",
" api_endpoint: str = \"us-central1-aiplatform.googleapis.com\"\n",
" api_endpoint: str = \"us-central1-aiplatform.googleapis.com\",\n",
") -> None:\n",
" \"\"\"Creates a hyperparameter tuning job using a custom container.\n",
"\n",
@@ -1197,8 +1215,8 @@
"\n",
" # Create job\n",
" response = client.create_hyperparameter_tuning_job(\n",
" parent=parent,\n",
" hyperparameter_tuning_job=hyperparameter_tuning_job)\n",
" parent=parent, hyperparameter_tuning_job=hyperparameter_tuning_job\n",
" )\n",
" job_id = response.name.split(\"/\")[-1]\n",
" print(\"Job ID:\", job_id)\n",
" print(\"Job config:\", response)\n",
@@ -1242,7 +1260,8 @@
" image_uri=f\"gcr.io/{PROJECT_ID}/{HPTUNING_TRAINING_CONTAINER}:latest\",\n",
" args=args,\n",
" location=REGION,\n",
" api_endpoint=f\"{REGION}-aiplatform.googleapis.com\")"
" api_endpoint=f\"{REGION}-aiplatform.googleapis.com\",\n",
")"
]
},
{
@@ -1292,7 +1311,8 @@
" name = client.hyperparameter_tuning_job_path(\n",
" project=project,\n",
" location=location,\n",
" hyperparameter_tuning_job=hyperparameter_tuning_job_id)\n",
" hyperparameter_tuning_job=hyperparameter_tuning_job_id,\n",
" )\n",
" response = client.get_hyperparameter_tuning_job(name=name)\n",
" return response"
]
@@ -1313,7 +1333,8 @@
" location=REGION,\n",
" api_endpoint=f\"{REGION}-aiplatform.googleapis.com\")\n",
" if response.state.name == 'JOB_STATE_SUCCEEDED':\n",
" print(\"Job succeeded.\\nJob Time:\", response.update_time - response.create_time)\n",
" print(\"Job succeeded.\n",
"Job Time:\", response.update_time - response.create_time)\n",
" trials = response.trials\n",
" print(\"Trials:\", trials)\n",
" break\n",
@@ -1348,8 +1369,8 @@
"if trials:\n",
" # Dict mapping from metric names to the best metric values seen so far\n",
" best_objective_values = dict.fromkeys(\n",
" [metric.metric_id for metric in trials[0].final_measurement.metrics],\n",
" -np.inf)\n",
" [metric.metric_id for metric in trials[0].final_measurement.metrics], -np.inf\n",
" )\n",
" # Dict mapping from metric names to a list of the best combination(s) of\n",
" # hyperparameter(s). Each combination is a dict mapping from hyperparameter\n",
" # names to their values.\n",
@@ -1358,12 +1379,13 @@
" # `final_measurement` and `parameters` are `RepeatedComposite` objects.\n",
" # Reference the structure above to extract the value of your interest.\n",
" for metric in trial.final_measurement.metrics:\n",
" params = {\n",
" param.parameter_id: param.value for param in trial.parameters}\n",
" params = {param.parameter_id: param.value for param in trial.parameters}\n",
" if metric.value > best_objective_values[metric.metric_id]:\n",
" best_params[metric.metric_id] = [params]\n",
" elif metric.value == best_objective_values[metric.metric_id]:\n",
" best_params[param.parameter_id].append(params) # Handle cases where multiple hyperparameter values lead to the same performance.\n",
" best_params[param.parameter_id].append(\n",
" params\n",
" ) # Handle cases where multiple hyperparameter values lead to the same performance.\n",
" print(\"Best hyperparameter value(s):\")\n",
" for metric, params in best_params.items():\n",
" print(f\"Metric={metric}: {sorted(params)}\")\n",
@@ -1443,7 +1465,9 @@
},
"outputs": [],
"source": [
"PREDICTION_CONTAINER = \"prediction-custom-container\" # @param {type:\"string\"} Name of the container image."
"PREDICTION_CONTAINER = (\n",
" \"prediction-custom-container\" # @param {type:\"string\"} Name of the container image.\n",
")"
]
},
{
@@ -1475,7 +1499,7 @@
" machineType: 'E2_HIGHCPU_8'\"\"\".format(\n",
" PROJECT_ID=PROJECT_ID,\n",
" PREDICTION_CONTAINER=PREDICTION_CONTAINER,\n",
" ARTIFACTS_DIR=ARTIFACTS_DIR\n",
" ARTIFACTS_DIR=ARTIFACTS_DIR,\n",
")\n",
"\n",
"with open(\"cloudbuild.yaml\", \"w\") as fp:\n",
@@ -1592,8 +1616,12 @@
},
"outputs": [],
"source": [
"RUN_HYPERPARAMETER_TUNING = False # Execute regular training instead of hyperparameter tuning.\n",
"TRAIN_WITH_BEST_HYPERPARAMETERS = True # @param {type:\"bool\"} Whether to use learned hyperparameters in training."
"RUN_HYPERPARAMETER_TUNING = (\n",
" False # Execute regular training instead of hyperparameter tuning.\n",
")\n",
"TRAIN_WITH_BEST_HYPERPARAMETERS = (\n",
" True # @param {type:\"bool\"} Whether to use learned hyperparameters in training.\n",
")"
]
},
{
@@ -1633,10 +1661,12 @@
"job = aiplatform.CustomContainerTrainingJob(\n",
" display_name=\"train-movielens\",\n",
" container_uri=f\"gcr.io/{PROJECT_ID}/{HPTUNING_TRAINING_CONTAINER}:latest\",\n",
" command=[\"python3\", \"-m\", \"src.training.task\"] + args, # Pass in training arguments, including hyperparameters.\n",
" command=[\"python3\", \"-m\", \"src.training.task\"]\n",
" + args, # Pass in training arguments, including hyperparameters.\n",
" model_serving_container_image_uri=f\"gcr.io/{PROJECT_ID}/{PREDICTION_CONTAINER}:latest\",\n",
" model_serving_container_predict_route=\"/predict\",\n",
" model_serving_container_health_route=\"/health\")\n",
" model_serving_container_health_route=\"/health\",\n",
")\n",
"\n",
"print(\"Training Spec:\", job._managed_model)\n",
"\n",
@@ -1645,7 +1675,8 @@
" replica_count=1,\n",
" machine_type=\"n1-standard-4\",\n",
" accelerator_type=\"ACCELERATOR_TYPE_UNSPECIFIED\",\n",
" accelerator_count=0)"
" accelerator_count=0,\n",
")"
]
},
{
@@ -1784,7 +1815,7 @@
"! gcloud ai models delete $model.name --quiet\n",
"\n",
"# Delete Cloud Storage objects that were created\n",
"! gsutil -m rm -r $ARTIFACTS_DIR"
"! gcloud storage rm --recursive $ARTIFACTS_DIR"
]
}
],
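Editorial note: gsutil -m rm -r maps to gcloud storage rm --recursive as above; the -m flag needs no counterpart because gcloud storage parallelizes operations by default. A placeholder example (the path is illustrative, not from the notebook):

! gcloud storage rm --recursive gs://my-example-bucket/artifacts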