|
17 | 17 | "source": [ |
18 | 18 | "💻 **Launch Locally**: You can run this notebook locally, but performance will be reduced.\n", |
19 | 19 | "\n", |
20 | | - "🚀 **Launch on Cloud**: A Ray Cluster with 4 GPUs (Click [here](http://console.anyscale.com/register) to easily start a Ray cluster on Anyscale) is recommanded to run this notebook." |
| 20 | + "🚀 **Launch on Cloud**: A Ray Cluster with 4 GPUs (Click [here](http://console.anyscale.com/register) to easily start a Ray cluster on Anyscale) is recommended to run this notebook." |
21 | 21 | ] |
22 | 22 | }, |
23 | 23 | { |
|
51 | 51 | { |
52 | 52 | "cell_type": "code", |
53 | 53 | "execution_count": null, |
54 | | - "metadata": { |
55 | | - "tags": [] |
56 | | - }, |
| 54 | + "metadata": {}, |
| 55 | + "outputs": [], |
| 56 | + "source": [ |
| 57 | + "# (Optional, macOS only): if `import xgboost` in the next cell raises an XGBoostError, install the OpenMP runtime with Homebrew (`brew install libomp`) and re-run the import; skip this cell otherwise\n", |
| 58 | + "!brew install libomp" |
| 59 | + ] |
| 60 | + }, |
| 61 | + { |
| 62 | + "cell_type": "code", |
| 63 | + "execution_count": null, |
| 64 | + "metadata": {}, |
57 | 65 | "outputs": [], |
58 | 66 | "source": [ |
59 | 67 | "import asyncio\n", |
60 | 68 | "import fastapi\n", |
61 | 69 | "import pandas as pd\n", |
62 | 70 | "import requests\n", |
| 71 | + "# macos: If you get an XGBoostError at import, you might have to `brew install libomp` before importing xgboost again\n", |
63 | 72 | "import xgboost\n", |
64 | 73 | "from pydantic import BaseModel\n", |
65 | 74 | "from sklearn.model_selection import train_test_split\n", |
|
68 | 77 | "import ray.tune\n", |
69 | 78 | "import ray.train\n", |
70 | 79 | "from ray.train.xgboost import XGBoostTrainer as RayTrainXGBoostTrainer\n", |
| 80 | + "from ray.train import RunConfig\n", |
71 | 81 | "import ray.data\n", |
72 | 82 | "import ray.serve" |
73 | 83 | ] |
|
86 | 96 | "\n", |
87 | 97 | "|<img src=\"https://technical-training-assets.s3.us-west-2.amazonaws.com/Introduction_to_Ray_AIR/e2e_air.png\" width=\"100%\" loading=\"lazy\">|\n", |
88 | 98 | "|:-:|\n", |
89 | | - "|Ray AI Libraries enable end-to-end ML development and provides multiple options for integrating with other tools and libraries form the MLOps ecosystem.|\n", |
| 99 | + "|Ray AI Libraries enable end-to-end ML development and provide multiple options for integrating with other tools and libraries from the MLOps ecosystem.|\n", |
90 | 100 | "\n" |
91 | 101 | ] |
92 | 102 | }, |
|
108 | 118 | "* **`fare_amount`**\n", |
109 | 119 | " * Float representing total price including tax, tip, fees, etc.\n", |
110 | 120 | "* **`tolls_amount`**\n", |
111 | | - " * Float represnting the total paid on tolls if any.\n", |
| 121 | + " * Float representing the total paid on tolls if any.\n", |
112 | 122 | "\n", |
113 | 123 | "**Target**\n", |
114 | 124 | "* **`trip_amount`**\n", |
|
177 | 187 | "metadata": {}, |
178 | 188 | "outputs": [], |
179 | 189 | "source": [ |
180 | | - "model_path = \"/mnt/cluster_storage/model.ubj\" # Modify this path to your local folder if it runs on your local environment" |
| 190 | + "storage_folder = \"/mnt/cluster_storage/\" # Modify this path to your local folder if it runs on your local environment" |
181 | 191 | ] |
182 | 192 | }, |
183 | 193 | { |
|
186 | 196 | "metadata": {}, |
187 | 197 | "outputs": [], |
188 | 198 | "source": [ |
| 199 | + "from pathlib import Path\n", |
| 200 | + "model_path = Path(storage_folder) / \"model.ubj\"\n", |
| 201 | + "\n", |
189 | 202 | "def my_xgboost_func(params): \n", |
190 | 203 | " evals_result = {}\n", |
191 | 204 | " dtrain, dtest = load_data()\n", |
|
196 | 209 | " evals=[(dtest, \"eval\")], \n", |
197 | 210 | " evals_result=evals_result,\n", |
198 | 211 | " )\n", |
| 212 | + " # model_path is a pathlib.Path built from storage_folder; save_model accepts a str or os.PathLike\n", |
199 | 213 | " bst.save_model(model_path)\n", |
200 | 214 | " print(f\"{evals_result['eval']}\")\n", |
201 | 215 | " return {\"eval-rmse\": evals_result[\"eval\"][\"rmse\"][-1]}\n", |
|
234 | 248 | " \"max_depth\": 6,\n", |
235 | 249 | " \"eta\": ray.tune.uniform(0.01, 0.3),\n", |
236 | 250 | " },\n", |
| 251 | + " run_config=RunConfig(storage_path=storage_folder),\n", |
237 | 252 | " tune_config=ray.tune.TuneConfig( # Tell it which metric to tune\n", |
238 | 253 | " metric=\"eval-rmse\",\n", |
239 | 254 | " mode=\"min\",\n", |
|
264 | 279 | "\n", |
265 | 280 | "In case your training data is too large, your training might take a long time to complete.\n", |
266 | 281 | "\n", |
267 | | - "To speed it up, shard the dataset across training workers and perform distributed XBoost training.\n", |
| 282 | + "To speed it up, shard the dataset across training workers and perform distributed XGBoost training.\n", |
268 | 283 | "\n", |
269 | 284 | "Let's redefine `load_data` to now load a different slice of the data given the worker index/rank." |
270 | 285 | ] |
|
484 | 499 | "metadata": {}, |
485 | 500 | "outputs": [], |
486 | 501 | "source": [ |
487 | | - "prediction_pipeline.write_parquet(\"/mnt/cluster_storage/xgboost_predictions\") #update this to your local path if runs on your local" |
| 502 | + "prediction_pipeline.write_parquet(str(Path(storage_folder) / \"xgboost_predictions\"))  # write under storage_folder (change it where it is defined for local runs) so the `ls` and clean-up cells find the output" |
488 | 503 | ] |
489 | 504 | }, |
490 | 505 | { |
|
500 | 515 | "metadata": {}, |
501 | 516 | "outputs": [], |
502 | 517 | "source": [ |
503 | | - "!ls /mnt/cluster_storage/xgboost_predictions/ #update this to your local path if runs on your local" |
| 518 | + "!ls {storage_folder}/xgboost_predictions/" |
| 519 | + ] |
| 520 | + }, |
| 521 | + { |
| 522 | + "cell_type": "markdown", |
| 523 | + "metadata": {}, |
| 524 | + "source": [ |
| 525 | + "### 2.6 Clean up" |
504 | 526 | ] |
505 | 527 | }, |
506 | 528 | { |
507 | 529 | "cell_type": "code", |
508 | 530 | "execution_count": null, |
509 | 531 | "metadata": {}, |
510 | 532 | "outputs": [], |
511 | | - "source": [] |
| 533 | + "source": [ |
| 534 | + "# Remove the files this notebook wrote (prediction output and saved model); -f keeps the cell re-runnable if they are already gone\n", |
| 535 | + "!rm -rf {storage_folder}/xgboost_predictions/\n", |
| 536 | + "!rm -f {model_path}" |
| 537 | + ] |
512 | 538 | } |
513 | 539 | ], |
514 | 540 | "metadata": { |
515 | 541 | "kernelspec": { |
516 | | - "display_name": "Xing-ray-jupyter-3.11", |
| 542 | + "display_name": "ray-jupyter", |
517 | 543 | "language": "python", |
518 | | - "name": "xing-ray-jupyter" |
| 544 | + "name": "python3" |
519 | 545 | }, |
520 | 546 | "language_info": { |
521 | 547 | "codemirror_mode": { |
|
0 commit comments