From e6110bd86f20018d0a34487444ef0fd2174dbf54 Mon Sep 17 00:00:00 2001 From: Mike Pellegrini Date: Mon, 1 Jul 2024 11:57:50 -0400 Subject: [PATCH] Wait for ELSER service to be deployed --- notebooks/search/09-semantic-text.ipynb | 78 ++++++++++++++++++++----- 1 file changed, 63 insertions(+), 15 deletions(-) diff --git a/notebooks/search/09-semantic-text.ipynb b/notebooks/search/09-semantic-text.ipynb index ec56c112..798d961d 100644 --- a/notebooks/search/09-semantic-text.ipynb +++ b/notebooks/search/09-semantic-text.ipynb @@ -36,9 +36,7 @@ "source": [ "## Create Elastic Cloud deployment\n", "\n", - "If you don't have an Elastic Cloud deployment, sign up [here](https://cloud.elastic.co/registration?utm_source=github&utm_content=elasticsearch-labs-notebook) for a free trial.\n", - "\n", - "TODO: Instruct user to disable ML node autoscaling?" + "If you don't have an Elastic Cloud deployment, sign up [here](https://cloud.elastic.co/registration?utm_source=github&utm_content=elasticsearch-labs-notebook) for a free trial." 
], "metadata": { "collapsed": false @@ -94,7 +92,8 @@ "from elasticsearch import Elasticsearch, exceptions\n", "from urllib.request import urlopen\n", "from getpass import getpass\n", - "import json" + "import json\n", + "import time" ], "metadata": { "collapsed": false @@ -207,9 +206,7 @@ "\n", "Let's create the inference endpoint by using the [Create inference API](https://www.elastic.co/guide/en/elasticsearch/reference/current/put-inference-api.html).\n", "\n", - "For this example we'll use the [ELSER service](https://www.elastic.co/guide/en/machine-learning/current/ml-nlp-elser.html), but the inference API also supports [many other inference services](https://www.elastic.co/guide/en/elasticsearch/reference/current/put-inference-api.html#put-inference-api-desc).\n", - "\n", - "NOTE: If the inference creation request times out, wait a moment and try again" + "For this example we'll use the [ELSER service](https://www.elastic.co/guide/en/machine-learning/current/ml-nlp-elser.html), but the inference API also supports [many other inference services](https://www.elastic.co/guide/en/elasticsearch/reference/current/put-inference-api.html#put-inference-api-desc)." 
], "metadata": { "collapsed": false @@ -227,20 +224,71 @@ " # Inference endpoint does not exist\n", " pass\n", "\n", - "client.options(request_timeout=60).inference.put_model(\n", - " task_type=\"sparse_embedding\",\n", - " inference_id=\"my-elser-endpoint\",\n", - " body={\n", - " \"service\": \"elser\",\n", - " \"service_settings\": {\"num_allocations\": 1, \"num_threads\": 1},\n", - " },\n", - ")" + "try:\n", + " client.options(request_timeout=60, max_retries=3, retry_on_timeout=True).inference.put_model(\n", + " task_type=\"sparse_embedding\",\n", + " inference_id=\"my-elser-endpoint\",\n", + " body={\n", + " \"service\": \"elser\",\n", + " \"service_settings\": {\"num_allocations\": 1, \"num_threads\": 1},\n", + " },\n", + " )\n", + " print(\"Inference endpoint created successfully\")\n", + "except exceptions.BadRequestError as e:\n", + " if e.error == \"resource_already_exists_exception\":\n", + " print(\"Inference endpoint created successfully\")\n", + " else:\n", + " raise e\n" ], "metadata": { "collapsed": false }, "id": "8ee2188ea71324f5" }, + { + "cell_type": "markdown", + "source": [ + "Once the endpoint is created, we must wait until the backing ELSER service is deployed.\n", + "This can take a few minutes to complete." 
+   ],
+   "metadata": {
+    "collapsed": false
+   },
+   "id": "e94fd66761fd8087"
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "outputs": [],
+   "source": [
+    "inference_endpoint_info = client.inference.get_model(\n",
+    "    inference_id=\"my-elser-endpoint\",\n",
+    ")\n",
+    "model_id = inference_endpoint_info[\"endpoints\"][0][\"service_settings\"][\"model_id\"]\n",
+    "\n",
+    "while True:\n",
+    "    status = client.ml.get_trained_models_stats(\n",
+    "        model_id=model_id,\n",
+    "    )\n",
+    "\n",
+    "    deployment_stats = status[\"trained_model_stats\"][0].get(\"deployment_stats\")\n",
+    "    if deployment_stats is None:\n",
+    "        print(\"ELSER Model is currently being deployed.\")\n",
+    "        time.sleep(5)\n",
+    "        continue\n",
+    "    nodes = deployment_stats.get(\"nodes\")\n",
+    "    if nodes is not None and len(nodes) > 0:\n",
+    "        print(\"ELSER Model has been successfully deployed.\")\n",
+    "        break\n",
+    "    else:\n",
+    "        print(\"ELSER Model is currently being deployed.\")\n",
+    "    time.sleep(5)"
+   ],
+   "metadata": {
+    "collapsed": false
+   },
+   "id": "adb33329ce20b2f1"
+  },
  {
   "cell_type": "markdown",
   "source": [