From e6110bd86f20018d0a34487444ef0fd2174dbf54 Mon Sep 17 00:00:00 2001 From: Mike Pellegrini Date: Mon, 1 Jul 2024 11:57:50 -0400 Subject: [PATCH] Wait for ELSER service to be deployed --- notebooks/search/09-semantic-text.ipynb | 78 ++++++++++++++++++++----- 1 file changed, 63 insertions(+), 15 deletions(-) diff --git a/notebooks/search/09-semantic-text.ipynb b/notebooks/search/09-semantic-text.ipynb index ec56c112..798d961d 100644 --- a/notebooks/search/09-semantic-text.ipynb +++ b/notebooks/search/09-semantic-text.ipynb @@ -36,9 +36,7 @@ "source": [ "## Create Elastic Cloud deployment\n", "\n", - "If you don't have an Elastic Cloud deployment, sign up [here](https://cloud.elastic.co/registration?utm_source=github&utm_content=elasticsearch-labs-notebook) for a free trial.\n", - "\n", - "TODO: Instruct user to disable ML node autoscaling?" + "If you don't have an Elastic Cloud deployment, sign up [here](https://cloud.elastic.co/registration?utm_source=github&utm_content=elasticsearch-labs-notebook) for a free trial." 
], "metadata": { "collapsed": false @@ -94,7 +92,8 @@ "from elasticsearch import Elasticsearch, exceptions\n", "from urllib.request import urlopen\n", "from getpass import getpass\n", - "import json" + "import json\n", + "import time" ], "metadata": { "collapsed": false @@ -207,9 +206,7 @@ "\n", "Let's create the inference endpoint by using the [Create inference API](https://www.elastic.co/guide/en/elasticsearch/reference/current/put-inference-api.html).\n", "\n", - "For this example we'll use the [ELSER service](https://www.elastic.co/guide/en/machine-learning/current/ml-nlp-elser.html), but the inference API also supports [many other inference services](https://www.elastic.co/guide/en/elasticsearch/reference/current/put-inference-api.html#put-inference-api-desc).\n", - "\n", - "NOTE: If the inference creation request times out, wait a moment and try again" + "For this example we'll use the [ELSER service](https://www.elastic.co/guide/en/machine-learning/current/ml-nlp-elser.html), but the inference API also supports [many other inference services](https://www.elastic.co/guide/en/elasticsearch/reference/current/put-inference-api.html#put-inference-api-desc)." 
], "metadata": { "collapsed": false @@ -227,20 +224,71 @@ " # Inference endpoint does not exist\n", " pass\n", "\n", - "client.options(request_timeout=60).inference.put_model(\n", - " task_type=\"sparse_embedding\",\n", - " inference_id=\"my-elser-endpoint\",\n", - " body={\n", - " \"service\": \"elser\",\n", - " \"service_settings\": {\"num_allocations\": 1, \"num_threads\": 1},\n", - " },\n", - ")" + "try:\n", + " client.options(request_timeout=60, max_retries=3, retry_on_timeout=True).inference.put_model(\n", + " task_type=\"sparse_embedding\",\n", + " inference_id=\"my-elser-endpoint\",\n", + " body={\n", + " \"service\": \"elser\",\n", + " \"service_settings\": {\"num_allocations\": 1, \"num_threads\": 1},\n", + " },\n", + " )\n", + " print(\"Inference endpoint created successfully\")\n", + "except exceptions.BadRequestError as e:\n", + " if e.error == \"resource_already_exists_exception\":\n", + " print(\"Inference endpoint created successfully\")\n", + " else:\n", + " raise e\n" ], "metadata": { "collapsed": false }, "id": "8ee2188ea71324f5" }, + { + "cell_type": "markdown", + "source": [ + "Once the endpoint is created, we must wait until the backing ELSER service is deployed.\n", + "This can take a few minutes to complete." 
+   ],
+   "metadata": {
+    "collapsed": false
+   },
+   "id": "e94fd66761fd8087"
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "outputs": [],
+   "source": [
+    "inference_endpoint_info = client.inference.get_model(\n",
+    "    inference_id=\"my-elser-endpoint\",\n",
+    ")\n",
+    "model_id = inference_endpoint_info[\"endpoints\"][0][\"service_settings\"][\"model_id\"]\n",
+    "\n",
+    "while True:\n",
+    "    status = client.ml.get_trained_models_stats(\n",
+    "        model_id=model_id,\n",
+    "    )\n",
+    "\n",
+    "    deployment_stats = status[\"trained_model_stats\"][0].get(\"deployment_stats\")\n",
+    "    if deployment_stats is None:\n",
+    "        print(\"ELSER Model is currently being deployed.\")\n",
+    "        time.sleep(5)\n",
+    "        continue\n",
+    "    nodes = deployment_stats.get(\"nodes\")\n",
+    "    if nodes is not None and len(nodes) > 0:\n",
+    "        print(\"ELSER Model has been successfully deployed.\")\n",
+    "        break\n",
+    "    else:\n",
+    "        print(\"ELSER Model is currently being deployed.\")\n",
+    "    time.sleep(5)"
+   ],
+   "metadata": {
+    "collapsed": false
+   },
+   "id": "adb33329ce20b2f1"
+  },
  {
   "cell_type": "markdown",
   "source": [