diff --git a/notebooks/integrations/hugging-face/_nbtest.teardown.loading-model-from-hugging-face.ipynb b/notebooks/integrations/hugging-face/_nbtest.teardown.loading-model-from-hugging-face.ipynb new file mode 100644 index 00000000..cbdb13e7 --- /dev/null +++ b/notebooks/integrations/hugging-face/_nbtest.teardown.loading-model-from-hugging-face.ipynb @@ -0,0 +1,56 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "385c47c3-27e8-4b51-b8b7-26c97b9a3ad3", + "metadata": {}, + "outputs": [], + "source": [ + "from elasticsearch import Elasticsearch, NotFoundError\n", + "from getpass import getpass\n", + "\n", + "ELASTIC_CLOUD_ID = getpass(\"Elastic Cloud ID: \")\n", + "ELASTIC_API_KEY = getpass(\"Elastic Api Key: \")\n", + "\n", + "client = Elasticsearch(cloud_id=ELASTIC_CLOUD_ID, api_key=ELASTIC_API_KEY,)\n", + "\n", + "# delete the notebook's index\n", + "client.indices.delete(index=\"blogs\", ignore_unavailable=True)\n", + "\n", + "# delete the pipeline; only a missing pipeline is ignored, other errors surface\n", + "try:\n", + " client.ingest.delete_pipeline(id=\"vectorize_blogs\")\n", + "except NotFoundError:\n", + " pass\n", + "\n", + "# delete the model; only a missing model is ignored, other errors surface\n", + "try:\n", + " client.ml.delete_trained_model(model_id=\"sentence-transformers__all-minilm-l6-v2\", force=True)\n", + "except NotFoundError:\n", + " pass" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.6" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/notebooks/integrations/hugging-face/loading-model-from-hugging-face.ipynb b/notebooks/integrations/hugging-face/loading-model-from-hugging-face.ipynb index ddd16b8b..f9bbaf4c 100644 --- a/notebooks/integrations/hugging-face/loading-model-from-hugging-face.ipynb +++ 
b/notebooks/integrations/hugging-face/loading-model-from-hugging-face.ipynb @@ -3,7 +3,10 @@ { "cell_type": "markdown", "metadata": { - "collapsed": false + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } }, "source": [ "# NLP text search using hugging face transformer model\n", @@ -44,14 +47,20 @@ }, "outputs": [], "source": [ - "# install packages\n", - "!python3 -m pip install -qU sentence-transformers eland elasticsearch transformers\n", - "\n", + "!python3 -m pip install -qU sentence-transformers eland elasticsearch transformers" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ "# import modules\n", - "import pandas as pd, json\n", "from elasticsearch import Elasticsearch\n", "from getpass import getpass\n", - "from urllib.request import urlopen" + "from urllib.request import urlopen\n", + "import json" ] }, { @@ -93,8 +102,15 @@ "ELASTIC_CLOUD_ID = getpass(\"Elastic Cloud ID: \")\n", "\n", "# https://www.elastic.co/search-labs/tutorials/install-elasticsearch/elastic-cloud#creating-an-api-key\n", - "ELASTIC_API_KEY = getpass(\"Elastic Api Key: \")\n", - "\n", + "ELASTIC_API_KEY = getpass(\"Elastic Api Key: \")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ "!eland_import_hub_model --cloud-id $ELASTIC_CLOUD_ID --hub-model-id sentence-transformers/all-MiniLM-L6-v2 --task-type text_embedding --es-api-key $ELASTIC_API_KEY --start" ] }, @@ -304,7 +320,7 @@ }, { "cell_type": "code", - "execution_count": 106, + "execution_count": 22, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -315,125 +331,40 @@ }, "outputs": [ { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
_id_scorefields.title
0TxUU-YkBAHcz2kFqAun20.591786[Brewing in Beats: Track network connections]
1SxUU-YkBAHcz2kFqAun20.401099[Machine Learning for Nginx Logs - Identifying...
2UxUU-YkBAHcz2kFqAun20.390279[Data Visualization For Machine Learning]
3TBUU-YkBAHcz2kFqAun20.368995[Logstash Lines: Introduce integration plugins]
4UhUU-YkBAHcz2kFqAun20.368995[Logstash Lines: Introduce integration plugins]
5URUU-YkBAHcz2kFqAun20.356903[Keeping up with Kibana: This week in Kibana f...
6UBUU-YkBAHcz2kFqAun20.341939[Kibana 4 Video Tutorials, Part 3]
7VBUU-YkBAHcz2kFqAun20.337294[Introducing approximate nearest neighbor sear...
8ThUU-YkBAHcz2kFqAun20.336460[Where in the World is Elastic? - QCon Beijing...
9TRUU-YkBAHcz2kFqAun20.320756[EQL for the masses]
\n", - "
" - ], - "text/plain": [ - " _id _score \\\n", - "0 TxUU-YkBAHcz2kFqAun2 0.591786 \n", - "1 SxUU-YkBAHcz2kFqAun2 0.401099 \n", - "2 UxUU-YkBAHcz2kFqAun2 0.390279 \n", - "3 TBUU-YkBAHcz2kFqAun2 0.368995 \n", - "4 UhUU-YkBAHcz2kFqAun2 0.368995 \n", - "5 URUU-YkBAHcz2kFqAun2 0.356903 \n", - "6 UBUU-YkBAHcz2kFqAun2 0.341939 \n", - "7 VBUU-YkBAHcz2kFqAun2 0.337294 \n", - "8 ThUU-YkBAHcz2kFqAun2 0.336460 \n", - "9 TRUU-YkBAHcz2kFqAun2 0.320756 \n", - "\n", - " fields.title \n", - "0 [Brewing in Beats: Track network connections] \n", - "1 [Machine Learning for Nginx Logs - Identifying... \n", - "2 [Data Visualization For Machine Learning] \n", - "3 [Logstash Lines: Introduce integration plugins] \n", - "4 [Logstash Lines: Introduce integration plugins] \n", - "5 [Keeping up with Kibana: This week in Kibana f... \n", - "6 [Kibana 4 Video Tutorials, Part 3] \n", - "7 [Introducing approximate nearest neighbor sear... \n", - "8 [Where in the World is Elastic? - QCon Beijing... \n", - "9 [EQL for the masses] " - ] - }, - "execution_count": 106, - "metadata": {}, - "output_type": "execute_result" + "name": "stdout", + "output_type": "stream", + "text": [ + "['Brewing in Beats: Track network connections']\n", + "Score: 0.5917864\n", + "\n", + "['Machine Learning for Nginx Logs - Identifying Operational Issues with Your Website']\n", + "Score: 0.40109876\n", + "\n", + "['Data Visualization For Machine Learning']\n", + "Score: 0.39027885\n", + "\n", + "['Logstash Lines: Introduce integration plugins']\n", + "Score: 0.36899462\n", + "\n", + "['Keeping up with Kibana: This week in Kibana for November 29th, 2019']\n", + "Score: 0.35690257\n", + "\n", + "['How to implement similarity image search | Elastic.co | Elastic Blog']\n", + "Score: 0.34473613\n", + "\n", + "['Kibana 4 Video Tutorials, Part 3']\n", + "Score: 0.34193927\n", + "\n", + "['Introducing approximate nearest neighbor search in Elasticsearch 8.0 | Elastic Blog']\n", + "Score: 0.3372936\n", + "\n", + "['Where in the 
World is Elastic? - QCon Beijing, Devoxx France, Percona Live & AWS Summit Chicago']\n", + "Score: 0.33645985\n", + "\n", + "['EQL for the masses']\n", + "Score: 0.3207562\n", + "\n" + ] } ], "source": [ @@ -458,12 +389,19 @@ " knn=query,\n", " source=False)\n", "\n", - "\n", - "results = pd.json_normalize(json.loads(json.dumps(response.body['hits']['hits'])))\n", - "\n", - "# shows the result\n", - "results[['_id', '_score', 'fields.title']]\n" + "def show_results(results):\n", + " for result in results:\n", + " print(f'{result[\"fields\"][\"title\"]}\\nScore: {result[\"_score\"]}\\n')\n", + " \n", + "show_results(response.body['hits']['hits'])" ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { @@ -471,13 +409,21 @@ "provenance": [] }, "kernelspec": { - "display_name": "Python 3.11.3 64-bit", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", "name": "python", - "version": "3.9.6" + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.6" }, "vscode": { "interpreter": { @@ -486,5 +432,5 @@ } }, "nbformat": 4, - "nbformat_minor": 0 + "nbformat_minor": 4 }