diff --git a/Makefile b/Makefile index 981b7f2c..103c3ebc 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ test: nbtest notebooks -notebooks: search document-chunking +notebooks: search document-chunking model-upgrades langchain search: $(MAKE) -C notebooks/search @@ -10,6 +10,12 @@ search: document-chunking: $(MAKE) -C notebooks/document-chunking +model-upgrades: + $(MAKE) -C notebooks/model-upgrades + +langchain: + $(MAKE) -C notebooks/langchain + install: pre-commit nbtest pre-commit: diff --git a/notebooks/integrations/hugging-face/_nbtest.teardown.loading-model-from-hugging-face.ipynb b/notebooks/integrations/hugging-face/_nbtest.teardown.loading-model-from-hugging-face.ipynb new file mode 100644 index 00000000..cbdb13e7 --- /dev/null +++ b/notebooks/integrations/hugging-face/_nbtest.teardown.loading-model-from-hugging-face.ipynb @@ -0,0 +1,56 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "385c47c3-27e8-4b51-b8b7-26c97b9a3ad3", + "metadata": {}, + "outputs": [], + "source": [ + "from elasticsearch import Elasticsearch\n", + "from getpass import getpass\n", + "\n", + "ELASTIC_CLOUD_ID = getpass(\"Elastic Cloud ID: \")\n", + "ELASTIC_API_KEY = getpass(\"Elastic Api Key: \")\n", + "\n", + "client = Elasticsearch(cloud_id=ELASTIC_CLOUD_ID, api_key=ELASTIC_API_KEY,)\n", + "\n", + "# delete the notebook's index\n", + "client.indices.delete(index=\"blogs\", ignore_unavailable=True)\n", + "\n", + "# delete the pipeline\n", + "try:\n", + " client.ingest.delete_pipeline(id=\"vectorize_blogs\")\n", + "except:\n", + " pass\n", + "\n", + "# delete the model\n", + "try:\n", + " client.ml.delete_trained_model(model_id=\"sentence-transformers__all-minilm-l6-v2\", force=True)\n", + "except:\n", + " pass" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + 
"file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.6" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/notebooks/integrations/hugging-face/loading-model-from-hugging-face.ipynb b/notebooks/integrations/hugging-face/loading-model-from-hugging-face.ipynb index ddd16b8b..f9bbaf4c 100644 --- a/notebooks/integrations/hugging-face/loading-model-from-hugging-face.ipynb +++ b/notebooks/integrations/hugging-face/loading-model-from-hugging-face.ipynb @@ -3,7 +3,10 @@ { "cell_type": "markdown", "metadata": { - "collapsed": false + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } }, "source": [ "# NLP text search using hugging face transformer model\n", @@ -44,14 +47,20 @@ }, "outputs": [], "source": [ - "# install packages\n", - "!python3 -m pip install -qU sentence-transformers eland elasticsearch transformers\n", - "\n", + "!python3 -m pip -qU install sentence-transformers eland elasticsearch transformers" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ "# import modules\n", - "import pandas as pd, json\n", "from elasticsearch import Elasticsearch\n", "from getpass import getpass\n", - "from urllib.request import urlopen" + "from urllib.request import urlopen\n", + "import json" ] }, { @@ -93,8 +102,15 @@ "ELASTIC_CLOUD_ID = getpass(\"Elastic Cloud ID: \")\n", "\n", "# https://www.elastic.co/search-labs/tutorials/install-elasticsearch/elastic-cloud#creating-an-api-key\n", - "ELASTIC_API_KEY = getpass(\"Elastic Api Key: \")\n", - "\n", + "ELASTIC_API_KEY = getpass(\"Elastic Api Key: \")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ "!eland_import_hub_model --cloud-id $ELASTIC_CLOUD_ID --hub-model-id sentence-transformers/all-MiniLM-L6-v2 --task-type text_embedding --es-api-key $ELASTIC_API_KEY --start" ] }, 
@@ -304,7 +320,7 @@ }, { "cell_type": "code", - "execution_count": 106, + "execution_count": 22, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -315,125 +331,40 @@ }, "outputs": [ { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
_id_scorefields.title
0TxUU-YkBAHcz2kFqAun20.591786[Brewing in Beats: Track network connections]
1SxUU-YkBAHcz2kFqAun20.401099[Machine Learning for Nginx Logs - Identifying...
2UxUU-YkBAHcz2kFqAun20.390279[Data Visualization For Machine Learning]
3TBUU-YkBAHcz2kFqAun20.368995[Logstash Lines: Introduce integration plugins]
4UhUU-YkBAHcz2kFqAun20.368995[Logstash Lines: Introduce integration plugins]
5URUU-YkBAHcz2kFqAun20.356903[Keeping up with Kibana: This week in Kibana f...
6UBUU-YkBAHcz2kFqAun20.341939[Kibana 4 Video Tutorials, Part 3]
7VBUU-YkBAHcz2kFqAun20.337294[Introducing approximate nearest neighbor sear...
8ThUU-YkBAHcz2kFqAun20.336460[Where in the World is Elastic? - QCon Beijing...
9TRUU-YkBAHcz2kFqAun20.320756[EQL for the masses]
\n", - "
" - ], - "text/plain": [ - " _id _score \\\n", - "0 TxUU-YkBAHcz2kFqAun2 0.591786 \n", - "1 SxUU-YkBAHcz2kFqAun2 0.401099 \n", - "2 UxUU-YkBAHcz2kFqAun2 0.390279 \n", - "3 TBUU-YkBAHcz2kFqAun2 0.368995 \n", - "4 UhUU-YkBAHcz2kFqAun2 0.368995 \n", - "5 URUU-YkBAHcz2kFqAun2 0.356903 \n", - "6 UBUU-YkBAHcz2kFqAun2 0.341939 \n", - "7 VBUU-YkBAHcz2kFqAun2 0.337294 \n", - "8 ThUU-YkBAHcz2kFqAun2 0.336460 \n", - "9 TRUU-YkBAHcz2kFqAun2 0.320756 \n", - "\n", - " fields.title \n", - "0 [Brewing in Beats: Track network connections] \n", - "1 [Machine Learning for Nginx Logs - Identifying... \n", - "2 [Data Visualization For Machine Learning] \n", - "3 [Logstash Lines: Introduce integration plugins] \n", - "4 [Logstash Lines: Introduce integration plugins] \n", - "5 [Keeping up with Kibana: This week in Kibana f... \n", - "6 [Kibana 4 Video Tutorials, Part 3] \n", - "7 [Introducing approximate nearest neighbor sear... \n", - "8 [Where in the World is Elastic? - QCon Beijing... \n", - "9 [EQL for the masses] " - ] - }, - "execution_count": 106, - "metadata": {}, - "output_type": "execute_result" + "name": "stdout", + "output_type": "stream", + "text": [ + "['Brewing in Beats: Track network connections']\n", + "Score: 0.5917864\n", + "\n", + "['Machine Learning for Nginx Logs - Identifying Operational Issues with Your Website']\n", + "Score: 0.40109876\n", + "\n", + "['Data Visualization For Machine Learning']\n", + "Score: 0.39027885\n", + "\n", + "['Logstash Lines: Introduce integration plugins']\n", + "Score: 0.36899462\n", + "\n", + "['Keeping up with Kibana: This week in Kibana for November 29th, 2019']\n", + "Score: 0.35690257\n", + "\n", + "['How to implement similarity image search | Elastic.co | Elastic Blog']\n", + "Score: 0.34473613\n", + "\n", + "['Kibana 4 Video Tutorials, Part 3']\n", + "Score: 0.34193927\n", + "\n", + "['Introducing approximate nearest neighbor search in Elasticsearch 8.0 | Elastic Blog']\n", + "Score: 0.3372936\n", + "\n", + "['Where in the 
World is Elastic? - QCon Beijing, Devoxx France, Percona Live & AWS Summit Chicago']\n", + "Score: 0.33645985\n", + "\n", + "['EQL for the masses']\n", + "Score: 0.3207562\n", + "\n" + ] } ], "source": [ @@ -458,12 +389,19 @@ " knn=query,\n", " source=False)\n", "\n", - "\n", - "results = pd.json_normalize(json.loads(json.dumps(response.body['hits']['hits'])))\n", - "\n", - "# shows the result\n", - "results[['_id', '_score', 'fields.title']]\n" + "def show_results(results):\n", + " for result in results:\n", + " print(f'{result[\"fields\"][\"title\"]}\\nScore: {result[\"_score\"]}\\n')\n", + " \n", + "show_results(response.body['hits']['hits'])" ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { @@ -471,13 +409,21 @@ "provenance": [] }, "kernelspec": { - "display_name": "Python 3.11.3 64-bit", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", "name": "python", - "version": "3.9.6" + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.6" }, "vscode": { "interpreter": { @@ -486,5 +432,5 @@ } }, "nbformat": 4, - "nbformat_minor": 0 + "nbformat_minor": 4 } diff --git a/notebooks/langchain/Makefile b/notebooks/langchain/Makefile new file mode 100644 index 00000000..7217c610 --- /dev/null +++ b/notebooks/langchain/Makefile @@ -0,0 +1,11 @@ +NBTEST = ../../bin/nbtest +NOTEBOOKS = \ + langchain-using-own-model.ipynb \ + langchain-vector-store-using-elser.ipynb + +.PHONY: all $(NOTEBOOKS) + +all: $(NOTEBOOKS) + +$(NOTEBOOKS): + -$(NBTEST) $@ diff --git a/notebooks/langchain/_nbtest.setup.langchain-vector-store-using-elser.ipynb b/notebooks/langchain/_nbtest.setup.langchain-vector-store-using-elser.ipynb new file mode 100644 index 00000000..5c1d4cac --- /dev/null +++ 
b/notebooks/langchain/_nbtest.setup.langchain-vector-store-using-elser.ipynb @@ -0,0 +1,124 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "62ab8486-e088-424b-80d4-1e9a6a181051", + "metadata": {}, + "outputs": [], + "source": [ + "!pip install -qU elasticsearch" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "088845c2-ec80-4a66-995d-6f8092fe5058", + "metadata": {}, + "outputs": [], + "source": [ + "# get the Elasticsearch client\n", + "from elasticsearch import Elasticsearch, exceptions\n", + "from getpass import getpass\n", + "import time\n", + "\n", + "ELASTIC_CLOUD_ID = getpass(\"Elastic Cloud ID: \")\n", + "ELASTIC_API_KEY = getpass(\"Elastic Api Key: \")\n", + "\n", + "client = Elasticsearch(cloud_id=ELASTIC_CLOUD_ID, api_key=ELASTIC_API_KEY,)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "26d7a2ad-4ace-4122-8d07-7b300014dca8", + "metadata": {}, + "outputs": [], + "source": [ + "# delete model if already downloaded and deployed\n", + "try:\n", + " client.ml.delete_trained_model(model_id=\".elser_model_2\", force=True)\n", + " print(\"Model deleted successfully, We will proceed with creating one\")\n", + "except exceptions.NotFoundError:\n", + " print(\"Model doesn't exist, but We will proceed with creating one\")\n", + "\n", + "# Creates the ELSER model configuration. Automatically downloads the model if it doesn't exist. 
\n", + "client.ml.put_trained_model(\n", + " model_id=\".elser_model_2\",\n", + " input={\n", + " \"field_names\": [\"text_field\"]\n", + " }\n", + " )\n", + "\n", + "while True:\n", + " status = client.ml.get_trained_models(\n", + " model_id=\".elser_model_2\",\n", + " include=\"definition_status\"\n", + " )\n", + " \n", + " if (status[\"trained_model_configs\"][0][\"fully_defined\"]):\n", + " break\n", + " time.sleep(5)\n", + "\n", + "# Start trained model deployment if not already deployed\n", + "client.ml.start_trained_model_deployment(\n", + " model_id=\".elser_model_2\",\n", + " number_of_allocations=1,\n", + " wait_for=\"starting\"\n", + ")\n", + "\n", + "while True:\n", + " status = client.ml.get_trained_models_stats(\n", + " model_id=\".elser_model_2\",\n", + " )\n", + " if (status[\"trained_model_stats\"][0][\"deployment_stats\"][\"state\"] == \"started\"):\n", + " print(\"ELSER Model has been successfully deployed.\")\n", + " break\n", + " else:\n", + " print(\"ELSER Model is currently being deployed.\")\n", + " time.sleep(5)\n", + "\n", + "time.sleep(5)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "75fb9815-56d2-45c7-8467-e92b2f8aee7c", + "metadata": {}, + "outputs": [], + "source": [ + "client.indices.delete(index=\"workplace_index\", ignore_unavailable=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5a8141b0-5ae5-44d4-aa84-32368a55d276", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.6" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/notebooks/langchain/_nbtest.teardown.langchain-using-own-model.ipynb 
b/notebooks/langchain/_nbtest.teardown.langchain-using-own-model.ipynb new file mode 100644 index 00000000..aac893b7 --- /dev/null +++ b/notebooks/langchain/_nbtest.teardown.langchain-using-own-model.ipynb @@ -0,0 +1,56 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "2faa616d-f250-4ae8-84c3-f92715aefbf7", + "metadata": {}, + "outputs": [], + "source": [ + "from elasticsearch import Elasticsearch\n", + "from getpass import getpass\n", + "\n", + "ELASTIC_CLOUD_ID = getpass(\"Elastic Cloud ID: \")\n", + "ELASTIC_API_KEY = getpass(\"Elastic Api Key: \")\n", + "\n", + "client = Elasticsearch(cloud_id=ELASTIC_CLOUD_ID, api_key=ELASTIC_API_KEY,)\n", + "\n", + "# delete the notebook's index\n", + "client.indices.delete(index=\"approx-search-demo\", ignore_unavailable=True)\n", + "\n", + "# delete the pipeline\n", + "try:\n", + " client.ingest.delete_pipeline(id=\"vectorize_workplace\")\n", + "except:\n", + " pass\n", + "\n", + "# delete the model\n", + "try:\n", + " client.ml.delete_trained_model(model_id=\"sentence-transformers__all-minilm-l6-v2\", force=True)\n", + "except:\n", + " pass" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.6" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/notebooks/langchain/_nbtest.teardown.langchain-vector-store-using-elser.ipynb b/notebooks/langchain/_nbtest.teardown.langchain-vector-store-using-elser.ipynb new file mode 100644 index 00000000..de691c7d --- /dev/null +++ b/notebooks/langchain/_nbtest.teardown.langchain-vector-store-using-elser.ipynb @@ -0,0 +1,64 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": 
"8bed2c83-87a7-40ca-bc5e-b9088c7e38a0", + "metadata": {}, + "outputs": [], + "source": [ + "from elasticsearch import Elasticsearch\n", + "from getpass import getpass\n", + "\n", + "ELASTIC_CLOUD_ID = getpass(\"Elastic Cloud ID: \")\n", + "ELASTIC_API_KEY = getpass(\"Elastic Api Key: \")\n", + "\n", + "client = Elasticsearch(cloud_id=ELASTIC_CLOUD_ID, api_key=ELASTIC_API_KEY,)\n", + "\n", + "# delete the notebook's index\n", + "client.indices.delete(index=\"workplace_index\", ignore_unavailable=True)\n", + "\n", + "# delete the pipeline\n", + "try:\n", + " client.ingest.delete_pipeline(id=\".elser_model_2_sparse_embedding\")\n", + "except:\n", + " pass\n", + "\n", + "# delete the model\n", + "try:\n", + " client.ml.delete_trained_model(model_id=\".elser_model_2\", force=True)\n", + "except:\n", + " pass" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6dbce06c-fea9-457b-87c4-0a0e5cfcc1b7", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.6" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/notebooks/langchain/langchain-using-own-model.ipynb b/notebooks/langchain/langchain-using-own-model.ipynb index cdffcee2..dfa75ae4 100644 --- a/notebooks/langchain/langchain-using-own-model.ipynb +++ b/notebooks/langchain/langchain-using-own-model.ipynb @@ -26,21 +26,9 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. 
This behaviour is the source of the following dependency conflicts.\n", - "langserve 0.0.21 requires pydantic<2,>=1, but you have pydantic 2.3.0 which is incompatible.\u001b[0m\u001b[31m\n", - "\u001b[0m\n", - "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m23.2\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m23.3.1\u001b[0m\n", - "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n" - ] - } - ], + "outputs": [], "source": [ "!python3 -m pip install -qU langchain elasticsearch tiktoken sentence-transformers eland transformers\n", "\n", @@ -79,9 +67,6 @@ "# https://www.elastic.co/search-labs/tutorials/install-elasticsearch/elastic-cloud#creating-an-api-key\n", "ELASTIC_API_KEY = getpass(\"Elastic Api Key: \")\n", "\n", - "# https://platform.openai.com/api-keys\n", - "OPENAI_API_KEY = getpass(\"OpenAI API key: \")\n", - "\n", "vector_store = ElasticsearchStore(\n", " es_cloud_id=ELASTIC_CLOUD_ID, \n", " es_api_key=ELASTIC_API_KEY, \n", @@ -192,7 +177,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -200,33 +185,7 @@ "id": "xAkc1OVcOxy3", "outputId": "b2453634-89b8-48bc-ac65-a6a1c3b8170f" }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Creating index approx-search-demo\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/var/folders/_8/2nxt7jjx27bd8bm5lw63ht340000gn/T/ipykernel_32495/3913732881.py:37: DeprecationWarning: Passing transport options in the API method is deprecated. 
Use 'Elasticsearch.options()' instead.\n", - " vector_store.client.indices.create(index=INDEX_NAME, mappings=INDEX_MAPPING, settings=INDEX_SETTINGS,\n" - ] - }, - { - "data": { - "text/plain": [ - "ObjectApiResponse({'acknowledged': True, 'shards_acknowledged': True, 'index': 'approx-search-demo'})" - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "# define index name\n", "INDEX_NAME=\"approx-search-demo\"\n", @@ -260,10 +219,8 @@ "# check if we want to delete index before creating the index\n", "if(SHOULD_DELETE_INDEX):\n", " if vector_store.client.indices.exists(index=INDEX_NAME):\n", - " print(\"Deleting existing %s\" % INDEX_NAME)\n", " vector_store.client.indices.delete(index=INDEX_NAME, ignore=[400, 404])\n", "\n", - "print(\"Creating index %s\" % INDEX_NAME)\n", "vector_store.client.indices.create(index=INDEX_NAME, mappings=INDEX_MAPPING, settings=INDEX_SETTINGS,\n", " ignore=[400, 404])\n" ] @@ -368,7 +325,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3.11.4 64-bit", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, @@ -382,9 +339,8 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.3" + "version": "3.11.6" }, - "orig_nbformat": 4, "vscode": { "interpreter": { "hash": "b0fa6594d8f4cbf19f97940f81e996739fb7646882a419484c72d19e05852a7e" @@ -392,5 +348,5 @@ } }, "nbformat": 4, - "nbformat_minor": 2 + "nbformat_minor": 4 } diff --git a/notebooks/langchain/langchain-vector-store-using-elser.ipynb b/notebooks/langchain/langchain-vector-store-using-elser.ipynb index 5efefbd9..e32782eb 100644 --- a/notebooks/langchain/langchain-vector-store-using-elser.ipynb +++ b/notebooks/langchain/langchain-vector-store-using-elser.ipynb @@ -28,7 +28,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -59,7 +59,7 @@ }, { 
"cell_type": "code", - "execution_count": 2, + "execution_count": 8, "metadata": {}, "outputs": [], "source": [ @@ -70,9 +70,6 @@ "# https://www.elastic.co/search-labs/tutorials/install-elasticsearch/elastic-cloud#creating-an-api-key\n", "ELASTIC_API_KEY = getpass(\"Elastic Api Key: \")\n", "\n", - "# https://platform.openai.com/api-keys\n", - "OPENAI_API_KEY = getpass(\"OpenAI API key: \")\n", - "\n", "vector_store = ElasticsearchStore(\n", " es_cloud_id=ELASTIC_CLOUD_ID, \n", " es_api_key=ELASTIC_API_KEY,\n", @@ -91,7 +88,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 9, "metadata": {}, "outputs": [], "source": [ @@ -114,7 +111,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 10, "metadata": {}, "outputs": [], "source": [ @@ -148,7 +145,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 16, "metadata": {}, "outputs": [], "source": [ @@ -157,7 +154,11 @@ " es_cloud_id=ELASTIC_CLOUD_ID,\n", " es_api_key=ELASTIC_API_KEY,\n", " index_name=\"workplace_index\",\n", - " strategy=ElasticsearchStore.SparseVectorRetrievalStrategy(model_id = \".elser_model_2\")\n", + " strategy=ElasticsearchStore.SparseVectorRetrievalStrategy(model_id=\".elser_model_2\"),\n", + " bulk_kwargs={\n", + " \"chunk_size\": 5,\n", + " \"max_chunk_bytes\": 100000000\n", + " }\n", ")" ] }, @@ -171,7 +172,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 18, "metadata": {}, "outputs": [], "source": [ @@ -192,7 +193,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 19, "metadata": {}, "outputs": [ { @@ -200,10 +201,10 @@ "output_type": "stream", "text": [ "Total results: 4\n", - "page_content='Performance-Based Compensation:\\nIn addition to the defined compensation bands, we emphasize a performance-based compensation model. 
Performance evaluations will be conducted regularly, and employees exceeding performance expectations will be eligible for bonuses, incentives, and salary increases. This approach rewards high achievers and motivates employees to excel in their roles.' metadata={'summary': 'This document outlines a compensation framework for IT teams. It includes job levels, compensation bands, and performance-based incentives to ensure fair and competitive wages. Regular market benchmarking will be conducted to adjust the bands according to industry trends.', 'rolePermissions': ['manager'], 'name': 'Compensation Framework For It Teams'}\n", - "page_content='Performance-Based Compensation:\\nIn addition to the defined compensation bands, we emphasize a performance-based compensation model. Performance evaluations will be conducted regularly, and employees exceeding performance expectations will be eligible for bonuses, incentives, and salary increases. This approach rewards high achievers and motivates employees to excel in their roles.' metadata={'summary': 'This document outlines a compensation framework for IT teams. It includes job levels, compensation bands, and performance-based incentives to ensure fair and competitive wages. Regular market benchmarking will be conducted to adjust the bands according to industry trends.', 'rolePermissions': ['manager'], 'name': 'Compensation Framework For It Teams'}\n", - "page_content='Purpose:\\nThe purpose of this compensation bands strategy is to:\\na. Define clear guidelines for salary ranges based on job levels and market benchmarks.\\nb. Support equitable compensation practices across different teams.\\nc. Encourage employee growth and performance.\\nd. Enable effective budgeting and resource allocation.' metadata={'summary': 'This document outlines a compensation framework for IT teams. It includes job levels, compensation bands, and performance-based incentives to ensure fair and competitive wages. 
Regular market benchmarking will be conducted to adjust the bands according to industry trends.', 'rolePermissions': ['manager'], 'name': 'Compensation Framework For It Teams'}\n", - "page_content='Purpose:\\nThe purpose of this compensation bands strategy is to:\\na. Define clear guidelines for salary ranges based on job levels and market benchmarks.\\nb. Support equitable compensation practices across different teams.\\nc. Encourage employee growth and performance.\\nd. Enable effective budgeting and resource allocation.' metadata={'summary': 'This document outlines a compensation framework for IT teams. It includes job levels, compensation bands, and performance-based incentives to ensure fair and competitive wages. Regular market benchmarking will be conducted to adjust the bands according to industry trends.', 'rolePermissions': ['manager'], 'name': 'Compensation Framework For It Teams'}\n" + "page_content=\"Compensation Bands:\\nBased on the job levels, the following compensation bands have been established:\\na. Entry-Level Band: This band encompasses salary ranges for employees in entry-level positions. It aims to provide competitive compensation for individuals starting their careers within the company.\\n\\nb. Intermediate-Level Band: This band covers salary ranges for employees who have gained moderate experience and expertise in their respective roles. It rewards employees for their growing skill set and contributions.\\n\\nc. Senior-Level Band: The senior-level band includes salary ranges for experienced employees who have attained advanced skills and have a proven track record of delivering results. It reflects the increased responsibilities and expectations placed upon these individuals.\\n\\nd. Leadership-Level Band: This band comprises salary ranges for managers and team leaders responsible for guiding and overseeing their respective teams. 
It considers their leadership abilities, strategic thinking, and the impact they have on the company's success.\\n\\nMarket Benchmarking:\\nTo ensure our compensation remains competitive, regular market benchmarking will be conducted. This involves analyzing industry salary trends, regional compensation data, and market demand for specific roles. The findings will inform periodic adjustments to our compensation bands to maintain alignment with the market.\\n\\nPerformance-Based Compensation:\\nIn addition to the defined compensation bands, we emphasize a performance-based compensation model. Performance evaluations will be conducted regularly, and employees exceeding performance expectations will be eligible for bonuses, incentives, and salary increases. This approach rewards high achievers and motivates employees to excel in their roles.\\n\\nConclusion:\\nBy implementing this compensation bands strategy, our IT company aims to establish fair and competitive compensation practices that align with market standards and foster employee satisfaction. Regular evaluations and market benchmarking will enable us to adapt and refine the strategy to meet the evolving needs of our organization.\" metadata={'summary': 'This document outlines a compensation framework for IT teams. It includes job levels, compensation bands, and performance-based incentives to ensure fair and competitive wages. Regular market benchmarking will be conducted to adjust the bands according to industry trends.', 'rolePermissions': ['manager'], 'name': 'Compensation Framework For It Teams'}\n", + "page_content=\"Introduction:\\nThis document outlines the compensation bands strategy for the various teams within our IT company. The goal is to establish a fair and competitive compensation structure that aligns with industry standards, rewards performance, and attracts top talent. 
By implementing this strategy, we aim to foster employee satisfaction and retention while ensuring the company's overall success.\\n\\nPurpose:\\nThe purpose of this compensation bands strategy is to:\\na. Define clear guidelines for salary ranges based on job levels and market benchmarks.\\nb. Support equitable compensation practices across different teams.\\nc. Encourage employee growth and performance.\\nd. Enable effective budgeting and resource allocation.\\n\\nJob Levels:\\nTo establish a comprehensive compensation structure, we have defined distinct job levels within each team. These levels reflect varying degrees of skills, experience, and responsibilities. The levels include:\\na. Entry-Level: Employees with limited experience or early career professionals.\\nb. Intermediate-Level: Employees with moderate experience and demonstrated competence.\\nc. Senior-Level: Experienced employees with advanced skills and leadership capabilities.\\nd. Leadership-Level: Managers and team leaders responsible for strategic decision-making.\\n\\nCompensation Bands:\\nBased on the job levels, the following compensation bands have been established:\\na. Entry-Level Band: This band encompasses salary ranges for employees in entry-level positions. It aims to provide competitive compensation for individuals starting their careers within the company.\\n\\nb. Intermediate-Level Band: This band covers salary ranges for employees who have gained moderate experience and expertise in their respective roles. It rewards employees for their growing skill set and contributions.\\n\\nc. Senior-Level Band: The senior-level band includes salary ranges for experienced employees who have attained advanced skills and have a proven track record of delivering results. It reflects the increased responsibilities and expectations placed upon these individuals.\\n\\nd. 
Leadership-Level Band: This band comprises salary ranges for managers and team leaders responsible for guiding and overseeing their respective teams. It considers their leadership abilities, strategic thinking, and the impact they have on the company's success.\\n\\nMarket Benchmarking:\\nTo ensure our compensation remains competitive, regular market benchmarking will be conducted. This involves analyzing industry salary trends, regional compensation data, and market demand for specific roles. The findings will inform periodic adjustments to our compensation bands to maintain alignment with the market.\" metadata={'summary': 'This document outlines a compensation framework for IT teams. It includes job levels, compensation bands, and performance-based incentives to ensure fair and competitive wages. Regular market benchmarking will be conducted to adjust the bands according to industry trends.', 'rolePermissions': ['manager'], 'name': 'Compensation Framework For It Teams'}\n", + "page_content='Performance Ratings\\nBased on the performance evaluation, employees will receive a performance rating that reflects their overall performance during the cycle. The rating system should be clearly defined and consistently applied across the organization. Performance ratings will be used to inform decisions regarding promotions, salary increases, and other rewards or recognition.\\nPromotions and Advancements\\nHigh-performing employees who consistently demonstrate strong performance, leadership, and a commitment to the company’s values may be considered for promotions or other advancement opportunities. Promotions will be based on factors such as performance ratings, skills, experience, and the needs of the organization. 
Employees interested in pursuing a promotion should discuss their career goals and development plans with their supervisor.\\nPerformance Improvement Plans\\nEmployees who receive a low performance rating or are struggling to meet their performance goals may be placed on a Performance Improvement Plan (PIP). A PIP is a structured plan designed to help the employee address specific areas of concern, set achievable improvement goals, and receive additional support or resources as needed. Employees on a PIP will be closely monitored and re-evaluated at the end of the improvement period to determine if satisfactory progress has been made.\\nRecognition and Rewards\\nOur company believes in recognizing and rewarding employees for their hard work and dedication. In addition to promotions and salary increases, employees may be eligible for other forms of recognition or rewards based on their performance. This may include bonuses, awards, or other incentives designed to motivate and celebrate employee achievements. The specific criteria and eligibility for these rewards will be communicated by the HR department or management.' metadata={'summary': 'This Performance Management Policy outlines a consistent and transparent process for evaluating, recognizing, and rewarding employees. It includes goal setting, ongoing feedback, performance evaluations, ratings, promotions, and rewards. The policy applies to all employees and encourages open communication and professional growth.', 'rolePermissions': ['demo', 'manager'], 'name': 'Performance Management Policy'}\n", + "page_content=\"Purpose\\n\\nThe purpose of this vacation policy is to outline the guidelines and procedures for requesting and taking time off from work for personal and leisure purposes. 
This policy aims to promote a healthy work-life balance and encourage employees to take time to rest and recharge.\\nScope\\n\\nThis policy applies to all full-time and part-time employees who have completed their probationary period.\\nVacation Accrual\\n\\nFull-time employees accrue vacation time at a rate of [X hours] per month, equivalent to [Y days] per year. Part-time employees accrue vacation time on a pro-rata basis, calculated according to their scheduled work hours.\\n\\nVacation time will begin to accrue from the first day of employment, but employees are eligible to take vacation time only after completing their probationary period. Unused vacation time will be carried over to the next year, up to a maximum of [Z days]. Any additional unused vacation time will be forfeited.\\nVacation Scheduling\\n\\nEmployees are required to submit vacation requests to their supervisor at least [A weeks] in advance, specifying the start and end dates of their vacation. Supervisors will review and approve vacation requests based on business needs, ensuring adequate coverage during the employee's absence.\\n\\nEmployees are encouraged to plan their vacations around the company's peak and non-peak periods to minimize disruptions. Vacation requests during peak periods may be subject to limitations and require additional advance notice.\\nVacation Pay\\n\\nEmployees will receive their regular pay during their approved vacation time. Vacation pay will be calculated based on the employee's average earnings over the [B weeks] preceding their vacation.\\nUnplanned Absences and Vacation Time\\n\\nIn the event of an unplanned absence due to illness or personal emergencies, employees may use their accrued vacation time, subject to supervisor approval. 
Employees must inform their supervisor as soon as possible and provide any required documentation upon their return to work.\\nVacation Time and Termination of Employment\\n\\nIf an employee's employment is terminated, they will be paid out for any unused vacation time, calculated based on their current rate of pay.\\nPolicy Review and Updates\\n\\nThis vacation policy will be reviewed periodically and updated as necessary, taking into account changes in labor laws, business needs, and employee feedback.\\nQuestions and Concerns\\n\\nEmployees are encouraged to direct any questions or concerns about this policy to their supervisor or the HR department.\" metadata={'summary': ': This policy outlines the guidelines and procedures for requesting and taking time off from work for personal and leisure purposes. Full-time employees accrue vacation time at a rate of [X hours] per month, equivalent to [Y days] per year. Vacation requests must be submitted to supervisors at least', 'rolePermissions': ['demo', 'manager'], 'name': 'Company Vacation Policy'}\n" ] } ], @@ -223,7 +224,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 20, "metadata": {}, "outputs": [ { @@ -231,16 +232,16 @@ "output_type": "stream", "text": [ "Total results: 10\n", - "page_content='Performance-Based Compensation:\\nIn addition to the defined compensation bands, we emphasize a performance-based compensation model. Performance evaluations will be conducted regularly, and employees exceeding performance expectations will be eligible for bonuses, incentives, and salary increases. This approach rewards high achievers and motivates employees to excel in their roles.' metadata={'summary': 'This document outlines a compensation framework for IT teams. It includes job levels, compensation bands, and performance-based incentives to ensure fair and competitive wages. 
Regular market benchmarking will be conducted to adjust the bands according to industry trends.', 'rolePermissions': ['manager'], 'name': 'Compensation Framework For It Teams'}\n", - "page_content='Performance-Based Compensation:\\nIn addition to the defined compensation bands, we emphasize a performance-based compensation model. Performance evaluations will be conducted regularly, and employees exceeding performance expectations will be eligible for bonuses, incentives, and salary increases. This approach rewards high achievers and motivates employees to excel in their roles.' metadata={'summary': 'This document outlines a compensation framework for IT teams. It includes job levels, compensation bands, and performance-based incentives to ensure fair and competitive wages. Regular market benchmarking will be conducted to adjust the bands according to industry trends.', 'rolePermissions': ['manager'], 'name': 'Compensation Framework For It Teams'}\n", - "page_content='Purpose:\\nThe purpose of this compensation bands strategy is to:\\na. Define clear guidelines for salary ranges based on job levels and market benchmarks.\\nb. Support equitable compensation practices across different teams.\\nc. Encourage employee growth and performance.\\nd. Enable effective budgeting and resource allocation.' metadata={'summary': 'This document outlines a compensation framework for IT teams. It includes job levels, compensation bands, and performance-based incentives to ensure fair and competitive wages. Regular market benchmarking will be conducted to adjust the bands according to industry trends.', 'rolePermissions': ['manager'], 'name': 'Compensation Framework For It Teams'}\n", - "page_content='Purpose:\\nThe purpose of this compensation bands strategy is to:\\na. Define clear guidelines for salary ranges based on job levels and market benchmarks.\\nb. Support equitable compensation practices across different teams.\\nc. Encourage employee growth and performance.\\nd. 
Enable effective budgeting and resource allocation.' metadata={'summary': 'This document outlines a compensation framework for IT teams. It includes job levels, compensation bands, and performance-based incentives to ensure fair and competitive wages. Regular market benchmarking will be conducted to adjust the bands according to industry trends.', 'rolePermissions': ['manager'], 'name': 'Compensation Framework For It Teams'}\n", - "page_content='Market Benchmarking:\\nTo ensure our compensation remains competitive, regular market benchmarking will be conducted. This involves analyzing industry salary trends, regional compensation data, and market demand for specific roles. The findings will inform periodic adjustments to our compensation bands to maintain alignment with the market.' metadata={'summary': 'This document outlines a compensation framework for IT teams. It includes job levels, compensation bands, and performance-based incentives to ensure fair and competitive wages. Regular market benchmarking will be conducted to adjust the bands according to industry trends.', 'rolePermissions': ['manager'], 'name': 'Compensation Framework For It Teams'}\n", - "page_content='Market Benchmarking:\\nTo ensure our compensation remains competitive, regular market benchmarking will be conducted. This involves analyzing industry salary trends, regional compensation data, and market demand for specific roles. The findings will inform periodic adjustments to our compensation bands to maintain alignment with the market.' metadata={'summary': 'This document outlines a compensation framework for IT teams. It includes job levels, compensation bands, and performance-based incentives to ensure fair and competitive wages. 
Regular market benchmarking will be conducted to adjust the bands according to industry trends.', 'rolePermissions': ['manager'], 'name': 'Compensation Framework For It Teams'}\n", - "page_content=\"Introduction:\\nThis document outlines the compensation bands strategy for the various teams within our IT company. The goal is to establish a fair and competitive compensation structure that aligns with industry standards, rewards performance, and attracts top talent. By implementing this strategy, we aim to foster employee satisfaction and retention while ensuring the company's overall success.\" metadata={'summary': 'This document outlines a compensation framework for IT teams. It includes job levels, compensation bands, and performance-based incentives to ensure fair and competitive wages. Regular market benchmarking will be conducted to adjust the bands according to industry trends.', 'rolePermissions': ['manager'], 'name': 'Compensation Framework For It Teams'}\n", - "page_content=\"Introduction:\\nThis document outlines the compensation bands strategy for the various teams within our IT company. The goal is to establish a fair and competitive compensation structure that aligns with industry standards, rewards performance, and attracts top talent. By implementing this strategy, we aim to foster employee satisfaction and retention while ensuring the company's overall success.\" metadata={'summary': 'This document outlines a compensation framework for IT teams. It includes job levels, compensation bands, and performance-based incentives to ensure fair and competitive wages. Regular market benchmarking will be conducted to adjust the bands according to industry trends.', 'rolePermissions': ['manager'], 'name': 'Compensation Framework For It Teams'}\n", - "page_content='Compensation Bands:\\nBased on the job levels, the following compensation bands have been established:\\na. Entry-Level Band: This band encompasses salary ranges for employees in entry-level positions. 
It aims to provide competitive compensation for individuals starting their careers within the company.' metadata={'summary': 'This document outlines a compensation framework for IT teams. It includes job levels, compensation bands, and performance-based incentives to ensure fair and competitive wages. Regular market benchmarking will be conducted to adjust the bands according to industry trends.', 'rolePermissions': ['manager'], 'name': 'Compensation Framework For It Teams'}\n", - "page_content='Compensation Bands:\\nBased on the job levels, the following compensation bands have been established:\\na. Entry-Level Band: This band encompasses salary ranges for employees in entry-level positions. It aims to provide competitive compensation for individuals starting their careers within the company.' metadata={'summary': 'This document outlines a compensation framework for IT teams. It includes job levels, compensation bands, and performance-based incentives to ensure fair and competitive wages. Regular market benchmarking will be conducted to adjust the bands according to industry trends.', 'rolePermissions': ['manager'], 'name': 'Compensation Framework For It Teams'}\n" + "page_content=\"Compensation Bands:\\nBased on the job levels, the following compensation bands have been established:\\na. Entry-Level Band: This band encompasses salary ranges for employees in entry-level positions. It aims to provide competitive compensation for individuals starting their careers within the company.\\n\\nb. Intermediate-Level Band: This band covers salary ranges for employees who have gained moderate experience and expertise in their respective roles. It rewards employees for their growing skill set and contributions.\\n\\nc. Senior-Level Band: The senior-level band includes salary ranges for experienced employees who have attained advanced skills and have a proven track record of delivering results. 
It reflects the increased responsibilities and expectations placed upon these individuals.\\n\\nd. Leadership-Level Band: This band comprises salary ranges for managers and team leaders responsible for guiding and overseeing their respective teams. It considers their leadership abilities, strategic thinking, and the impact they have on the company's success.\\n\\nMarket Benchmarking:\\nTo ensure our compensation remains competitive, regular market benchmarking will be conducted. This involves analyzing industry salary trends, regional compensation data, and market demand for specific roles. The findings will inform periodic adjustments to our compensation bands to maintain alignment with the market.\\n\\nPerformance-Based Compensation:\\nIn addition to the defined compensation bands, we emphasize a performance-based compensation model. Performance evaluations will be conducted regularly, and employees exceeding performance expectations will be eligible for bonuses, incentives, and salary increases. This approach rewards high achievers and motivates employees to excel in their roles.\\n\\nConclusion:\\nBy implementing this compensation bands strategy, our IT company aims to establish fair and competitive compensation practices that align with market standards and foster employee satisfaction. Regular evaluations and market benchmarking will enable us to adapt and refine the strategy to meet the evolving needs of our organization.\" metadata={'summary': 'This document outlines a compensation framework for IT teams. It includes job levels, compensation bands, and performance-based incentives to ensure fair and competitive wages. Regular market benchmarking will be conducted to adjust the bands according to industry trends.', 'rolePermissions': ['manager'], 'name': 'Compensation Framework For It Teams'}\n", + "page_content=\"Introduction:\\nThis document outlines the compensation bands strategy for the various teams within our IT company. 
The goal is to establish a fair and competitive compensation structure that aligns with industry standards, rewards performance, and attracts top talent. By implementing this strategy, we aim to foster employee satisfaction and retention while ensuring the company's overall success.\\n\\nPurpose:\\nThe purpose of this compensation bands strategy is to:\\na. Define clear guidelines for salary ranges based on job levels and market benchmarks.\\nb. Support equitable compensation practices across different teams.\\nc. Encourage employee growth and performance.\\nd. Enable effective budgeting and resource allocation.\\n\\nJob Levels:\\nTo establish a comprehensive compensation structure, we have defined distinct job levels within each team. These levels reflect varying degrees of skills, experience, and responsibilities. The levels include:\\na. Entry-Level: Employees with limited experience or early career professionals.\\nb. Intermediate-Level: Employees with moderate experience and demonstrated competence.\\nc. Senior-Level: Experienced employees with advanced skills and leadership capabilities.\\nd. Leadership-Level: Managers and team leaders responsible for strategic decision-making.\\n\\nCompensation Bands:\\nBased on the job levels, the following compensation bands have been established:\\na. Entry-Level Band: This band encompasses salary ranges for employees in entry-level positions. It aims to provide competitive compensation for individuals starting their careers within the company.\\n\\nb. Intermediate-Level Band: This band covers salary ranges for employees who have gained moderate experience and expertise in their respective roles. It rewards employees for their growing skill set and contributions.\\n\\nc. Senior-Level Band: The senior-level band includes salary ranges for experienced employees who have attained advanced skills and have a proven track record of delivering results. 
It reflects the increased responsibilities and expectations placed upon these individuals.\\n\\nd. Leadership-Level Band: This band comprises salary ranges for managers and team leaders responsible for guiding and overseeing their respective teams. It considers their leadership abilities, strategic thinking, and the impact they have on the company's success.\\n\\nMarket Benchmarking:\\nTo ensure our compensation remains competitive, regular market benchmarking will be conducted. This involves analyzing industry salary trends, regional compensation data, and market demand for specific roles. The findings will inform periodic adjustments to our compensation bands to maintain alignment with the market.\" metadata={'summary': 'This document outlines a compensation framework for IT teams. It includes job levels, compensation bands, and performance-based incentives to ensure fair and competitive wages. Regular market benchmarking will be conducted to adjust the bands according to industry trends.', 'rolePermissions': ['manager'], 'name': 'Compensation Framework For It Teams'}\n", + "page_content='Performance Ratings\\nBased on the performance evaluation, employees will receive a performance rating that reflects their overall performance during the cycle. The rating system should be clearly defined and consistently applied across the organization. Performance ratings will be used to inform decisions regarding promotions, salary increases, and other rewards or recognition.\\nPromotions and Advancements\\nHigh-performing employees who consistently demonstrate strong performance, leadership, and a commitment to the company’s values may be considered for promotions or other advancement opportunities. Promotions will be based on factors such as performance ratings, skills, experience, and the needs of the organization. 
Employees interested in pursuing a promotion should discuss their career goals and development plans with their supervisor.\\nPerformance Improvement Plans\\nEmployees who receive a low performance rating or are struggling to meet their performance goals may be placed on a Performance Improvement Plan (PIP). A PIP is a structured plan designed to help the employee address specific areas of concern, set achievable improvement goals, and receive additional support or resources as needed. Employees on a PIP will be closely monitored and re-evaluated at the end of the improvement period to determine if satisfactory progress has been made.\\nRecognition and Rewards\\nOur company believes in recognizing and rewarding employees for their hard work and dedication. In addition to promotions and salary increases, employees may be eligible for other forms of recognition or rewards based on their performance. This may include bonuses, awards, or other incentives designed to motivate and celebrate employee achievements. The specific criteria and eligibility for these rewards will be communicated by the HR department or management.' metadata={'summary': 'This Performance Management Policy outlines a consistent and transparent process for evaluating, recognizing, and rewarding employees. It includes goal setting, ongoing feedback, performance evaluations, ratings, promotions, and rewards. The policy applies to all employees and encourages open communication and professional growth.', 'rolePermissions': ['demo', 'manager'], 'name': 'Performance Management Policy'}\n", + "page_content=\"Purpose\\n\\nThe purpose of this vacation policy is to outline the guidelines and procedures for requesting and taking time off from work for personal and leisure purposes. 
This policy aims to promote a healthy work-life balance and encourage employees to take time to rest and recharge.\\nScope\\n\\nThis policy applies to all full-time and part-time employees who have completed their probationary period.\\nVacation Accrual\\n\\nFull-time employees accrue vacation time at a rate of [X hours] per month, equivalent to [Y days] per year. Part-time employees accrue vacation time on a pro-rata basis, calculated according to their scheduled work hours.\\n\\nVacation time will begin to accrue from the first day of employment, but employees are eligible to take vacation time only after completing their probationary period. Unused vacation time will be carried over to the next year, up to a maximum of [Z days]. Any additional unused vacation time will be forfeited.\\nVacation Scheduling\\n\\nEmployees are required to submit vacation requests to their supervisor at least [A weeks] in advance, specifying the start and end dates of their vacation. Supervisors will review and approve vacation requests based on business needs, ensuring adequate coverage during the employee's absence.\\n\\nEmployees are encouraged to plan their vacations around the company's peak and non-peak periods to minimize disruptions. Vacation requests during peak periods may be subject to limitations and require additional advance notice.\\nVacation Pay\\n\\nEmployees will receive their regular pay during their approved vacation time. Vacation pay will be calculated based on the employee's average earnings over the [B weeks] preceding their vacation.\\nUnplanned Absences and Vacation Time\\n\\nIn the event of an unplanned absence due to illness or personal emergencies, employees may use their accrued vacation time, subject to supervisor approval. 
Employees must inform their supervisor as soon as possible and provide any required documentation upon their return to work.\\nVacation Time and Termination of Employment\\n\\nIf an employee's employment is terminated, they will be paid out for any unused vacation time, calculated based on their current rate of pay.\\nPolicy Review and Updates\\n\\nThis vacation policy will be reviewed periodically and updated as necessary, taking into account changes in labor laws, business needs, and employee feedback.\\nQuestions and Concerns\\n\\nEmployees are encouraged to direct any questions or concerns about this policy to their supervisor or the HR department.\" metadata={'summary': ': This policy outlines the guidelines and procedures for requesting and taking time off from work for personal and leisure purposes. Full-time employees accrue vacation time at a rate of [X hours] per month, equivalent to [Y days] per year. Vacation requests must be submitted to supervisors at least', 'rolePermissions': ['demo', 'manager'], 'name': 'Company Vacation Policy'}\n", + "page_content='Starting May 2022, the company will be implementing a two-day in-office work requirement per week for all eligible employees. Please coordinate with your supervisor and HR department to schedule your in-office workdays while continuing to follow all safety protocols.' metadata={'summary': 'Starting May 2022, employees will need to work two days a week in the office. 
Coordinate with your supervisor and HR department for these days while following safety protocols.', 'rolePermissions': ['demo', 'manager'], 'name': 'April Work From Home Update'}\n", + "page_content=\"Effective: March 2020\\nPurpose\\n\\nThe purpose of this full-time work-from-home policy is to provide guidelines and support for employees to conduct their work remotely, ensuring the continuity and productivity of business operations during the COVID-19 pandemic and beyond.\\nScope\\n\\nThis policy applies to all employees who are eligible for remote work as determined by their role and responsibilities. It is designed to allow employees to work from home full time while maintaining the same level of performance and collaboration as they would in the office.\\nEligibility\\n\\nEmployees who can perform their work duties remotely and have received approval from their direct supervisor and the HR department are eligible for this work-from-home arrangement.\\nEquipment and Resources\\n\\nThe necessary equipment and resources will be provided to employees for remote work, including a company-issued laptop, software licenses, and access to secure communication tools. Employees are responsible for maintaining and protecting the company's equipment and data.\\nWorkspace\\n\\nEmployees working from home are responsible for creating a comfortable and safe workspace that is conducive to productivity. This includes ensuring that their home office is ergonomically designed, well-lit, and free from distractions.\\nCommunication\\n\\nEffective communication is vital for successful remote work. Employees are expected to maintain regular communication with their supervisors, colleagues, and team members through email, phone calls, video conferences, and other approved communication tools.\\nWork Hours and Availability\\n\\nEmployees are expected to maintain their regular work hours and be available during normal business hours, unless otherwise agreed upon with their supervisor. 
Any changes to work hours or availability must be communicated to the employee's supervisor and the HR department.\\nPerformance Expectations\\n\\nEmployees working from home are expected to maintain the same level of performance and productivity as if they were working in the office. Supervisors and team members will collaborate to establish clear expectations and goals for remote work.\\nTime Tracking and Overtime\\n\\nEmployees are required to accurately track their work hours using the company's time tracking system. Non-exempt employees must obtain approval from their supervisor before working overtime.\\nConfidentiality and Data Security\\n\\nEmployees must adhere to the company's confidentiality and data security policies while working from home. This includes safeguarding sensitive information, securing personal devices and internet connections, and reporting any security breaches to the IT department.\\nHealth and Well-being\" metadata={'summary': 'This policy outlines the guidelines for full-time remote work, including eligibility, equipment and resources, workspace requirements, communication expectations, performance expectations, time tracking and overtime, confidentiality and data security, health and well-being, and policy reviews and updates. Employees are encouraged to direct any questions or concerns', 'rolePermissions': ['demo', 'manager'], 'name': 'Work From Home Policy'}\n", + "page_content='Performance Management Policy\\nPurpose and Scope\\nThe purpose of this Performance Management Policy is to establish a consistent and transparent process for evaluating, recognizing, and rewarding employee performance. 
This policy applies to all employees and aims to foster a culture of continuous improvement, professional growth, and open communication between employees and management.\\nPerformance Planning and Goal Setting\\nAt the beginning of each performance cycle, employees and their supervisors will collaborate to set clear, achievable, and measurable performance goals. These goals should align with the company’s strategic objectives and take into account the employee’s job responsibilities, professional development, and career aspirations.\\nOngoing Feedback and Communication\\nThroughout the performance cycle, employees and supervisors are encouraged to engage in regular, constructive feedback and open communication. This includes discussing progress towards goals, addressing challenges, and identifying opportunities for improvement or additional support. Regular check-ins and updates help ensure that employees stay on track and receive the guidance they need to succeed.\\nPerformance Evaluation\\nAt the end of each performance cycle, employees will participate in a formal performance evaluation with their supervisor. This evaluation will assess the employee’s overall performance, including their achievements, areas for improvement, and progress towards goals. Both the employee and supervisor should come prepared to discuss specific examples, accomplishments, and challenges from the performance period.\\nPerformance Ratings\\nBased on the performance evaluation, employees will receive a performance rating that reflects their overall performance during the cycle. The rating system should be clearly defined and consistently applied across the organization. 
Performance ratings will be used to inform decisions regarding promotions, salary increases, and other rewards or recognition.\\nPromotions and Advancements\\nHigh-performing employees who consistently demonstrate strong performance, leadership, and a commitment to the company’s values may be considered for promotions or other advancement opportunities. Promotions will be based on factors such as performance ratings, skills, experience, and the needs of the organization. Employees interested in pursuing a promotion should discuss their career goals and development plans with their supervisor.\\nPerformance Improvement Plans\\nEmployees who receive a low performance rating or are struggling to meet their performance goals may be placed on a Performance Improvement Plan (PIP). A PIP is a structured plan designed to help the employee address specific areas of concern, set achievable improvement goals, and receive additional support or resources as needed. Employees on a PIP will be closely monitored and re-evaluated at the end of the improvement period to determine if satisfactory progress has been made.\\nRecognition and Rewards' metadata={'summary': 'This Performance Management Policy outlines a consistent and transparent process for evaluating, recognizing, and rewarding employees. It includes goal setting, ongoing feedback, performance evaluations, ratings, promotions, and rewards. The policy applies to all employees and encourages open communication and professional growth.', 'rolePermissions': ['demo', 'manager'], 'name': 'Performance Management Policy'}\n", + "page_content='Review policies and procedures: Familiarize yourself with our employee handbook, which contains important information about our policies and procedures. Please read it thoroughly and adhere to the guidelines.\\nComplete required training: You may be required to complete mandatory training sessions, such as safety training or anti-harassment training. 
Ensure that you attend and complete these sessions as soon as possible.\\nUpdating Tax Elections and Documents\\nIt is crucial to ensure your tax information is accurate and up-to-date, regardless of the country you work in. Please follow these steps to update your tax elections and documents:\\nComplete tax forms: Fill out the necessary tax forms for your country or region, which determine the amount of income tax withheld from your paycheck. You should complete new tax forms if your personal or financial situation changes, such as marriage, divorce, or a change in the number of dependents.\\nSubmit regional tax forms: Depending on your location, you may be required to complete additional regional or local tax forms. Check with the HR department to determine which forms are necessary.\\nUpdate your address: If you move, make sure to update your address with the HR department to ensure accurate tax reporting.\\nBenefits Enrollment\\nAs a new employee, you are eligible for various benefits, including health insurance, retirement plans, and paid time off. You will receive detailed information about our benefits package during orientation. To enroll in the benefits, please follow these steps:\\nReview benefits options: Carefully review the benefits package and choose the options that best meet your needs.\\nComplete enrollment forms: Fill out the necessary forms to enroll in your chosen benefits. Submit these forms to the HR department within 30 days of your start date.\\nDesignate beneficiaries: If applicable, designate beneficiaries for your life insurance and retirement plans.\\nGetting Settled in Your Workspace\\nTo help you feel comfortable and productive in your new workspace, take the following steps:\\nSet up your workstation: Organize your desk, chair, and computer according to your preferences. 
If you require any additional equipment or accommodations, please contact the HR department.\\nObtain necessary supplies: Request any necessary office supplies, such as pens, notepads, or folders, from the designated supply area or by contacting the appropriate department.\\nFamiliarize yourself with office resources: Locate common areas, such as break rooms, restrooms, and meeting rooms. Familiarize yourself with office equipment, including printers, scanners, and telephones.' metadata={'summary': '\\nThis onboarding guide provides essential information to new employees on our company culture and values, key onboarding steps, tax elections and documents, benefits enrollment, and setting up their workspace.', 'rolePermissions': ['demo', 'manager'], 'name': 'New Employee Onboarding Guide'}\n", + "page_content=\"Definitions\\na. Intellectual Property (IP): Refers to creations of the mind, such as inventions, literary and artistic works, designs, symbols, and images, that are protected by copyright, trademark, patent, or other forms of legal protection.\\nb. Company Time: Refers to the time during which an employee is actively engaged in performing their job duties.\\nc. Outside Company Time: Refers to the time during which an employee is not engaged in performing their job duties.\\n\\nOwnership of Intellectual Property\\na. Work Generated on Company Time\\ni. Any intellectual property created, conceived, or developed by an employee during company time or using company resources, equipment, or facilities shall be considered the property of the Company.\\nii. Employees are required to promptly disclose any such intellectual property to their supervisor or the appropriate department head.\\nb. Work Generated Outside Company Time\\ni. Intellectual property created, conceived, or developed by an employee outside of company time and without the use of company resources, equipment, or facilities shall generally remain the property of the employee.\\nii. 
However, if the intellectual property is directly related to the employee's job responsibilities, or if the employee has used company resources, equipment, or facilities in its creation, it may be considered the property of the Company.\\nProtection and Utilization of Intellectual Property\\na. The Company shall have the right to protect, license, and commercialize any intellectual property owned by the company as it deems appropriate.\\nb. Employees are expected to cooperate with the Company in obtaining any necessary legal protection for intellectual property owned by the company, including by signing any documents or providing any necessary information or assistance.\\nConfidentiality\\nEmployees are expected to maintain the confidentiality of any intellectual property owned by the Company and not disclose it to any third parties without the express written consent of an authorized representative of the company.\\nEmployee Acknowledgment\\nAll employees are required to sign an acknowledgment of this Intellectual Property Policy as a condition of their employment with [Company Name]. By signing the acknowledgment, employees agree to abide by the terms of this policy and understand that any violations may result in disciplinary action, up to and including termination of employment.\\nPolicy Review\\nThis Intellectual Property Policy shall be reviewed periodically and may be amended as necessary to ensure its continued effectiveness and compliance with applicable laws and regulations. Employees will be notified of any significant changes to this policy.\" metadata={'summary': \"This Intellectual Property Policy outlines guidelines and procedures for the ownership, protection, and utilization of intellectual property generated by employees during their employment. It establishes the company's ownership of work generated on company time, while recognizing employee ownership of work generated outside of company time without the use of company resources. 
The policy\", 'rolePermissions': ['demo', 'manager'], 'name': 'Intellectual Property Policy'}\n", + "page_content=\"Read the instructions carefully for each section to ensure you claim the correct amounts.\\n\\nStep 5: Sign and date the form\\nOnce you've completed the form, sign and date it at the bottom.\\n\\nStep 6: Submit the form to your employer\\nSubmit the completed and signed TD1 form to your employer. You can either scan and send it electronically, or provide a printed copy. Your employer will use the information on your TD1 form to calculate the correct amount of tax to be deducted from your pay.\\n\\nStep 7: Update your TD1 form as needed\\nIt's essential to update your TD1 form whenever your personal circumstances change, such as getting married, having a child, or becoming eligible for a new tax credit. Inform your employer of these changes and submit an updated TD1 form to ensure accurate tax deductions.\\n\\nUpdating your tax elections forms is a crucial step in ensuring the correct tax deductions from your pay as a new employee in Canada. Follow this guide and keep your TD1 form up to date to avoid any discrepancies in your tax filings.\" metadata={'summary': ': This guide gives a step-by-step explanation of how to update your TD1 Personal Tax Credits Return form. Access the form from the CRA website and choose the correct version based on your province or territory of residence. Download and open the form in Adobe Reader, fill out the form by entering', 'rolePermissions': ['demo', 'manager'], 'name': 'Updating Your Tax Elections Forms'}\n" ] } ], @@ -259,7 +260,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 21, "metadata": {}, "outputs": [ { @@ -267,10 +268,10 @@ "output_type": "stream", "text": [ "Total results: 4\n", - "page_content='Performance-Based Compensation:\\nIn addition to the defined compensation bands, we emphasize a performance-based compensation model. 
Performance evaluations will be conducted regularly, and employees exceeding performance expectations will be eligible for bonuses, incentives, and salary increases. This approach rewards high achievers and motivates employees to excel in their roles.' metadata={'summary': 'This document outlines a compensation framework for IT teams. It includes job levels, compensation bands, and performance-based incentives to ensure fair and competitive wages. Regular market benchmarking will be conducted to adjust the bands according to industry trends.', 'rolePermissions': ['manager'], 'name': 'Compensation Framework For It Teams'}\n", - "page_content='Performance-Based Compensation:\\nIn addition to the defined compensation bands, we emphasize a performance-based compensation model. Performance evaluations will be conducted regularly, and employees exceeding performance expectations will be eligible for bonuses, incentives, and salary increases. This approach rewards high achievers and motivates employees to excel in their roles.' metadata={'summary': 'This document outlines a compensation framework for IT teams. It includes job levels, compensation bands, and performance-based incentives to ensure fair and competitive wages. Regular market benchmarking will be conducted to adjust the bands according to industry trends.', 'rolePermissions': ['manager'], 'name': 'Compensation Framework For It Teams'}\n", - "page_content='Compensation Bands:\\nBased on the job levels, the following compensation bands have been established:\\na. Entry-Level Band: This band encompasses salary ranges for employees in entry-level positions. It aims to provide competitive compensation for individuals starting their careers within the company.' metadata={'summary': 'This document outlines a compensation framework for IT teams. It includes job levels, compensation bands, and performance-based incentives to ensure fair and competitive wages. 
Regular market benchmarking will be conducted to adjust the bands according to industry trends.', 'rolePermissions': ['manager'], 'name': 'Compensation Framework For It Teams'}\n", - "page_content='Compensation Bands:\\nBased on the job levels, the following compensation bands have been established:\\na. Entry-Level Band: This band encompasses salary ranges for employees in entry-level positions. It aims to provide competitive compensation for individuals starting their careers within the company.' metadata={'summary': 'This document outlines a compensation framework for IT teams. It includes job levels, compensation bands, and performance-based incentives to ensure fair and competitive wages. Regular market benchmarking will be conducted to adjust the bands according to industry trends.', 'rolePermissions': ['manager'], 'name': 'Compensation Framework For It Teams'}\n" + "page_content=\"Compensation Bands:\\nBased on the job levels, the following compensation bands have been established:\\na. Entry-Level Band: This band encompasses salary ranges for employees in entry-level positions. It aims to provide competitive compensation for individuals starting their careers within the company.\\n\\nb. Intermediate-Level Band: This band covers salary ranges for employees who have gained moderate experience and expertise in their respective roles. It rewards employees for their growing skill set and contributions.\\n\\nc. Senior-Level Band: The senior-level band includes salary ranges for experienced employees who have attained advanced skills and have a proven track record of delivering results. It reflects the increased responsibilities and expectations placed upon these individuals.\\n\\nd. Leadership-Level Band: This band comprises salary ranges for managers and team leaders responsible for guiding and overseeing their respective teams. 
It considers their leadership abilities, strategic thinking, and the impact they have on the company's success.\\n\\nMarket Benchmarking:\\nTo ensure our compensation remains competitive, regular market benchmarking will be conducted. This involves analyzing industry salary trends, regional compensation data, and market demand for specific roles. The findings will inform periodic adjustments to our compensation bands to maintain alignment with the market.\\n\\nPerformance-Based Compensation:\\nIn addition to the defined compensation bands, we emphasize a performance-based compensation model. Performance evaluations will be conducted regularly, and employees exceeding performance expectations will be eligible for bonuses, incentives, and salary increases. This approach rewards high achievers and motivates employees to excel in their roles.\\n\\nConclusion:\\nBy implementing this compensation bands strategy, our IT company aims to establish fair and competitive compensation practices that align with market standards and foster employee satisfaction. Regular evaluations and market benchmarking will enable us to adapt and refine the strategy to meet the evolving needs of our organization.\" metadata={'summary': 'This document outlines a compensation framework for IT teams. It includes job levels, compensation bands, and performance-based incentives to ensure fair and competitive wages. Regular market benchmarking will be conducted to adjust the bands according to industry trends.', 'rolePermissions': ['manager'], 'name': 'Compensation Framework For It Teams'}\n", + "page_content=\"Introduction:\\nThis document outlines the compensation bands strategy for the various teams within our IT company. The goal is to establish a fair and competitive compensation structure that aligns with industry standards, rewards performance, and attracts top talent. 
By implementing this strategy, we aim to foster employee satisfaction and retention while ensuring the company's overall success.\\n\\nPurpose:\\nThe purpose of this compensation bands strategy is to:\\na. Define clear guidelines for salary ranges based on job levels and market benchmarks.\\nb. Support equitable compensation practices across different teams.\\nc. Encourage employee growth and performance.\\nd. Enable effective budgeting and resource allocation.\\n\\nJob Levels:\\nTo establish a comprehensive compensation structure, we have defined distinct job levels within each team. These levels reflect varying degrees of skills, experience, and responsibilities. The levels include:\\na. Entry-Level: Employees with limited experience or early career professionals.\\nb. Intermediate-Level: Employees with moderate experience and demonstrated competence.\\nc. Senior-Level: Experienced employees with advanced skills and leadership capabilities.\\nd. Leadership-Level: Managers and team leaders responsible for strategic decision-making.\\n\\nCompensation Bands:\\nBased on the job levels, the following compensation bands have been established:\\na. Entry-Level Band: This band encompasses salary ranges for employees in entry-level positions. It aims to provide competitive compensation for individuals starting their careers within the company.\\n\\nb. Intermediate-Level Band: This band covers salary ranges for employees who have gained moderate experience and expertise in their respective roles. It rewards employees for their growing skill set and contributions.\\n\\nc. Senior-Level Band: The senior-level band includes salary ranges for experienced employees who have attained advanced skills and have a proven track record of delivering results. It reflects the increased responsibilities and expectations placed upon these individuals.\\n\\nd. 
Leadership-Level Band: This band comprises salary ranges for managers and team leaders responsible for guiding and overseeing their respective teams. It considers their leadership abilities, strategic thinking, and the impact they have on the company's success.\\n\\nMarket Benchmarking:\\nTo ensure our compensation remains competitive, regular market benchmarking will be conducted. This involves analyzing industry salary trends, regional compensation data, and market demand for specific roles. The findings will inform periodic adjustments to our compensation bands to maintain alignment with the market.\" metadata={'summary': 'This document outlines a compensation framework for IT teams. It includes job levels, compensation bands, and performance-based incentives to ensure fair and competitive wages. Regular market benchmarking will be conducted to adjust the bands according to industry trends.', 'rolePermissions': ['manager'], 'name': 'Compensation Framework For It Teams'}\n", + "page_content='Performance Ratings\\nBased on the performance evaluation, employees will receive a performance rating that reflects their overall performance during the cycle. The rating system should be clearly defined and consistently applied across the organization. Performance ratings will be used to inform decisions regarding promotions, salary increases, and other rewards or recognition.\\nPromotions and Advancements\\nHigh-performing employees who consistently demonstrate strong performance, leadership, and a commitment to the company’s values may be considered for promotions or other advancement opportunities. Promotions will be based on factors such as performance ratings, skills, experience, and the needs of the organization. 
Employees interested in pursuing a promotion should discuss their career goals and development plans with their supervisor.\\nPerformance Improvement Plans\\nEmployees who receive a low performance rating or are struggling to meet their performance goals may be placed on a Performance Improvement Plan (PIP). A PIP is a structured plan designed to help the employee address specific areas of concern, set achievable improvement goals, and receive additional support or resources as needed. Employees on a PIP will be closely monitored and re-evaluated at the end of the improvement period to determine if satisfactory progress has been made.\\nRecognition and Rewards\\nOur company believes in recognizing and rewarding employees for their hard work and dedication. In addition to promotions and salary increases, employees may be eligible for other forms of recognition or rewards based on their performance. This may include bonuses, awards, or other incentives designed to motivate and celebrate employee achievements. The specific criteria and eligibility for these rewards will be communicated by the HR department or management.' metadata={'summary': 'This Performance Management Policy outlines a consistent and transparent process for evaluating, recognizing, and rewarding employees. It includes goal setting, ongoing feedback, performance evaluations, ratings, promotions, and rewards. The policy applies to all employees and encourages open communication and professional growth.', 'rolePermissions': ['demo', 'manager'], 'name': 'Performance Management Policy'}\n", + "page_content=\"Purpose\\n\\nThe purpose of this vacation policy is to outline the guidelines and procedures for requesting and taking time off from work for personal and leisure purposes. 
This policy aims to promote a healthy work-life balance and encourage employees to take time to rest and recharge.\\nScope\\n\\nThis policy applies to all full-time and part-time employees who have completed their probationary period.\\nVacation Accrual\\n\\nFull-time employees accrue vacation time at a rate of [X hours] per month, equivalent to [Y days] per year. Part-time employees accrue vacation time on a pro-rata basis, calculated according to their scheduled work hours.\\n\\nVacation time will begin to accrue from the first day of employment, but employees are eligible to take vacation time only after completing their probationary period. Unused vacation time will be carried over to the next year, up to a maximum of [Z days]. Any additional unused vacation time will be forfeited.\\nVacation Scheduling\\n\\nEmployees are required to submit vacation requests to their supervisor at least [A weeks] in advance, specifying the start and end dates of their vacation. Supervisors will review and approve vacation requests based on business needs, ensuring adequate coverage during the employee's absence.\\n\\nEmployees are encouraged to plan their vacations around the company's peak and non-peak periods to minimize disruptions. Vacation requests during peak periods may be subject to limitations and require additional advance notice.\\nVacation Pay\\n\\nEmployees will receive their regular pay during their approved vacation time. Vacation pay will be calculated based on the employee's average earnings over the [B weeks] preceding their vacation.\\nUnplanned Absences and Vacation Time\\n\\nIn the event of an unplanned absence due to illness or personal emergencies, employees may use their accrued vacation time, subject to supervisor approval. 
Employees must inform their supervisor as soon as possible and provide any required documentation upon their return to work.\\nVacation Time and Termination of Employment\\n\\nIf an employee's employment is terminated, they will be paid out for any unused vacation time, calculated based on their current rate of pay.\\nPolicy Review and Updates\\n\\nThis vacation policy will be reviewed periodically and updated as necessary, taking into account changes in labor laws, business needs, and employee feedback.\\nQuestions and Concerns\\n\\nEmployees are encouraged to direct any questions or concerns about this policy to their supervisor or the HR department.\" metadata={'summary': ': This policy outlines the guidelines and procedures for requesting and taking time off from work for personal and leisure purposes. Full-time employees accrue vacation time at a rate of [X hours] per month, equivalent to [Y days] per year. Vacation requests must be submitted to supervisors at least', 'rolePermissions': ['demo', 'manager'], 'name': 'Company Vacation Policy'}\n" ] } ], @@ -278,11 +279,18 @@ "results = documents.similarity_search(\"How does the compensation work\", filter=[{ 'match': { \"metadata.rolePermissions\": \"manager\" }}])\n", "showResults(results)" ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { "kernelspec": { - "display_name": "Python 3.11.4 64-bit", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, @@ -296,9 +304,8 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.4" + "version": "3.11.6" }, - "orig_nbformat": 4, "vscode": { "interpreter": { "hash": "b0fa6594d8f4cbf19f97940f81e996739fb7646882a419484c72d19e05852a7e" @@ -306,5 +313,5 @@ } }, "nbformat": 4, - "nbformat_minor": 2 + "nbformat_minor": 4 } diff --git a/notebooks/model-upgrades/.nbtest.yml b/notebooks/model-upgrades/.nbtest.yml new file mode 
100644 index 00000000..862900ea --- /dev/null +++ b/notebooks/model-upgrades/.nbtest.yml @@ -0,0 +1,12 @@ +masks: +- "'name': '[^']+'" +- "'build_flavor': '[^']+'" +- '[0-9]+\.[0-9]+\.[0-9]+(-SNAPSHOT)?' +- "'build_snapshot': [^,]+" +- "'cluster_name': '[^']+'" +- "'cluster_uuid': '[^']+'" +- "'build_hash': '[^']+'" +- "'build_date': '[^']+'" +- "'_version': [0-9]+" +- '^ID: .*$' +- '^Score: [0-9]+\.[0-9][0-9]*$' diff --git a/notebooks/model-upgrades/Makefile b/notebooks/model-upgrades/Makefile new file mode 100644 index 00000000..a66139b1 --- /dev/null +++ b/notebooks/model-upgrades/Makefile @@ -0,0 +1,10 @@ +NBTEST = ../../bin/nbtest +NOTEBOOKS = \ + upgrading-index-to-use-elser.ipynb + +.PHONY: all $(NOTEBOOKS) + +all: $(NOTEBOOKS) + +$(NOTEBOOKS): + -$(NBTEST) $@ diff --git a/notebooks/model-upgrades/_nbtest.setup.upgrading-index-to-use-elser.ipynb b/notebooks/model-upgrades/_nbtest.setup.upgrading-index-to-use-elser.ipynb new file mode 100644 index 00000000..f9580bb2 --- /dev/null +++ b/notebooks/model-upgrades/_nbtest.setup.upgrading-index-to-use-elser.ipynb @@ -0,0 +1,172 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "33e36462-0981-4c37-bbe7-bc356ab39bde", + "metadata": {}, + "outputs": [], + "source": [ + "!pip install -qU elasticsearch sentence-transformers" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "d92d41fb-975f-4f45-8e8a-13c1c11a55b5", + "metadata": {}, + "outputs": [ + { + "name": "stdin", + "output_type": "stream", + "text": [ + "Elastic Cloud ID: ········\n", + "Elastic Api Key: ········\n" + ] + } + ], + "source": [ + "# get the Elasticsearch client\n", + "from elasticsearch import Elasticsearch, exceptions\n", + "from getpass import getpass\n", + "import time\n", + "\n", + "ELASTIC_CLOUD_ID = getpass(\"Elastic Cloud ID: \")\n", + "ELASTIC_API_KEY = getpass(\"Elastic Api Key: \")\n", + "\n", + "client = Elasticsearch(cloud_id=ELASTIC_CLOUD_ID, api_key=ELASTIC_API_KEY,)" + ] + }, + { + 
"cell_type": "code", + "execution_count": null, + "id": "00acf8a3-fad8-4be8-b8a8-17c37fcde1cc", + "metadata": {}, + "outputs": [], + "source": [ + "# delete model if already downloaded and deployed\n", + "try:\n", + " client.ml.delete_trained_model(model_id=\".elser_model_2\",force=True)\n", + " print(\"Model deleted successfully, We will proceed with creating one\")\n", + "except exceptions.NotFoundError:\n", + " print(\"Model doesn't exist, but We will proceed with creating one\")\n", + "\n", + "# Creates the ELSER model configuration. Automatically downloads the model if it doesn't exist. \n", + "client.ml.put_trained_model(\n", + " model_id=\".elser_model_2\",\n", + " input={\n", + " \"field_names\": [\"text_field\"]\n", + " }\n", + " )\n", + "\n", + "while True:\n", + " status = client.ml.get_trained_models(\n", + " model_id=\".elser_model_2\",\n", + " include=\"definition_status\"\n", + " )\n", + " \n", + " if (status[\"trained_model_configs\"][0][\"fully_defined\"]):\n", + " break\n", + " time.sleep(5)\n", + "\n", + "# Start trained model deployment if not already deployed\n", + "client.ml.start_trained_model_deployment(\n", + " model_id=\".elser_model_2\",\n", + " number_of_allocations=1,\n", + " wait_for=\"starting\"\n", + ")\n", + "\n", + "while True:\n", + " status = client.ml.get_trained_models_stats(\n", + " model_id=\".elser_model_2\",\n", + " )\n", + " if (status[\"trained_model_stats\"][0][\"deployment_stats\"][\"state\"] == \"started\"):\n", + " print(\"ELSER Model has been successfully deployed.\")\n", + " break\n", + " else:\n", + " print(\"ELSER Model is currently being deployed.\")\n", + " time.sleep(5)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "1ad5f6d6-cc07-4f62-a7ab-25f635e4f111", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "ObjectApiResponse({'errors': False, 'took': 69, 'items': [{'index': {'_index': 'books', '_id': 'd8CPPI0BUo9hijNs3Yig', '_version': 1, 'result': 'created', 
'forced_refresh': True, '_shards': {'total': 2, 'successful': 2, 'failed': 0}, '_seq_no': 0, '_primary_term': 1, 'status': 201}}, {'index': {'_index': 'books', '_id': 'eMCPPI0BUo9hijNs3Yig', '_version': 1, 'result': 'created', 'forced_refresh': True, '_shards': {'total': 2, 'successful': 2, 'failed': 0}, '_seq_no': 1, '_primary_term': 1, 'status': 201}}, {'index': {'_index': 'books', '_id': 'ecCPPI0BUo9hijNs3Yig', '_version': 1, 'result': 'created', 'forced_refresh': True, '_shards': {'total': 2, 'successful': 2, 'failed': 0}, '_seq_no': 2, '_primary_term': 1, 'status': 201}}, {'index': {'_index': 'books', '_id': 'esCPPI0BUo9hijNs3Yig', '_version': 1, 'result': 'created', 'forced_refresh': True, '_shards': {'total': 2, 'successful': 2, 'failed': 0}, '_seq_no': 3, '_primary_term': 1, 'status': 201}}, {'index': {'_index': 'books', '_id': 'e8CPPI0BUo9hijNs3Yig', '_version': 1, 'result': 'created', 'forced_refresh': True, '_shards': {'total': 2, 'successful': 2, 'failed': 0}, '_seq_no': 4, '_primary_term': 1, 'status': 201}}, {'index': {'_index': 'books', '_id': 'fMCPPI0BUo9hijNs3Yig', '_version': 1, 'result': 'created', 'forced_refresh': True, '_shards': {'total': 2, 'successful': 2, 'failed': 0}, '_seq_no': 5, '_primary_term': 1, 'status': 201}}, {'index': {'_index': 'books', '_id': 'fcCPPI0BUo9hijNs3Yig', '_version': 1, 'result': 'created', 'forced_refresh': True, '_shards': {'total': 2, 'successful': 2, 'failed': 0}, '_seq_no': 6, '_primary_term': 1, 'status': 201}}, {'index': {'_index': 'books', '_id': 'fsCPPI0BUo9hijNs3Yig', '_version': 1, 'result': 'created', 'forced_refresh': True, '_shards': {'total': 2, 'successful': 2, 'failed': 0}, '_seq_no': 7, '_primary_term': 1, 'status': 201}}, {'index': {'_index': 'books', '_id': 'f8CPPI0BUo9hijNs3Yig', '_version': 1, 'result': 'created', 'forced_refresh': True, '_shards': {'total': 2, 'successful': 2, 'failed': 0}, '_seq_no': 8, '_primary_term': 1, 'status': 201}}, {'index': {'_index': 'books', '_id': 
'gMCPPI0BUo9hijNs3Yig', '_version': 1, 'result': 'created', 'forced_refresh': True, '_shards': {'total': 2, 'successful': 2, 'failed': 0}, '_seq_no': 9, '_primary_term': 1, 'status': 201}}]})" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import json\n", + "from urllib.request import urlopen\n", + "from sentence_transformers import SentenceTransformer\n", + "\n", + "# these tests need book_index to exist ahead of time\n", + "client.indices.delete(index=\"books\", ignore_unavailable=True)\n", + "\n", + "mappings = {\n", + " \"properties\": {\n", + " \"title_vector\": {\n", + " \"type\": \"dense_vector\",\n", + " \"dims\": 384,\n", + " \"index\": \"true\",\n", + " \"similarity\": \"cosine\"\n", + " }\n", + " }\n", + "}\n", + "client.indices.create(index='books', mappings=mappings)\n", + "\n", + "url = \"https://raw.githubusercontent.com/elastic/elasticsearch-labs/main/notebooks/search/data.json\"\n", + "response = urlopen(url)\n", + "books = json.loads(response.read())\n", + "\n", + "model = SentenceTransformer('all-MiniLM-L6-v2')\n", + "operations = []\n", + "for book in books:\n", + " operations.append({\"index\": {\"_index\": \"books\"}})\n", + " # Transforming the title into an embedding using the model\n", + " book[\"title_vector\"] = model.encode(book[\"title\"]).tolist()\n", + " operations.append(book)\n", + "client.bulk(index=\"books\", operations=operations, refresh=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "65b92c67-b94b-4eda-923d-ed81706c10f9", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + 
"version": "3.11.6" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/notebooks/model-upgrades/_nbtest.teardown.upgrading-index-to-use-elser.ipynb b/notebooks/model-upgrades/_nbtest.teardown.upgrading-index-to-use-elser.ipynb new file mode 100644 index 00000000..4d479722 --- /dev/null +++ b/notebooks/model-upgrades/_nbtest.teardown.upgrading-index-to-use-elser.ipynb @@ -0,0 +1,72 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "4ce4d4c3-1342-41da-b84c-48f951380e53", + "metadata": {}, + "outputs": [], + "source": [ + "from elasticsearch import Elasticsearch\n", + "from getpass import getpass\n", + "\n", + "ELASTIC_CLOUD_ID = getpass(\"Elastic Cloud ID: \")\n", + "ELASTIC_API_KEY = getpass(\"Elastic Api Key: \")\n", + "\n", + "client = Elasticsearch(cloud_id=ELASTIC_CLOUD_ID, api_key=ELASTIC_API_KEY,)\n", + "\n", + "# delete the indices\n", + "client.indices.delete(index=\"books\", ignore_unavailable=True)\n", + "client.indices.delete(index=\"movies\", ignore_unavailable=True)\n", + "client.indices.delete(index=\"elser-movies\", ignore_unavailable=True)\n", + "client.indices.delete(index=\"elser-upgrade-index-demo\", ignore_unavailable=True)\n", + "client.indices.delete(index=\"elser-books\", ignore_unavailable=True)\n", + "\n", + "# delete the pipelines\n", + "try:\n", + " client.ingest.delete_pipeline(id=\"ingest-pipeline-lowercase\")\n", + "except:\n", + " pass\n", + "try:\n", + " client.ingest.delete_pipeline(id=\"elser-ingest-pipeline\")\n", + "except:\n", + " pass\n", + "try:\n", + " client.ingest.delete_pipeline(id=\"elser-pipeline-upgrade-demo\")\n", + "except:\n", + " pass\n", + "try:\n", + " client.ingest.delete_pipeline(id=\"elser-pipeline-books\")\n", + "except:\n", + " pass\n", + "\n", + "# delete the model\n", + "try:\n", + " client.ml.delete_trained_model(model_id=\".elser_model_2\", force=True)\n", + "except:\n", + " pass" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 
(ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.6" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/notebooks/model-upgrades/upgrading-index-to-use-elser.ipynb b/notebooks/model-upgrades/upgrading-index-to-use-elser.ipynb index c2cbcd76..1cfa6952 100644 --- a/notebooks/model-upgrades/upgrading-index-to-use-elser.ipynb +++ b/notebooks/model-upgrades/upgrading-index-to-use-elser.ipynb @@ -57,7 +57,7 @@ "source": [ "from elasticsearch import Elasticsearch, helpers\n", "from urllib.request import urlopen\n", - "import getpass\n", + "from getpass import getpass\n", "import json\n", "import time" ] @@ -73,20 +73,36 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdin", + "output_type": "stream", + "text": [ + "Elastic Cloud ID: ········\n", + "Elastic Api Key: ········\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'name': 'instance-0000000001', 'cluster_name': 'ad402eb9a59041458b8edfc021e91caf', 'cluster_uuid': 'ks_HfcCdSf2qrcKZQsk9Lg', 'version': {'number': '8.11.0', 'build_flavor': 'default', 'build_type': 'docker', 'build_hash': 'd9ec3fa628c7b0ba3d25692e277ba26814820b20', 'build_date': '2023-11-04T10:04:57.184859352Z', 'build_snapshot': False, 'lucene_version': '9.8.0', 'minimum_wire_compatibility_version': '7.17.0', 'minimum_index_compatibility_version': '7.0.0'}, 'tagline': 'You Know, for Search'}\n" + ] + } + ], "source": [ - "# Found in the 'Manage Deployment' page\n", - "CLOUD_ID = getpass.getpass('Elastic Cloud ID: ')\n", + "# https://www.elastic.co/search-labs/tutorials/install-elasticsearch/elastic-cloud#finding-your-cloud-id\n", + "ELASTIC_CLOUD_ID = 
getpass(\"Elastic Cloud ID: \")\n", "\n", - "# Password for the 'elastic' user generated by Elasticsearch\n", - "ELASTIC_PASSWORD = getpass.getpass('Elastic password: ')\n", + "# https://www.elastic.co/search-labs/tutorials/install-elasticsearch/elastic-cloud#creating-an-api-key\n", + "ELASTIC_API_KEY = getpass(\"Elastic Api Key: \")\n", "\n", "# Create the client instance\n", "client = Elasticsearch(\n", - " cloud_id=CLOUD_ID,\n", - " basic_auth=(\"elastic\", ELASTIC_PASSWORD)\n", + " cloud_id=ELASTIC_CLOUD_ID,\n", + " api_key=ELASTIC_API_KEY,\n", ")\n", "\n", "print(client.info())" @@ -123,9 +139,20 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "ObjectApiResponse({'acknowledged': True})" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "\n", "client.ingest.put_pipeline(\n", @@ -152,9 +179,20 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 6, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "ObjectApiResponse({'acknowledged': True, 'shards_acknowledged': True, 'index': 'movies'})" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "client.indices.delete(index=\"movies\",ignore_unavailable=True)\n", "client.indices.create(\n", @@ -192,7 +230,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 8, "metadata": {}, "outputs": [ { @@ -221,8 +259,8 @@ "# Use helpers.bulk to index\n", "helpers.bulk(client, documents)\n", "\n", - "print(\"Done indexing documents into `movies` index!\")\n", - "time.sleep(5)" + "time.sleep(5)\n", + "print(\"Done indexing documents into `movies` index!\")" ] }, { @@ -239,9 +277,20 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 9, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + 
"text/plain": [ + "ObjectApiResponse({'acknowledged': True})" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "client.ingest.put_pipeline(\n", " id=\"elser-ingest-pipeline\", \n", @@ -273,9 +322,20 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 13, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "ObjectApiResponse({'acknowledged': True, 'shards_acknowledged': True, 'index': 'elser-movies'})" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "client.indices.delete(index=\"elser-movies\",ignore_unavailable=True)\n", "client.indices.create(\n", @@ -319,7 +379,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 15, "metadata": {}, "outputs": [], "source": [ @@ -351,22 +411,22 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 48, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Score: 6.403741\n", + "Score: 6.403748\n", "Title: se7en\n", "Plot: Two detectives, a rookie and a veteran, hunt a serial killer who uses the seven deadly sins as his motives.\n", "\n", - "Score: 3.6703415\n", + "Score: 3.6703482\n", "Title: the departed\n", "Plot: An undercover cop and a mole in the police attempt to identify each other while infiltrating an Irish gang in South Boston.\n", "\n", - "Score: 2.9359162\n", + "Score: 2.9359207\n", "Title: the usual suspects\n", "Plot: A sole survivor tells of the twisty events leading up to a horrific gun battle on a boat, which began when five criminals met at a seemingly random police lineup.\n", "\n" @@ -417,9 +477,20 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 37, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "ObjectApiResponse({'acknowledged': True})" + ] + }, + "execution_count": 37, + "metadata": {}, + 
"output_type": "execute_result" + } + ], "source": [ "client.ingest.put_pipeline(\n", " id=\"elser-pipeline-upgrade-demo\", \n", @@ -430,8 +501,8 @@ " \"model_id\": \".elser_model_2\",\n", " \"input_output\": [\n", " {\n", - " \"input_field\": \"plot\",\n", - " \"output_field\": \"plot_embedding\"\n", + " \"input_field\": \"title\",\n", + " \"output_field\": \"title_embedding\"\n", " }\n", " ]\n", " }\n", @@ -450,16 +521,27 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 38, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "ObjectApiResponse({'acknowledged': True, 'shards_acknowledged': True, 'index': 'elser-upgrade-index-demo'})" + ] + }, + "execution_count": 38, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "client.indices.delete(index=\"elser-upgrade-index-demo\", ignore_unavailable=True)\n", "client.indices.create(\n", " index=\"elser-upgrade-index-demo\",\n", " mappings={\n", " \"properties\": {\n", - " \"plot\": {\n", + " \"title\": {\n", " \"type\": \"text\",\n", " \"fields\": {\n", " \"keyword\": {\n", @@ -468,7 +550,7 @@ " }\n", " }\n", " },\n", - " \"plot_embedding\": {\n", + " \"title_embedding\": {\n", " \"type\": \"sparse_vector\"\n", " },\n", " }\n", @@ -489,18 +571,18 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 39, "metadata": {}, "outputs": [], "source": [ "client.reindex(source={\n", - " \"index\": \"my-index\", # replace with your index name\n", + " \"index\": \"books\", # replace with your index name\n", " \"_source\": {\n", - " \"excludes\": [\"my-tokens-field\"] # replace with the field-name from your index, that has previously generated tokens\n", + " \"excludes\": [\"title_vector\"] # replace with the field-name from your index, that has previously generated tokens\n", " }}, \n", " dest={\n", " \"index\": \"elser-upgrade-index-demo\",\n", - " \"pipeline\": \"elser-pipeline-upgrade-demo\"\n", + " \"pipeline\": 
\"elser-pipeline-upgrade-demo\"\n", " })\n", "time.sleep(5)" ] @@ -516,24 +598,24 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 40, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Score: 3.3168378\n", - "Title: Fight Club\n", - "Plot: An insomniac office worker and a devil-may-care soapmaker form an underground fight club that evolves into something much, much more.\n", + "Score: 14.755971\n", + "Title: Python Crash Course\n", + "Plot: Python Crash Course\n", "\n", - "Score: 1.5777297\n", - "Title: The Godfather\n", - "Plot: An organized crime dynasty's aging patriarch transfers control of his clandestine empire to his reluctant son.\n", + "Score: 14.168372\n", + "Title: The Pragmatic Programmer: Your Journey to Mastery\n", + "Plot: The Pragmatic Programmer: Your Journey to Mastery\n", "\n", - "Score: 1.1162646\n", - "Title: The Matrix\n", - "Plot: A computer hacker learns from mysterious rebels about the true nature of his reality and his role in the war against its controllers.\n", + "Score: 11.704832\n", + "Title: The Clean Coder: A Code of Conduct for Professional Programmers\n", + "Plot: The Clean Coder: A Code of Conduct for Professional Programmers\n", "\n" ] } @@ -544,9 +626,9 @@ " size=3,\n", " query={\n", " \"text_expansion\": {\n", - " \"plot_embedding\": {\n", + " \"title_embedding\": {\n", " \"model_id\":\".elser_model_2\",\n", - " \"model_text\":\"child toy\"\n", + " \"model_text\":\"Programming Course\"\n", " }\n", " }\n", " }\n", @@ -556,7 +638,7 @@ " doc_id = hit['_id']\n", " score = hit['_score']\n", " title = hit['_source']['title']\n", - " plot = hit['_source']['plot']\n", + " plot = hit['_source']['title']\n", " print(f\"Score: {score}\\nTitle: {title}\\nPlot: {plot}\\n\")\n" ] }, @@ -568,41 +650,41 @@ "\n", "Now we will see how to move your index which already has generated `embedding` using a different model. 
\n", "\n", - "Lets consider the index - `blogs` and has generated `text_embedding` using the NLP model `sentence-transformers__all-minilm-l6-v2`. In case you would like know about more how to load a NLP model to an index, follow the steps from our notebook [loading-model-from-hugging-face.ipynb](../integrations/hugging-face/loading-model-from-hugging-face.ipynb)\n", + "Let's consider the index `books`, which has a `title_vector` field generated using the NLP model `sentence-transformers__all-minilm-l6-v2`. In case you would like to know more about how to load an NLP model to an index, follow the steps from our notebook [loading-model-from-hugging-face.ipynb](../integrations/hugging-face/loading-model-from-hugging-face.ipynb)\n", "\n", "Follow similiar proceedure that we did in previously: \n", "1. Create a ingestion pipeline with ELSER model `.elser_model_2`\n", "2. Create a index with mappings, with the pipeline we created in the previous step. \n", - "3. Reindex, excluding the field that has embedding from the `blogs` index\n", + "3. 
Reindex, excluding the field that holds the embedding from the `books` index\n", "\n", - "Before we begin, lets take a look at our index `blogs` and see the mappings" + "Before we begin, let's take a look at our index `books` and see the mappings" ] }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 41, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "ObjectApiResponse({'blogs': {'aliases': {}, 'mappings': {'properties': {'text_embedding': {'properties': {'is_truncated': {'type': 'boolean'}, 'model_id': {'type': 'text', 'fields': {'keyword': {'type': 'keyword', 'ignore_above': 256}}}, 'predicted_value': {'type': 'dense_vector', 'dims': 384, 'index': True, 'similarity': 'l2_norm'}}}, 'title': {'type': 'text', 'fields': {'keyword': {'type': 'keyword', 'ignore_above': 256}}}}}, 'settings': {'index': {'routing': {'allocation': {'include': {'_tier_preference': 'data_content'}}}, 'number_of_shards': '1', 'blocks': {'read_only_allow_delete': 'false'}, 'provided_name': 'blogs', 'default_pipeline': 'vectorize_blogs', 'creation_date': '1697651466693', 'number_of_replicas': '1', 'uuid': 'JWkPyTphQ2GV0sLadHWjjw', 'version': {'created': '8500003'}}}}})" + "ObjectApiResponse({'books': {'aliases': {}, 'mappings': {'properties': {'authors': {'type': 'text', 'fields': {'keyword': {'type': 'keyword', 'ignore_above': 256}}}, 'num_reviews': {'type': 'long'}, 'publish_date': {'type': 'date'}, 'publisher': {'type': 'text', 'fields': {'keyword': {'type': 'keyword', 'ignore_above': 256}}}, 'summary': {'type': 'text', 'fields': {'keyword': {'type': 'keyword', 'ignore_above': 256}}}, 'title': {'type': 'text', 'fields': {'keyword': {'type': 'keyword', 'ignore_above': 256}}}, 'title_vector': {'type': 'dense_vector', 'dims': 384, 'index': True, 'similarity': 'cosine'}}}, 'settings': {'index': {'routing': {'allocation': {'include': {'_tier_preference': 'data_content'}}}, 'number_of_shards': '1', 'provided_name': 'books', 'creation_date': '1706118077023', 
'number_of_replicas': '1', 'uuid': 'GxGfG_LtSBOIXsB-5bF2_A', 'version': {'created': '8500003'}}}}})" ] }, - "execution_count": 8, + "execution_count": 41, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "client.indices.get(index=\"blogs\")" + "client.indices.get(index=\"books\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "Notice the field `text_embedding`, We will exclude this field in our new index and generate new mapping against the field `title` from the `blogs` index" + "Notice the field `title_vector`. We will exclude this field in our new index and generate a new mapping against the field `title` from the `books` index" ] }, { @@ -619,9 +701,27 @@ "execution_count": null, "metadata": {}, "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 42, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "ObjectApiResponse({'acknowledged': True})" + ] + }, + "execution_count": 42, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "client.ingest.put_pipeline(\n", - " id=\"elser-pipeline-blogs\", \n", + " id=\"elser-pipeline-books\", \n", " description=\"Ingest pipeline for ELSER upgrade\",\n", " processors=[\n", " {\n", @@ -645,18 +745,29 @@ "source": [ "# Create index with mappings\n", "\n", - "Lets create a index `elser-blogs` with mappings" + "Let's create an index `elser-books` with mappings" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 43, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "ObjectApiResponse({'acknowledged': True, 'shards_acknowledged': True, 'index': 'elser-books'})" + ] + }, + "execution_count": 43, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "client.indices.delete(index=\"elser-blogs\", ignore_unavailable=True)\n", + "client.indices.delete(index=\"elser-books\", ignore_unavailable=True)\n", "client.indices.create(\n", - " index=\"elser-blogs\",\n", 
+ " index=\"elser-books\",\n", " mappings={\n", " \"properties\": {\n", " \"title\": {\n", @@ -682,23 +793,23 @@ "source": [ "# Reindex API\n", "\n", - "we will use the [Reindex API](https://elasticsearch-py.readthedocs.io/en/stable/api.html#elasticsearch.Elasticsearch.reindex) to copy data and generate `text_expansion` embedding to our new index `elser-blogs`. " + "We will use the [Reindex API](https://elasticsearch-py.readthedocs.io/en/stable/api.html#elasticsearch.Elasticsearch.reindex) to copy data and generate `text_expansion` embeddings in our new index `elser-books`. " ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 44, "metadata": {}, "outputs": [], "source": [ "client.reindex(source={\n", - " \"index\": \"blogs\",\n", + " \"index\": \"books\",\n", " \"_source\": {\n", - " \"excludes\": [\"text_embedding\"]\n", + " \"excludes\": [\"title_vector\"]\n", " }\n", " }, dest={\n", - " \"index\": \"elser-blogs\",\n", - " \"pipeline\": \"elser-pipeline-blogs\"\n", + " \"index\": \"elser-books\",\n", + " \"pipeline\": \"elser-pipeline-books\"\n", " })\n", "time.sleep(5)" ] @@ -708,36 +819,43 @@ "metadata": {}, "source": [ "# Querying your data\n", - "Success! Now we can query data on the index `elser-blogs`." + "Success! Now we can query data on the index `elser-books`." 
] }, { "cell_type": "code", - "execution_count": 11, + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 47, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Score: 27.618645\n", - "Title: Brewing in Beats: Track network connections\n", - "Score: 3.8143802\n", - "Title: Machine Learning for Nginx Logs - Identifying Operational Issues with Your Website\n", - "Score: 3.3623078\n", - "Title: Data Visualization For Machine Learning\n" + "Score: 22.333044\n", + "Title: Python Crash Course\n", + "Score: 9.364547\n", + "Title: The Pragmatic Programmer: Your Journey to Mastery\n", + "Score: 8.410445\n", + "Title: Clean Code: A Handbook of Agile Software Craftsmanship\n" ] } ], "source": [ "response = client.search(\n", - " index='elser-blogs', \n", + " index='elser-books', \n", " size=3,\n", " query={\n", " \"text_expansion\": {\n", " \"title_embedding\": {\n", " \"model_id\":\".elser_model_2\",\n", - " \"model_text\":\"Track network connections\"\n", + " \"model_text\":\"Python tutorial\"\n", " }\n", " }\n", " }\n", @@ -749,11 +867,18 @@ " title = hit['_source']['title']\n", " print(f\"Score: {score}\\nTitle: {title}\")\n" ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { "kernelspec": { - "display_name": "Python 3.11.4 64-bit", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, @@ -767,9 +892,8 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.4" + "version": "3.11.6" }, - "orig_nbformat": 4, "vscode": { "interpreter": { "hash": "b0fa6594d8f4cbf19f97940f81e996739fb7646882a419484c72d19e05852a7e" @@ -777,5 +901,5 @@ } }, "nbformat": 4, - "nbformat_minor": 2 + "nbformat_minor": 4 }