diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 19cc3423..6a5e1f60 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -17,6 +17,7 @@ jobs: es_stack: - 8.11.4 - 8.12.0 + - 8.13.0-SNAPSHOT runs-on: ubuntu-latest services: elasticsearch: diff --git a/notebooks/document-chunking/with-index-pipelines.ipynb b/notebooks/document-chunking/with-index-pipelines.ipynb index c4fced89..ec3359de 100644 --- a/notebooks/document-chunking/with-index-pipelines.ipynb +++ b/notebooks/document-chunking/with-index-pipelines.ipynb @@ -47,7 +47,7 @@ }, { "cell_type": "code", - "execution_count": 55, + "execution_count": null, "id": "ffc5fa6f", "metadata": { "id": "ffc5fa6f" @@ -71,7 +71,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 30, "id": "f38e0397", "metadata": { "colab": { @@ -80,7 +80,16 @@ "id": "f38e0397", "outputId": "ad6df489-d242-4229-a42a-39c5ca19d124" }, - "outputs": [], + "outputs": [ + { + "name": "stdin", + "output_type": "stream", + "text": [ + "Elastic Cloud ID: ········\n", + "Elastic Api Key: ········\n" + ] + } + ], "source": [ "from elasticsearch import Elasticsearch\n", "from getpass import getpass\n", @@ -122,7 +131,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": null, "id": "25c618eb", "metadata": { "colab": { @@ -131,15 +140,7 @@ "id": "25c618eb", "outputId": "30a6ba5b-5109-4457-ddfe-5633a077ca9b" }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "{'name': 'instance-0000000011', 'cluster_name': 'd1bd36862ce54c7b903e2aacd4cd7f0a', 'cluster_uuid': 'tIkh0X_UQKmMFQKSfUw-VQ', 'version': {'number': '8.11.1', 'build_flavor': 'default', 'build_type': 'docker', 'build_hash': '6f9ff581fbcde658e6f69d6ce03050f060d1fd0c', 'build_date': '2023-11-11T10:05:59.421038163Z', 'build_snapshot': False, 'lucene_version': '9.8.0', 'minimum_wire_compatibility_version': '7.17.0', 'minimum_index_compatibility_version': '7.0.0'}, 'tagline': 'You Know, for Search'}\n" - ] - } - ], + "outputs": [], "source": [ "print(client.info())" ] @@ -155,7 +156,7 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": null, "id": "63560817", "metadata": {}, "outputs": [], @@ -185,7 +186,7 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": 22, "id": "6bc95238", "metadata": { "id": "6bc95238" @@ -197,7 +198,7 @@ "ObjectApiResponse({'acknowledged': True})" ] }, - "execution_count": 24, + "execution_count": 22, "metadata": {}, "output_type": "execute_result" } @@ -279,7 +280,7 @@ }, { "cell_type": "code", - "execution_count": 46, + "execution_count": 25, "id": "_OAahfg-tqrf", "metadata": { "colab": { @@ -295,7 +296,7 @@ "ObjectApiResponse({'acknowledged': True, 'shards_acknowledged': True, 'index': 'chunk_passages_example'})" ] }, - "execution_count": 46, + "execution_count": 25, "metadata": {}, "output_type": "execute_result" } @@ -349,7 +350,7 @@ }, { "cell_type": "code", - "execution_count": 25, + "execution_count": 26, "id": "008d723e", "metadata": { "id": "008d723e" @@ -395,7 +396,7 @@ }, { "cell_type": "code", - "execution_count": 42, + "execution_count": 27, "id": "f12ce2c9", "metadata": { "id": "f12ce2c9" @@ -431,12 +432,12 @@ "\n", "To search the data and return what chunk matched the query best you use inner_hits with the knn clause to return just that best matching chunk of the document in the hits output from the query.\n", "\n", - "Below you will see the response which returns the best document and the relevant portion of the larger document text.\n" + "Below you will see the response which returns the best document and the most relevant passage.\n" ] }, { "cell_type": "code", - "execution_count": 43, + "execution_count": 29, "id": "Df7hwcIjYwMT", "metadata": { "colab": { @@ -479,7 +480,7 @@ "a.\n", "\n", "\n", - "Score: 0.76643425\n", + "Score: 0.7664343\n", "\n", "---\n", "\n", @@ -502,6 +503,7 @@ " index=INDEX_NAME,\n", " knn={\n", " \"inner_hits\": {\n", + " \"size\": 1,\n", " \"_source\": False,\n", " \"fields\": [\n", " \"passages.text\"\n", @@ -521,6 +523,14 @@ "\n", "pretty_response(response)" ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c4bbcc4b-ea2d-47a3-b475-c2eb0eebb7e2", + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { @@ -542,7 +552,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.6" + "version": "3.10.13" } }, "nbformat": 4, diff --git a/notebooks/document-chunking/with-langchain-splitters.ipynb b/notebooks/document-chunking/with-langchain-splitters.ipynb index 6a92d272..d04e4b59 100644 --- a/notebooks/document-chunking/with-langchain-splitters.ipynb +++ b/notebooks/document-chunking/with-langchain-splitters.ipynb @@ -377,7 +377,7 @@ "metadata": {}, "source": [ "### Perform a Nested Search\n", - "We can now perform a nested search, to find the passages that match our query, which will be returned in `inner_hits`." + "We can now perform a nested search, to find the passages that match our query, which will be returned in `inner_hits`. In the example that follows only one passage per parent document is requested." ] }, { @@ -478,6 +478,7 @@ " index=INDEX_NAME, \n", " knn={\n", " \"inner_hits\": {\n", + " \"size\": 1,\n", " \"_source\": False,\n", " \"fields\": [\n", " \"passages.text\"\n", @@ -644,7 +645,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.6" + "version": "3.10.13" } }, "nbformat": 4,