Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
58 commits
Select commit Hold shift + click to select a range
172f897
fixed metadata merging to properly update the meta key
davidsbatista Jan 5, 2026
842da6a
formmatting
davidsbatista Jan 5, 2026
a28bb2a
adding count distinct metadata values
davidsbatista Jan 5, 2026
b0b594c
refactoring to reduce duplicated code
davidsbatista Jan 5, 2026
b23274f
adding get metadata info
davidsbatista Jan 5, 2026
22e160d
adding get_field_max_min
davidsbatista Jan 5, 2026
310846d
fixing get_field_max_min
davidsbatista Jan 5, 2026
e0be21f
adding get_field_unique_values
davidsbatista Jan 5, 2026
e6932b0
adding get_field_unique_values async
davidsbatista Jan 5, 2026
511c421
Merge branch 'main' into feat/add-count-filtering-to-OpenSearchDocume…
davidsbatista Jan 5, 2026
5e7cd90
formmatting
davidsbatista Jan 5, 2026
0c0f31c
updating tests
davidsbatista Jan 5, 2026
2010261
formmatting
davidsbatista Jan 5, 2026
873a4dc
cleaning up
davidsbatista Jan 6, 2026
1f3347b
adding httpx as a dependency
davidsbatista Jan 6, 2026
3622168
fixing pyproject.toml
davidsbatista Jan 6, 2026
b3e99bd
Merge branch 'main' into feat/add-count-filtering-to-OpenSearchDocume…
davidsbatista Jan 7, 2026
d96cc4c
updating tests: making use of the new refresh feature
davidsbatista Jan 7, 2026
11b6d88
Merge branch 'main' into feat/add-count-filtering-to-OpenSearchDocume…
davidsbatista Jan 8, 2026
69863d0
dealing with special fields
davidsbatista Jan 8, 2026
3a3df4c
docstring update
davidsbatista Jan 8, 2026
98ddcf3
Merge branch 'main' into feat/add-count-filtering-to-OpenSearchDocume…
davidsbatista Jan 8, 2026
6b2081b
adding roundtrip tests to assert documents metadata is correctly writ…
davidsbatista Jan 8, 2026
4f8ab78
Merge branch 'main' into feat/add-count-filtering-to-OpenSearchDocume…
davidsbatista Jan 8, 2026
f400fd8
Merge branch 'main' into feat/add-count-filtering-to-OpenSearchDocume…
davidsbatista Jan 9, 2026
535897f
Merge branch 'main' into feat/add-count-filtering-to-OpenSearchDocume…
davidsbatista Jan 9, 2026
24ab439
Merge branch 'main' into feat/add-count-filtering-to-OpenSearchDocume…
davidsbatista Jan 12, 2026
923081e
Update integrations/opensearch/src/haystack_integrations/document_sto…
davidsbatista Jan 13, 2026
abd4b7f
Update integrations/opensearch/src/haystack_integrations/document_sto…
davidsbatista Jan 13, 2026
dfcb8ec
updating function names
davidsbatista Jan 13, 2026
a926d03
updating function names + tests
davidsbatista Jan 13, 2026
c626554
Merge branch 'main' into feat/add-count-filtering-to-OpenSearchDocume…
davidsbatista Jan 13, 2026
2a65a49
Update integrations/opensearch/src/haystack_integrations/document_sto…
davidsbatista Jan 13, 2026
a4d7a6f
Merge branch 'main' into feat/add-count-filtering-to-OpenSearchDocume…
davidsbatista Jan 13, 2026
60406ac
updating function names + tests
davidsbatista Jan 13, 2026
7780e76
adding SQLRetriever + tests
davidsbatista Jan 13, 2026
67852d5
adding missing files
davidsbatista Jan 13, 2026
3985984
adding missing files
davidsbatista Jan 13, 2026
ea69e28
Update integrations/opensearch/src/haystack_integrations/document_sto…
davidsbatista Jan 13, 2026
7caca70
Update integrations/opensearch/src/haystack_integrations/document_sto…
davidsbatista Jan 13, 2026
0f86e3b
Update integrations/opensearch/src/haystack_integrations/document_sto…
davidsbatista Jan 13, 2026
07785d6
Update integrations/opensearch/src/haystack_integrations/document_sto…
davidsbatista Jan 13, 2026
f704386
Update integrations/opensearch/src/haystack_integrations/document_sto…
davidsbatista Jan 13, 2026
b6e00ea
Update integrations/opensearch/src/haystack_integrations/document_sto…
davidsbatista Jan 13, 2026
386130a
Update integrations/opensearch/src/haystack_integrations/document_sto…
davidsbatista Jan 13, 2026
743a6a5
Update integrations/opensearch/src/haystack_integrations/document_sto…
davidsbatista Jan 13, 2026
5451173
Update integrations/opensearch/src/haystack_integrations/document_sto…
davidsbatista Jan 13, 2026
7a95c33
Update integrations/opensearch/src/haystack_integrations/document_sto…
davidsbatista Jan 13, 2026
4dc5c02
Update integrations/opensearch/src/haystack_integrations/document_sto…
davidsbatista Jan 13, 2026
3eba199
Merge branch 'main' into feat/add-count-filtering-to-OpenSearchDocume…
davidsbatista Jan 13, 2026
a9f35d2
PR comments/fixes
davidsbatista Jan 13, 2026
eb261f9
fixes
davidsbatista Jan 13, 2026
99a17db
improving docstring
davidsbatista Jan 13, 2026
f5eaf4b
updating docs
davidsbatista Jan 13, 2026
22cf8bc
Merge branch 'main' into feat/add-SQLRetriever
davidsbatista Jan 14, 2026
1f22a63
Merge branch 'main' into feat/add-SQLRetriever
davidsbatista Jan 16, 2026
04981fe
resolving conflicts
davidsbatista Jan 16, 2026
3cd5b85
Merge branch 'main' into feat/add-SQLRetriever
davidsbatista Jan 16, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions integrations/opensearch/pydoc/config_docusaurus.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ loaders:
- haystack_integrations.components.retrievers.opensearch.bm25_retriever
- haystack_integrations.components.retrievers.opensearch.embedding_retriever
- haystack_integrations.components.retrievers.opensearch.open_search_hybrid_retriever
- haystack_integrations.components.retrievers.opensearch.sql_retriever
- haystack_integrations.document_stores.opensearch.document_store
- haystack_integrations.document_stores.opensearch.filters
search_path:
Expand Down
4 changes: 3 additions & 1 deletion integrations/opensearch/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,11 @@ classifiers = [
"Programming Language :: Python :: Implementation :: CPython",
"Programming Language :: Python :: Implementation :: PyPy",
]

dependencies = [
"haystack-ai>=2.22.0",
"opensearch-py[async]>=2.4.0,<3"
"opensearch-py[async]>=2.4.0,<3",
"httpx>=0.28.1"
]

[project.urls]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,5 +5,11 @@
from .bm25_retriever import OpenSearchBM25Retriever
from .embedding_retriever import OpenSearchEmbeddingRetriever
from .open_search_hybrid_retriever import OpenSearchHybridRetriever
from .sql_retriever import OpenSearchSQLRetriever

__all__ = ["OpenSearchBM25Retriever", "OpenSearchEmbeddingRetriever", "OpenSearchHybridRetriever"]
__all__ = [
"OpenSearchBM25Retriever",
"OpenSearchEmbeddingRetriever",
"OpenSearchHybridRetriever",
"OpenSearchSQLRetriever",
]
Original file line number Diff line number Diff line change
@@ -0,0 +1,189 @@
# SPDX-FileCopyrightText: 2023-present deepset GmbH <[email protected]>
#
# SPDX-License-Identifier: Apache-2.0

from typing import Any

from haystack import component, default_from_dict, default_to_dict, logging

from haystack_integrations.document_stores.opensearch import OpenSearchDocumentStore
from haystack_integrations.document_stores.opensearch.document_store import ResponseFormat

logger = logging.getLogger(__name__)


@component
class OpenSearchSQLRetriever:
    """
    Executes raw OpenSearch SQL queries against an OpenSearchDocumentStore.

    This component allows you to execute SQL queries directly against the OpenSearch index,
    which is useful for fetching metadata, aggregations, and other structured data at runtime.
    """

    def __init__(
        self,
        *,
        document_store: OpenSearchDocumentStore,
        response_format: ResponseFormat = "json",
        raise_on_failure: bool = True,
    ):
        """
        Creates the OpenSearchSQLRetriever component.

        :param document_store: An instance of OpenSearchDocumentStore to use with the Retriever.
        :param response_format: The format of the response. See https://docs.opensearch.org/latest/search-plugins/sql/response-formats/
            - `json`: Returns a list of dictionaries (the _source from each hit). Default.
            - `csv`: Returns the response as CSV text.
            - `jdbc`: Returns the response in JDBC format.
            - `raw`: Returns the raw response as text.
        :param raise_on_failure:
            Whether to raise an exception if the query fails. Otherwise, a warning is logged and
            `run`/`run_async` return `{"result": None}`.

        :raises ValueError: If `document_store` is not an instance of OpenSearchDocumentStore.
        """
        self._validate_document_store(document_store)

        self._document_store = document_store
        self._response_format = response_format
        self._raise_on_failure = raise_on_failure

    @staticmethod
    def _validate_document_store(document_store: Any) -> None:
        """
        Ensures the given object is an OpenSearchDocumentStore.

        :param document_store: The object to check.

        :raises ValueError: If `document_store` is not an instance of OpenSearchDocumentStore.
        """
        if not isinstance(document_store, OpenSearchDocumentStore):
            msg = "document_store must be an instance of OpenSearchDocumentStore"
            raise ValueError(msg)

    def _resolve_document_store(self, document_store: OpenSearchDocumentStore | None) -> OpenSearchDocumentStore:
        """
        Picks the document store to query: the runtime override if given, else the one set at init.

        :param document_store: Optional runtime override.

        :returns:
            The document store to run the query against.

        :raises ValueError: If an override is given that is not an OpenSearchDocumentStore.
        """
        if document_store is None:
            return self._document_store
        self._validate_document_store(document_store)
        return document_store

    def to_dict(self) -> dict[str, Any]:
        """
        Serializes the component to a dictionary.

        :returns:
            Dictionary with serialized data.
        """
        return default_to_dict(
            self,
            document_store=self._document_store.to_dict(),
            response_format=self._response_format,
            raise_on_failure=self._raise_on_failure,
        )

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> "OpenSearchSQLRetriever":
        """
        Deserializes the component from a dictionary.

        The input dictionary is not modified; the nested document store is deserialized
        into a copy of the init parameters.

        :param data:
            Dictionary to deserialize from.

        :returns:
            Deserialized component.
        """
        # Work on a shallow copy so the caller's dictionary can be reused after this call.
        init_parameters = dict(data["init_parameters"])
        init_parameters["document_store"] = OpenSearchDocumentStore.from_dict(init_parameters["document_store"])
        return default_from_dict(cls, {**data, "init_parameters": init_parameters})

    @component.output_types(result=Any)
    def run(
        self,
        query: str,
        response_format: ResponseFormat | None = None,
        document_store: OpenSearchDocumentStore | None = None,
    ) -> dict[str, Any]:
        """
        Execute a raw OpenSearch SQL query against the index.

        :param query: The OpenSearch SQL query to execute.
        :param response_format: The format of the response. If not provided, uses the format
            specified during initialization. See https://docs.opensearch.org/latest/search-plugins/sql/response-formats/
        :param document_store: Optionally, an instance of OpenSearchDocumentStore to use with the Retriever.

        :returns:
            A dictionary containing the query results with the following structure:
            - result: The query results in the specified format. For JSON format, returns a list of dictionaries
              (the _source from each hit). For other formats (csv, jdbc, raw), returns the response as text.
              `None` if the query failed and `raise_on_failure` is `False`.

        :raises ValueError: If `document_store` is given but is not an instance of OpenSearchDocumentStore.

        Example:
            ```python
            retriever = OpenSearchSQLRetriever(document_store=document_store)
            result = retriever.run(
                query="SELECT content, category FROM my_index WHERE category = 'A'"
            )
            # result["result"] contains a list of dictionaries with the query results
            ```
        """
        doc_store = self._resolve_document_store(document_store)
        response_format = response_format or self._response_format

        try:
            result = doc_store._query_sql(query=query, response_format=response_format)
        except Exception as e:
            if self._raise_on_failure:
                # Bare raise re-raises the active exception with its original traceback.
                raise
            logger.warning(
                "An error during SQL query execution occurred and will be ignored by returning None: {error}",
                error=str(e),
                exc_info=True,
            )
            result = None

        return {"result": result}

    @component.output_types(result=Any)
    async def run_async(
        self,
        query: str,
        response_format: ResponseFormat | None = None,
        document_store: OpenSearchDocumentStore | None = None,
    ) -> dict[str, Any]:
        """
        Asynchronously execute a raw OpenSearch SQL query against the index.

        :param query: The OpenSearch SQL query to execute.
        :param response_format: The format of the response. If not provided, uses the format
            specified during initialization. See https://docs.opensearch.org/latest/search-plugins/sql/response-formats/
        :param document_store: Optionally, an instance of OpenSearchDocumentStore to use with the Retriever.

        :returns:
            A dictionary containing the query results with the following structure:
            - result: The query results in the specified format. For JSON format, returns a list of dictionaries
              (the _source from each hit). For other formats (csv, jdbc, raw), returns the response as text.
              `None` if the query failed and `raise_on_failure` is `False`.

        :raises ValueError: If `document_store` is given but is not an instance of OpenSearchDocumentStore.

        Example:
            ```python
            retriever = OpenSearchSQLRetriever(document_store=document_store)
            result = await retriever.run_async(
                query="SELECT content, category FROM my_index WHERE category = 'A'"
            )
            # result["result"] contains a list of dictionaries with the query results
            ```
        """
        doc_store = self._resolve_document_store(document_store)
        response_format = response_format or self._response_format

        try:
            result = await doc_store._query_sql_async(query=query, response_format=response_format)
        except Exception as e:
            if self._raise_on_failure:
                # Bare raise re-raises the active exception with its original traceback.
                raise
            logger.warning(
                "An error during SQL query execution occurred and will be ignored by returning None: {error}",
                error=str(e),
                exc_info=True,
            )
            result = None

        return {"result": result}
Loading
Loading