Skip to content

Commit cec4b5c

Browse files
BenMcH and marlenezw authored
Bug fix: Escapes AzureCosmosDBNoSqlVectorStore hybrid & full text queries that contain single quotes (#39)
Co-authored-by: Marlene <57748216+marlenezw@users.noreply.github.com>
1 parent e294fa1 commit cec4b5c

File tree

3 files changed

+53
-15
lines changed

3 files changed

+53
-15
lines changed

libs/azure-ai/langchain_azure_ai/vectorstores/azure_cosmos_db_no_sql.py

Lines changed: 24 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -683,17 +683,24 @@ def _construct_query(
683683
"full_text_rank_filter cannot be None for FULL_TEXT_RANK queries."
684684
)
685685
if len(full_text_rank_filter) == 1:
686+
text = (
687+
full_text_rank_filter[0]["search_text"].replace("'", "\\'").split()
688+
)
689+
686690
query += f""" ORDER BY RANK FullTextScore(c.{full_text_rank_filter[0]["search_field"]},
687-
[{", ".join(f"'{term}'" for term in full_text_rank_filter[0]["search_text"].split())}])""" # noqa:E501
691+
[{", ".join(f"'{term}'" for term in text)}])""" # noqa:E501
688692
else:
689-
rank_components = [
690-
f"FullTextScore(c.{search_item['search_field']}, ["
691-
+ ", ".join(
692-
f"'{term}'" for term in search_item["search_text"].split()
693+
rank_components = []
694+
695+
for search_item in full_text_rank_filter:
696+
text = search_item["search_text"].replace("'", "\\'").split()
697+
698+
rank_components.append(
699+
f"FullTextScore(c.{search_item['search_field']}, ["
700+
+ ", ".join(f"'{term}'" for term in text)
701+
+ "])"
693702
)
694-
+ "])"
695-
for search_item in full_text_rank_filter
696-
]
703+
697704
query = f" ORDER BY RANK RRF({', '.join(rank_components)})"
698705
elif search_type == "vector":
699706
query += " ORDER BY VectorDistance(c[@embeddingKey], @embeddings)"
@@ -702,12 +709,15 @@ def _construct_query(
702709
raise ValueError(
703710
"full_text_rank_filter cannot be None for HYBRID queries."
704711
)
705-
rank_components = [
706-
f"FullTextScore(c.{search_item['search_field']}, ["
707-
+ ", ".join(f"'{term}'" for term in search_item["search_text"].split())
708-
+ "])"
709-
for search_item in full_text_rank_filter
710-
]
712+
rank_components = []
713+
714+
for search_item in full_text_rank_filter:
715+
text = search_item["search_text"].replace("'", "\\'").split()
716+
rank_components.append(
717+
f"FullTextScore(c.{search_item['search_field']}, ["
718+
+ ", ".join(f"'{term}'" for term in text)
719+
+ "])"
720+
)
711721
query += f""" ORDER BY RANK RRF({', '.join(rank_components)},
712722
VectorDistance(c.{self._vector_search_fields["embedding_field"]}, {embeddings}))""" # noqa:E501
713723
else:

libs/azure-ai/tests/integration_tests/vectorstores/test_azure_cosmos_db_no_sql.py

Lines changed: 26 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -361,6 +361,19 @@ def test_from_documents_full_text_and_hybrid(
361361
assert len(output) == 5
362362
assert "Standard Poodles" in output[0].page_content
363363

364+
# Full text search successfully queries for data with a single quote
365+
full_text_rank_filter = [{"search_field": "text", "search_text": "'Herders'"}]
366+
output = store.similarity_search(
367+
"Which dog breed is considered a herder?",
368+
k=5,
369+
query_type="full_text_search",
370+
full_text_rank_filter=full_text_rank_filter,
371+
)
372+
373+
assert output
374+
assert len(output) == 5
375+
assert "Retrievers" in output[0].page_content
376+
364377
# Full text search BM25 ranking with filtering
365378
pre_filter = PreFilter(
366379
conditions=[
@@ -397,6 +410,19 @@ def test_from_documents_full_text_and_hybrid(
397410
assert len(output) == 5
398411
assert "Border Collies" in output[0].page_content
399412

413+
# Hybrid search successfully queries for data with a single quote
414+
full_text_rank_filter = [{"search_field": "text", "search_text": "'energetic'"}]
415+
output = store.similarity_search(
416+
"Which breed is energetic?",
417+
k=5,
418+
query_type="hybrid",
419+
full_text_rank_filter=full_text_rank_filter,
420+
)
421+
422+
assert output
423+
assert len(output) == 5
424+
assert "Border Collies" in output[0].page_content
425+
400426
# Hybrid search RRF ranking with filtering
401427
pre_filter = PreFilter(
402428
conditions=[

libs/azure-ai/tests/unit_tests/test_chat_models.py

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -19,7 +19,9 @@
1919
from langchain_core.messages import AIMessage, HumanMessage, SystemMessage, ToolCall
2020

2121
from langchain_azure_ai.chat_models import AzureAIChatCompletionsModel
22-
from langchain_azure_ai.chat_models.inference import _format_tool_call_for_azure_inference
22+
from langchain_azure_ai.chat_models.inference import (
23+
_format_tool_call_for_azure_inference,
24+
)
2325

2426
logger = logging.getLogger(__name__)
2527

0 commit comments

Comments
 (0)