Bug fix: Escapes AzureCosmosDBNoSqlVectorStore hybrid & full text queries that contain single quotes (#39)

BenMcH · marlenezw · web-flow · commit cec4b5cf6f3e · 2025-04-18T15:22:06.000-07:00
Co-authored-by: Marlene <57748216+marlenezw@users.noreply.github.com>
diff --git a/libs/azure-ai/langchain_azure_ai/vectorstores/azure_cosmos_db_no_sql.py b/libs/azure-ai/langchain_azure_ai/vectorstores/azure_cosmos_db_no_sql.py
@@ -683,17 +683,24 @@ def _construct_query(
                     "full_text_rank_filter cannot be None for FULL_TEXT_RANK queries."
                 )
             if len(full_text_rank_filter) == 1:
+                text = (
+                    full_text_rank_filter[0]["search_text"].replace("'", "\\'").split()
+                )
+
                 query += f""" ORDER BY RANK FullTextScore(c.{full_text_rank_filter[0]["search_field"]}, 
-                [{", ".join(f"'{term}'" for term in full_text_rank_filter[0]["search_text"].split())}])"""  # noqa:E501
+                [{", ".join(f"'{term}'" for term in text)}])"""  # noqa:E501
             else:
-                rank_components = [
-                    f"FullTextScore(c.{search_item['search_field']}, ["
-                    + ", ".join(
-                        f"'{term}'" for term in search_item["search_text"].split()
+                rank_components = []
+
+                for search_item in full_text_rank_filter:
+                    text = search_item["search_text"].replace("'", "\\'").split()
+
+                    rank_components.append(
+                        f"FullTextScore(c.{search_item['search_field']}, ["
+                        + ", ".join(f"'{term}'" for term in text)
+                        + "])"
                     )
-                    + "])"
-                    for search_item in full_text_rank_filter
-                ]
+
                 query = f" ORDER BY RANK RRF({', '.join(rank_components)})"
         elif search_type == "vector":
             query += " ORDER BY VectorDistance(c[@embeddingKey], @embeddings)"
@@ -702,12 +709,15 @@ def _construct_query(
                 raise ValueError(
                     "full_text_rank_filter cannot be None for HYBRID queries."
                 )
-            rank_components = [
-                f"FullTextScore(c.{search_item['search_field']}, ["
-                + ", ".join(f"'{term}'" for term in search_item["search_text"].split())
-                + "])"
-                for search_item in full_text_rank_filter
-            ]
+            rank_components = []
+
+            for search_item in full_text_rank_filter:
+                text = search_item["search_text"].replace("'", "\\'").split()
+                rank_components.append(
+                    f"FullTextScore(c.{search_item['search_field']}, ["
+                    + ", ".join(f"'{term}'" for term in text)
+                    + "])"
+                )
             query += f""" ORDER BY RANK RRF({', '.join(rank_components)}, 
             VectorDistance(c.{self._vector_search_fields["embedding_field"]}, {embeddings}))"""  # noqa:E501
         else:
diff --git a/libs/azure-ai/tests/integration_tests/vectorstores/test_azure_cosmos_db_no_sql.py b/libs/azure-ai/tests/integration_tests/vectorstores/test_azure_cosmos_db_no_sql.py
@@ -361,6 +361,19 @@ def test_from_documents_full_text_and_hybrid(
         assert len(output) == 5
         assert "Standard Poodles" in output[0].page_content
 
+        # Full text search successfully queries for data with a single quote
+        full_text_rank_filter = [{"search_field": "text", "search_text": "'Herders'"}]
+        output = store.similarity_search(
+            "Which dog breed is considered a herder?",
+            k=5,
+            query_type="full_text_search",
+            full_text_rank_filter=full_text_rank_filter,
+        )
+
+        assert output
+        assert len(output) == 5
+        assert "Retrievers" in output[0].page_content
+
         # Full text search BM25 ranking with filtering
         pre_filter = PreFilter(
             conditions=[
@@ -397,6 +410,19 @@ def test_from_documents_full_text_and_hybrid(
         assert len(output) == 5
         assert "Border Collies" in output[0].page_content
 
+        # Hybrid search successfully queries for data with a single quote
+        full_text_rank_filter = [{"search_field": "text", "search_text": "'energetic'"}]
+        output = store.similarity_search(
+            "Which breed is energetic?",
+            k=5,
+            query_type="hybrid",
+            full_text_rank_filter=full_text_rank_filter,
+        )
+
+        assert output
+        assert len(output) == 5
+        assert "Border Collies" in output[0].page_content
+
         # Hybrid search RRF ranking with filtering
         pre_filter = PreFilter(
             conditions=[
diff --git a/libs/azure-ai/tests/unit_tests/test_chat_models.py b/libs/azure-ai/tests/unit_tests/test_chat_models.py
@@ -19,7 +19,9 @@
 from langchain_core.messages import AIMessage, HumanMessage, SystemMessage, ToolCall
 
 from langchain_azure_ai.chat_models import AzureAIChatCompletionsModel
-from langchain_azure_ai.chat_models.inference import _format_tool_call_for_azure_inference
+from langchain_azure_ai.chat_models.inference import (
+    _format_tool_call_for_azure_inference,
+)
 
 logger = logging.getLogger(__name__)