Skip to content

Commit ee45b28

Browse files
Bobbins228 and jgarciao authored
test: add searchmode tests for vector search (#1115)
* test: add searchmode tests for vector search
* fix: add missing search parameter

---------

Co-authored-by: Jorge <jgarciao@users.noreply.github.com>
1 parent b8a709f commit ee45b28

3 files changed

Lines changed: 86 additions & 62 deletions

File tree

tests/llama_stack/conftest.py

Lines changed: 12 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,8 @@
6666
LLS_CORE_VLLM_EMBEDDING_MAX_TOKENS = os.getenv("LLS_CORE_VLLM_EMBEDDING_MAX_TOKENS", "8192")
6767
LLS_CORE_VLLM_EMBEDDING_TLS_VERIFY = os.getenv("LLS_CORE_VLLM_EMBEDDING_TLS_VERIFY", "true")
6868

69+
IBM_EARNINGS_DOC_URL = "https://www.ibm.com/downloads/documents/us-en/1550f7eea8c0ded6"
70+
6971
distribution_name = generate_random_name(prefix="llama-stack-distribution")
7072

7173

@@ -776,35 +778,24 @@ def vector_store_with_example_docs(
776778
unprivileged_llama_stack_client: LlamaStackClient, vector_store: VectorStore
777779
) -> Generator[VectorStore, None, None]:
778780
"""
779-
Creates a vector store with TorchTune documentation files uploaded.
781+
Creates a vector store with the IBM fourth-quarter 2025 earnings report uploaded.
780782
781-
This fixture depends on the vector_store fixture and uploads the TorchTune
782-
documentation files to the vector store for testing purposes. The files
783-
are automatically cleaned up after the test completes.
783+
This fixture depends on the vector_store fixture and uploads the IBM earnings
784+
document to the vector store for testing vector, keyword, and hybrid search.
785+
The file is automatically cleaned up after the test completes.
784786
785787
Args:
786788
unprivileged_llama_stack_client: The configured LlamaStackClient
787789
vector_store: The vector store fixture to upload files to
788790
789791
Yields:
790-
Vector store object with uploaded TorchTune documentation files
792+
Vector store object with uploaded IBM earnings report document
791793
"""
792-
# Download TorchTune documentation files
793-
urls = [
794-
"llama3.rst",
795-
"chat.rst",
796-
"lora_finetune.rst",
797-
"qat_finetune.rst",
798-
"memory_optimizations.rst",
799-
]
800-
801-
base_url = "https://raw.githubusercontent.com/pytorch/torchtune/refs/tags/v0.6.1/docs/source/tutorials/"
802-
803-
for file_name in urls:
804-
url = f"{base_url}{file_name}"
805-
vector_store_create_file_from_url(
806-
url=url, llama_stack_client=unprivileged_llama_stack_client, vector_store=vector_store
807-
)
794+
vector_store_create_file_from_url(
795+
url=IBM_EARNINGS_DOC_URL,
796+
llama_stack_client=unprivileged_llama_stack_client,
797+
vector_store=vector_store,
798+
)
808799

809800
yield vector_store
810801

tests/llama_stack/utils.py

Lines changed: 18 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,9 @@
2222
LLS_CORE_POD_FILTER,
2323
)
2424

25+
import os
2526
import tempfile
27+
2628
import requests
2729

2830

@@ -276,23 +278,30 @@ def vector_store_create_file_from_url(url: str, llama_stack_client: LlamaStackCl
276278
response = requests.get(url, timeout=60)
277279
response.raise_for_status()
278280

279-
# Save file locally first and pretend it's a txt file, not sure why this is needed
280-
# but it works locally without it,
281-
# though llama stack version is the newer one.
282-
file_name = url.split("/")[-1]
283-
local_file_name = file_name.replace(".rst", ".txt")
284-
with tempfile.NamedTemporaryFile(mode="wb", suffix=f"_{local_file_name}") as temp_file:
281+
content_type = (response.headers.get("Content-Type") or "").split(";")[0].strip().lower()
282+
path_part = url.split("/")[-1].split("?")[0]
283+
284+
if content_type == "application/pdf" or path_part.lower().endswith(".pdf"):
285+
file_suffix = ".pdf"
286+
elif path_part.lower().endswith(".rst"):
287+
file_suffix = "_" + path_part.replace(".rst", ".txt")
288+
else:
289+
file_suffix = "_" + (path_part or "document.txt")
290+
291+
with tempfile.NamedTemporaryFile(mode="wb", suffix=file_suffix, delete=False) as temp_file:
285292
temp_file.write(response.content)
286293
temp_file_path = temp_file.name
287294

288-
# Upload saved file to LlamaStack
295+
try:
296+
# Upload saved file to LlamaStack (filename extension used for parsing)
289297
with open(temp_file_path, "rb") as file_to_upload:
290298
uploaded_file = llama_stack_client.files.create(file=file_to_upload, purpose="assistants")
291299

292300
# Add file to vector store
293301
llama_stack_client.vector_stores.files.create(vector_store_id=vector_store.id, file_id=uploaded_file.id)
294-
295-
return True
302+
return True
303+
finally:
304+
os.unlink(temp_file_path)
296305

297306
except (requests.exceptions.RequestException, Exception) as e:
298307
LOGGER.warning(f"Failed to download and upload file {url}: {e}")

tests/llama_stack/vector_io/test_vector_stores.py

Lines changed: 56 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,31 @@
1212
LOGGER = get_logger(name=__name__)
1313

1414

15+
IBM_EARNINGS_SEARCH_QUERIES_BY_MODE: dict[str, list[str]] = {
16+
"vector": [
17+
"How did IBM perform financially in the fourth quarter of 2025?",
18+
"What were the main drivers of revenue growth?",
19+
"What is the company outlook for 2026?",
20+
"How did profit margins change year over year?",
21+
"What did leadership say about generative AI and growth?",
22+
],
23+
"keyword": [
24+
"What was free cash flow in the fourth quarter?",
25+
"What was Consulting revenue and segment profit margin?",
26+
"What was Software revenue and constant currency growth?",
27+
"What was diluted earnings per share for continuing operations?",
28+
"What are full-year 2026 expectations for revenue and free cash flow?",
29+
],
30+
"hybrid": [
31+
"What was IBM free cash flow and what does the company expect for 2026?",
32+
"What were segment results for Software and Infrastructure revenue?",
33+
"What was GAAP gross profit margin and pre-tax income?",
34+
"What did James Kavanaugh say about 2025 results and 2026 prospects?",
35+
"What was Consulting revenue and segment profit margin?",
36+
],
37+
}
38+
39+
1540
@pytest.mark.parametrize(
1641
"unprivileged_model_namespace, llama_stack_server_config, vector_store",
1742
[
@@ -113,37 +138,36 @@ def test_vector_stores_search(
113138
"""
114139
Test vector_stores search functionality using the search endpoint.
115140
116-
Uses a vector store with pre-uploaded TorchTune documentation files and tests the search API
117-
to retrieve relevant chunks based on query text. Validates that the search
118-
returns relevant results with proper metadata and content.
141+
Iterates over vector, keyword, and hybrid search modes using
142+
IBM_EARNINGS_SEARCH_QUERIES_BY_MODE. Validates that each mode returns
143+
relevant results with proper metadata and content.
119144
"""
120145

121-
search_queries = [
122-
"What is LoRA fine-tuning?",
123-
"How does quantization work?",
124-
"What are memory optimizations?",
125-
"Tell me about DoRA",
126-
"What is TorchTune?",
127-
]
128-
129-
for query in search_queries:
130-
# Use the vector store search endpoint
131-
search_response = unprivileged_llama_stack_client.vector_stores.search(
132-
vector_store_id=vector_store_with_example_docs.id, query=query
133-
)
134-
135-
# Validate search response
136-
assert search_response is not None, f"Search response is None for query: {query}"
137-
assert hasattr(search_response, "data"), "Search response missing 'data' attribute"
138-
assert isinstance(search_response.data, list), "Search response data should be a list"
139-
140-
# Check that we got some results
141-
assert len(search_response.data) > 0, f"No search results returned for query: {query}"
142-
143-
# Validate each search result
144-
for result in search_response.data:
145-
assert hasattr(result, "content"), "Search result missing 'content' attribute"
146-
assert result.content is not None, "Search result content should not be None"
147-
assert len(result.content) > 0, "Search result content should not be empty"
148-
149-
LOGGER.info(f"Successfully tested vector store search with {len(search_queries)} queries")
146+
provider_id = vector_store_with_example_docs.metadata.get("provider_id", "")
147+
# FAISS does not support hybrid and keyword search modes see:
148+
# https://github.com/llamastack/llama-stack/blob/main/src/llama_stack/providers/inline/vector_io/faiss/faiss.py#L180-L196
149+
search_modes = ["vector"] if provider_id == "faiss" else list(IBM_EARNINGS_SEARCH_QUERIES_BY_MODE)
150+
151+
for search_mode in search_modes:
152+
queries = IBM_EARNINGS_SEARCH_QUERIES_BY_MODE[search_mode]
153+
for query in queries:
154+
search_response = unprivileged_llama_stack_client.vector_stores.search(
155+
vector_store_id=vector_store_with_example_docs.id,
156+
query=query,
157+
search_mode=search_mode,
158+
max_num_results=10,
159+
)
160+
161+
assert search_response is not None, f"Search response is None for mode={search_mode!r} query={query!r}"
162+
assert hasattr(search_response, "data"), "Search response missing 'data' attribute"
163+
assert isinstance(search_response.data, list), "Search response data should be a list"
164+
assert len(search_response.data) > 0, f"No search results for mode={search_mode!r} query={query!r}"
165+
166+
for result in search_response.data:
167+
assert hasattr(result, "content"), "Search result missing 'content' attribute"
168+
assert result.content is not None, "Search result content should not be None"
169+
assert len(result.content) > 0, "Search result content should not be empty"
170+
171+
LOGGER.info(f"Search mode {search_mode!r}: {len(queries)} queries returned results")
172+
173+
LOGGER.info(f"Successfully tested vector store search across modes: {search_modes}")

0 commit comments

Comments (0)