Skip to content

Commit ee45b28

Browse files
Bobbins228 and jgarciao authored
test: add searchmode tests for vector search (#1115)
* test: add searchmode tests for vector search
* fix: add missing search parameter

---------

Co-authored-by: Jorge <jgarciao@users.noreply.github.com>
1 parent b8a709f commit ee45b28

3 files changed

Lines changed: 86 additions & 62 deletions

File tree

tests/llama_stack/conftest.py

Lines changed: 12 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,8 @@
6666
LLS_CORE_VLLM_EMBEDDING_MAX_TOKENS = os.getenv("LLS_CORE_VLLM_EMBEDDING_MAX_TOKENS", "8192")
6767
LLS_CORE_VLLM_EMBEDDING_TLS_VERIFY = os.getenv("LLS_CORE_VLLM_EMBEDDING_TLS_VERIFY", "true")
6868

69+
IBM_EARNINGS_DOC_URL = "https://www.ibm.com/downloads/documents/us-en/1550f7eea8c0ded6"
70+
6971
distribution_name = generate_random_name(prefix="llama-stack-distribution")
7072

7173

@@ -776,35 +778,24 @@ def vector_store_with_example_docs(
776778
unprivileged_llama_stack_client: LlamaStackClient, vector_store: VectorStore
777779
) -> Generator[VectorStore, None, None]:
778780
"""
779-
Creates a vector store with TorchTune documentation files uploaded.
781+
Creates a vector store with the IBM fourth-quarter 2025 earnings report uploaded.
780782
781-
This fixture depends on the vector_store fixture and uploads the TorchTune
782-
documentation files to the vector store for testing purposes. The files
783-
are automatically cleaned up after the test completes.
783+
This fixture depends on the vector_store fixture and uploads the IBM earnings
784+
document to the vector store for testing vector, keyword, and hybrid search.
785+
The file is automatically cleaned up after the test completes.
784786
785787
Args:
786788
unprivileged_llama_stack_client: The configured LlamaStackClient
787789
vector_store: The vector store fixture to upload files to
788790
789791
Yields:
790-
Vector store object with uploaded TorchTune documentation files
792+
Vector store object with uploaded IBM earnings report document
791793
"""
792-
# Download TorchTune documentation files
793-
urls = [
794-
"llama3.rst",
795-
"chat.rst",
796-
"lora_finetune.rst",
797-
"qat_finetune.rst",
798-
"memory_optimizations.rst",
799-
]
800-
801-
base_url = "https://raw.githubusercontent.com/pytorch/torchtune/refs/tags/v0.6.1/docs/source/tutorials/"
802-
803-
for file_name in urls:
804-
url = f"{base_url}{file_name}"
805-
vector_store_create_file_from_url(
806-
url=url, llama_stack_client=unprivileged_llama_stack_client, vector_store=vector_store
807-
)
794+
vector_store_create_file_from_url(
795+
url=IBM_EARNINGS_DOC_URL,
796+
llama_stack_client=unprivileged_llama_stack_client,
797+
vector_store=vector_store,
798+
)
808799

809800
yield vector_store
810801

tests/llama_stack/utils.py

Lines changed: 18 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,9 @@
2222
LLS_CORE_POD_FILTER,
2323
)
2424

25+
import os
2526
import tempfile
27+
2628
import requests
2729

2830

@@ -276,23 +278,30 @@ def vector_store_create_file_from_url(url: str, llama_stack_client: LlamaStackCl
276278
response = requests.get(url, timeout=60)
277279
response.raise_for_status()
278280

279-
# Save file locally first and pretend it's a txt file, not sure why this is needed
280-
# but it works locally without it,
281-
# though llama stack version is the newer one.
282-
file_name = url.split("/")[-1]
283-
local_file_name = file_name.replace(".rst", ".txt")
284-
with tempfile.NamedTemporaryFile(mode="wb", suffix=f"_{local_file_name}") as temp_file:
281+
content_type = (response.headers.get("Content-Type") or "").split(";")[0].strip().lower()
282+
path_part = url.split("/")[-1].split("?")[0]
283+
284+
if content_type == "application/pdf" or path_part.lower().endswith(".pdf"):
285+
file_suffix = ".pdf"
286+
elif path_part.lower().endswith(".rst"):
287+
file_suffix = "_" + path_part.replace(".rst", ".txt")
288+
else:
289+
file_suffix = "_" + (path_part or "document.txt")
290+
291+
with tempfile.NamedTemporaryFile(mode="wb", suffix=file_suffix, delete=False) as temp_file:
285292
temp_file.write(response.content)
286293
temp_file_path = temp_file.name
287294

288-
# Upload saved file to LlamaStack
295+
try:
296+
# Upload saved file to LlamaStack (filename extension used for parsing)
289297
with open(temp_file_path, "rb") as file_to_upload:
290298
uploaded_file = llama_stack_client.files.create(file=file_to_upload, purpose="assistants")
291299

292300
# Add file to vector store
293301
llama_stack_client.vector_stores.files.create(vector_store_id=vector_store.id, file_id=uploaded_file.id)
294-
295-
return True
302+
return True
303+
finally:
304+
os.unlink(temp_file_path)
296305

297306
except (requests.exceptions.RequestException, Exception) as e:
298307
LOGGER.warning(f"Failed to download and upload file {url}: {e}")

tests/llama_stack/vector_io/test_vector_stores.py

Lines changed: 56 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,31 @@
1212
LOGGER = get_logger(name=__name__)
1313

1414

15+
IBM_EARNINGS_SEARCH_QUERIES_BY_MODE: dict[str, list[str]] = {
16+
"vector": [
17+
"How did IBM perform financially in the fourth quarter of 2025?",
18+
"What were the main drivers of revenue growth?",
19+
"What is the company outlook for 2026?",
20+
"How did profit margins change year over year?",
21+
"What did leadership say about generative AI and growth?",
22+
],
23+
"keyword": [
24+
"What was free cash flow in the fourth quarter?",
25+
"What was Consulting revenue and segment profit margin?",
26+
"What was Software revenue and constant currency growth?",
27+
"What was diluted earnings per share for continuing operations?",
28+
"What are full-year 2026 expectations for revenue and free cash flow?",
29+
],
30+
"hybrid": [
31+
"What was IBM free cash flow and what does the company expect for 2026?",
32+
"What were segment results for Software and Infrastructure revenue?",
33+
"What was GAAP gross profit margin and pre-tax income?",
34+
"What did James Kavanaugh say about 2025 results and 2026 prospects?",
35+
"What was Consulting revenue and segment profit margin?",
36+
],
37+
}
38+
39+
1540
@pytest.mark.parametrize(
1641
"unprivileged_model_namespace, llama_stack_server_config, vector_store",
1742
[
@@ -113,37 +138,36 @@ def test_vector_stores_search(
113138
"""
114139
Test vector_stores search functionality using the search endpoint.
115140
116-
Uses a vector store with pre-uploaded TorchTune documentation files and tests the search API
117-
to retrieve relevant chunks based on query text. Validates that the search
118-
returns relevant results with proper metadata and content.
141+
Iterates over vector, keyword, and hybrid search modes using
142+
IBM_EARNINGS_SEARCH_QUERIES_BY_MODE. Validates that each mode returns
143+
relevant results with proper metadata and content.
119144
"""
120145

121-
search_queries = [
122-
"What is LoRA fine-tuning?",
123-
"How does quantization work?",
124-
"What are memory optimizations?",
125-
"Tell me about DoRA",
126-
"What is TorchTune?",
127-
]
128-
129-
for query in search_queries:
130-
# Use the vector store search endpoint
131-
search_response = unprivileged_llama_stack_client.vector_stores.search(
132-
vector_store_id=vector_store_with_example_docs.id, query=query
133-
)
134-
135-
# Validate search response
136-
assert search_response is not None, f"Search response is None for query: {query}"
137-
assert hasattr(search_response, "data"), "Search response missing 'data' attribute"
138-
assert isinstance(search_response.data, list), "Search response data should be a list"
139-
140-
# Check that we got some results
141-
assert len(search_response.data) > 0, f"No search results returned for query: {query}"
142-
143-
# Validate each search result
144-
for result in search_response.data:
145-
assert hasattr(result, "content"), "Search result missing 'content' attribute"
146-
assert result.content is not None, "Search result content should not be None"
147-
assert len(result.content) > 0, "Search result content should not be empty"
148-
149-
LOGGER.info(f"Successfully tested vector store search with {len(search_queries)} queries")
146+
provider_id = vector_store_with_example_docs.metadata.get("provider_id", "")
147+
# FAISS does not support hybrid and keyword search modes see:
148+
# https://github.com/llamastack/llama-stack/blob/main/src/llama_stack/providers/inline/vector_io/faiss/faiss.py#L180-L196
149+
search_modes = ["vector"] if provider_id == "faiss" else list(IBM_EARNINGS_SEARCH_QUERIES_BY_MODE)
150+
151+
for search_mode in search_modes:
152+
queries = IBM_EARNINGS_SEARCH_QUERIES_BY_MODE[search_mode]
153+
for query in queries:
154+
search_response = unprivileged_llama_stack_client.vector_stores.search(
155+
vector_store_id=vector_store_with_example_docs.id,
156+
query=query,
157+
search_mode=search_mode,
158+
max_num_results=10,
159+
)
160+
161+
assert search_response is not None, f"Search response is None for mode={search_mode!r} query={query!r}"
162+
assert hasattr(search_response, "data"), "Search response missing 'data' attribute"
163+
assert isinstance(search_response.data, list), "Search response data should be a list"
164+
assert len(search_response.data) > 0, f"No search results for mode={search_mode!r} query={query!r}"
165+
166+
for result in search_response.data:
167+
assert hasattr(result, "content"), "Search result missing 'content' attribute"
168+
assert result.content is not None, "Search result content should not be None"
169+
assert len(result.content) > 0, "Search result content should not be empty"
170+
171+
LOGGER.info(f"Search mode {search_mode!r}: {len(queries)} queries returned results")
172+
173+
LOGGER.info(f"Successfully tested vector store search across modes: {search_modes}")

0 commit comments

Comments (0)