Skip to content

Commit e81fb1f

Browse files
authored
Enhance vector store testing using Dataset (#1296)
* feat: enhance vector store fixture with dataset support

  Introduce a lightweight dataset loader (datasets.py) with DatasetDocumentQA and DatasetDocumentMetadata dataclasses, a documents.json manifest for per-file attribute metadata, and QA records across IBM quarterly earnings press releases. Refactor the vector store fixture to support Dataset instances for document uploads, improving test setup flexibility. Update vector store upload functions to handle datasets and their associated attributes. Remove deprecated constants and adjust test cases to use the new dataset structure.

  Signed-off-by: Jorge Garcia Oncins <jgarciao@redhat.com>
  Made-with: Cursor

* feat: enhance vector store fixture with dataset support

  Refactor the vector store fixture to support Dataset instances for document uploads, improving test setup flexibility. Update vector store upload functions to handle datasets and their associated attributes. Remove deprecated constants and adjust test cases to use the new dataset structure.

  Signed-off-by: Jorge Garcia Oncins <jgarciao@redhat.com>
  Made-with: Cursor

* fix: fix attributes parameters type to be consistent

  Signed-off-by: Jorge Garcia Oncins <jgarciao@redhat.com>

* fix: fix dataset access when running the tests from subfolders

  Signed-off-by: Jorge Garcia Oncins <jgarciao@redhat.com>
  Made-with: Cursor

* fix: use f-strings when logging

  Use f-strings in log calls because structlog does not apply %s/%d-style substitution.

  Signed-off-by: Jorge Garcia Oncins <jgarciao@redhat.com>
  Made-with: Cursor

---------

Signed-off-by: Jorge Garcia Oncins <jgarciao@redhat.com>
1 parent cfdb157 commit e81fb1f

File tree

9 files changed

+678
-160
lines changed

9 files changed

+678
-160
lines changed

tests/llama_stack/conftest.py

Lines changed: 100 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -34,8 +34,10 @@
3434
UPGRADE_DISTRIBUTION_NAME,
3535
ModelInfo,
3636
)
37+
from tests.llama_stack.datasets import Dataset
3738
from tests.llama_stack.utils import (
3839
create_llama_stack_distribution,
40+
vector_store_upload_dataset,
3941
vector_store_upload_doc_sources,
4042
wait_for_llama_stack_client_ready,
4143
wait_for_unique_llama_stack_pod,
@@ -720,6 +722,36 @@ def llama_stack_models(unprivileged_llama_stack_client: LlamaStackClient) -> Mod
720722
return ModelInfo(model_id=model_id, embedding_model=embedding_model, embedding_dimension=embedding_dimension)
721723

722724

725+
@pytest.fixture(scope="class")
726+
def dataset(request: FixtureRequest) -> Dataset:
727+
"""Return the Dataset passed via indirect parametrize.
728+
729+
This exists as a standalone fixture so that test methods can access the
730+
Dataset (e.g. for QA ground-truth queries) without hardcoding it.
731+
732+
Note: we use this fixture instead of a plain pytest parameter to avoid
733+
fixture dependency problems that kept Llama Stack dependent resources,
734+
like databases or secrets, from being created at the right time.
735+
736+
Raises:
737+
pytest.UsageError: If the fixture is not indirect-parametrized or the
738+
parameter is not a :class:`~tests.llama_stack.datasets.Dataset` instance.
739+
"""
740+
if not hasattr(request, "param"):
741+
raise pytest.UsageError(
742+
"The `dataset` fixture must be indirect-parametrized with a Dataset instance "
743+
"(e.g. @pytest.mark.parametrize('dataset', [MY_DATASET], indirect=True)). "
744+
"Without indirect parametrization, `request.param` is missing."
745+
)
746+
param = request.param
747+
if not isinstance(param, Dataset):
748+
raise pytest.UsageError(
749+
"The `dataset` fixture must be indirect-parametrized with a "
750+
f"tests.llama_stack.datasets.Dataset instance; got {type(param).__name__!r}."
751+
)
752+
return param
753+
754+
723755
@pytest.fixture(scope="class")
724756
def vector_store(
725757
unprivileged_llama_stack_client: LlamaStackClient,
@@ -729,61 +761,78 @@ def vector_store(
729761
teardown_resources: bool,
730762
) -> Generator[VectorStore]:
731763
"""
732-
Creates a vector store for testing and automatically cleans it up.
733-
734-
You can have example documents ingested into the store automatically by passing a
735-
non-empty ``doc_sources`` list in the indirect parametrization dict (URLs, files, or
736-
directories under the repo root). Omit ``doc_sources`` when the test only needs an
737-
empty store.
738-
739-
Options when parametrizing with ``indirect=True``:
740-
741-
* ``vector_io_provider`` (optional): backend id for the store; defaults to ``"milvus"``.
742-
* ``doc_sources`` (optional): non-empty list of document sources to upload after creation.
743-
Omitted, empty, or absent means no uploads. Each entry may be:
744-
745-
* A remote URL (``http://`` or ``https://``)
746-
* A repo-relative or absolute file path
747-
* A directory path (all files in the directory are uploaded)
748-
749-
Example:
764+
Fixture to provide a vector store instance for tests.
765+
766+
Given: A configured LlamaStackClient, an embedding model, and test parameters specifying
767+
vector store provider and a dataset or document sources.
768+
When: The fixture is invoked by a parameterized test class or function.
769+
Then: It creates (or reuses, in post-upgrade scenarios) a vector store with the specified
770+
vector I/O provider, optionally uploads a dataset or custom document sources, and ensures
771+
proper cleanup after the test if needed.
772+
773+
Parameter Usage:
774+
- vector_io_provider (str): The provider backend to use for the vector store (e.g., 'milvus',
775+
'faiss', 'pgvector', 'qdrant-remote', etc.). Determines how vector data is persisted and queried.
776+
If not specified, defaults to 'milvus'.
777+
- dataset (Dataset): An instance of the Dataset class (see datasets.py) specifying the documents and
778+
ground-truth QA to upload to the vector store. Use this to quickly populate the store with a
779+
standard test corpus. Mutually exclusive with doc_sources.
780+
- doc_sources (list[str]): A list of document sources to upload to the vector store. Each entry may be:
781+
- A file path (repo-relative or absolute) to a single document.
782+
- A directory path, in which case all files within the directory will be uploaded.
783+
- A remote HTTPS URL to a document (e.g., "https://example.com/mydoc.pdf"), which will be downloaded
784+
and ingested.
785+
`doc_sources` is mutually exclusive with `dataset`.
786+
787+
Examples:
788+
# Example 1: Use dataset to populate the vector store
789+
@pytest.mark.parametrize(
790+
"vector_store",
791+
[
792+
pytest.param(
793+
{"vector_io_provider": "milvus", "dataset": IBM_2025_Q4_EARNINGS},
794+
id="milvus-with-IBM-earnings-dataset",
795+
),
796+
],
797+
indirect=True,
798+
)
750799
800+
# Example 2: Upload local documents by file path
751801
@pytest.mark.parametrize(
752802
"vector_store",
753803
[
754804
pytest.param(
755805
{
756-
"vector_io_provider": "milvus",
806+
"vector_io_provider": "faiss",
757807
"doc_sources": [
758-
"https://www.ibm.com/downloads/documents/us-en/1550f7eea8c0ded6",
759-
"tests/llama_stack/dataset/corpus/finance",
760-
"tests/llama_stack/dataset/corpus/finance/ibm-4q25-earnings-press-release-unencrypted.pdf",
808+
"tests/llama_stack/dataset/corpus/finance/document1.pdf",
809+
"tests/llama_stack/dataset/corpus/finance/document2.pdf",
761810
],
762811
},
763-
id="doc_sources:url+folder+file",
812+
id="faiss-with-explicit-documents",
764813
),
765814
],
766815
indirect=True,
767816
)
768817
769-
Post-upgrade runs reuse the existing store; uploads run only in the create path when
770-
``doc_sources`` is non-empty (documents from the pre-upgrade run are reused otherwise).
771-
772-
Args:
773-
unprivileged_llama_stack_client: The configured LlamaStackClient
774-
llama_stack_models: Model information including embedding model details
775-
request: Pytest fixture request carrying optional param dict
776-
pytestconfig: Pytest config (post-upgrade reuses store, no create/upload path)
777-
teardown_resources: Whether to delete the store after the class
778-
779818
Yields:
780-
Vector store object that can be used in tests
819+
VectorStore: The created or reused vector store ready for ingestion/search tests.
820+
821+
Raises:
822+
ValueError: If the required vector store is missing in a post-upgrade scenario, or if
823+
both ``dataset`` and ``doc_sources`` are set in params (mutually exclusive).
824+
Exception: If vector store creation or file upload fails; cleanup is attempted before the error propagates.
781825
"""
782826

783827
params_raw = getattr(request, "param", None)
784828
params: dict[str, Any] = dict(params_raw) if isinstance(params_raw, dict) else {"vector_io_provider": "milvus"}
785829
vector_io_provider = str(params.get("vector_io_provider") or "milvus")
786-
doc_sources = params.get("doc_sources")
830+
dataset: Dataset | None = params.get("dataset")
831+
doc_sources: list[str] | None = params.get("doc_sources")
832+
if dataset is not None and doc_sources is not None:
833+
raise ValueError(
834+
'vector_store fixture params must set at most one of "dataset" or "doc_sources"; both were provided.'
835+
)
787836

788837
if pytestconfig.option.post_upgrade:
789838
stores = unprivileged_llama_stack_client.vector_stores.list().data
@@ -805,27 +854,27 @@ def vector_store(
805854
)
806855
LOGGER.info(f"vector_store successfully created (provider_id={vector_io_provider}, id={vector_store.id})")
807856

808-
if doc_sources:
857+
if dataset or doc_sources:
809858
try:
810-
vector_store_upload_doc_sources(
811-
doc_sources=doc_sources,
812-
llama_stack_client=unprivileged_llama_stack_client,
813-
vector_store=vector_store,
814-
vector_io_provider=vector_io_provider,
815-
)
859+
if dataset:
860+
vector_store_upload_dataset(
861+
dataset=dataset,
862+
llama_stack_client=unprivileged_llama_stack_client,
863+
vector_store=vector_store,
864+
)
865+
elif doc_sources:
866+
vector_store_upload_doc_sources(
867+
doc_sources=doc_sources,
868+
llama_stack_client=unprivileged_llama_stack_client,
869+
vector_store=vector_store,
870+
vector_io_provider=vector_io_provider,
871+
)
816872
except Exception:
817873
try:
818874
unprivileged_llama_stack_client.vector_stores.delete(vector_store_id=vector_store.id)
819-
LOGGER.info(
820-
"Deleted vector store %s after failed doc_sources ingestion",
821-
vector_store.id,
822-
)
875+
LOGGER.info(f"Deleted vector store {vector_store.id} after failed document ingestion")
823876
except Exception as del_exc: # noqa: BLE001
824-
LOGGER.warning(
825-
"Failed to delete vector store %s after ingestion error: %s",
826-
vector_store.id,
827-
del_exc,
828-
)
877+
LOGGER.warning(f"Failed to delete vector store {vector_store.id} after ingestion error: {del_exc}")
829878
raise
830879

831880
yield vector_store

tests/llama_stack/constants.py

Lines changed: 0 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -69,31 +69,3 @@ class ModelInfo(NamedTuple):
6969
}
7070

7171
UPGRADE_DISTRIBUTION_NAME = "llama-stack-distribution-upgrade"
72-
73-
IBM_2025_Q4_EARNINGS_DOC_ENCRYPTED = "tests/llama_stack/dataset/corpus/pdf-testing/ibm-4q25-press-release-encrypted.pdf"
74-
IBM_2025_Q4_EARNINGS_DOC_UNENCRYPTED = (
75-
"tests/llama_stack/dataset/corpus/finance/ibm-4q25-earnings-press-release-unencrypted.pdf"
76-
)
77-
IBM_EARNINGS_SEARCH_QUERIES_BY_MODE: dict[str, list[str]] = {
78-
"vector": [
79-
"How did IBM perform financially in the fourth quarter of 2025?",
80-
"What were the main drivers of revenue growth?",
81-
"What is the company outlook for 2026?",
82-
"How did profit margins change year over year?",
83-
"What did leadership say about generative AI and growth?",
84-
],
85-
"keyword": [
86-
"What was free cash flow in the fourth quarter?",
87-
"What was Consulting revenue and segment profit margin?",
88-
"What was Software revenue and constant currency growth?",
89-
"What was diluted earnings per share for continuing operations?",
90-
"What are full-year 2026 expectations for revenue and free cash flow?",
91-
],
92-
"hybrid": [
93-
"What was IBM free cash flow and what does the company expect for 2026?",
94-
"What were segment results for Software and Infrastructure revenue?",
95-
"What was GAAP gross profit margin and pre-tax income?",
96-
"What did James Kavanaugh say about 2025 results and 2026 prospects?",
97-
"What was Consulting revenue and segment profit margin?",
98-
],
99-
}

0 commit comments

Comments
 (0)