3434 UPGRADE_DISTRIBUTION_NAME ,
3535 ModelInfo ,
3636)
37+ from tests .llama_stack .datasets import Dataset
3738from tests .llama_stack .utils import (
3839 create_llama_stack_distribution ,
40+ vector_store_upload_dataset ,
3941 vector_store_upload_doc_sources ,
4042 wait_for_llama_stack_client_ready ,
4143 wait_for_unique_llama_stack_pod ,
@@ -720,6 +722,36 @@ def llama_stack_models(unprivileged_llama_stack_client: LlamaStackClient) -> Mod
720722 return ModelInfo (model_id = model_id , embedding_model = embedding_model , embedding_dimension = embedding_dimension )
721723
722724
@pytest.fixture(scope="class")
def dataset(request: FixtureRequest) -> Dataset:
    """Expose the Dataset supplied through indirect parametrization.

    Having this as a dedicated fixture lets test methods read the Dataset
    (for example to look up QA ground-truth queries) without hardcoding it.
    A plain pytest parameter is deliberately avoided here: it caused fixture
    dependency-ordering problems where Llama Stack dependent resources
    (databases, secrets, ...) were not created at the right time.

    Raises:
        pytest.UsageError: If the fixture is not indirect-parametrized or the
            parameter is not a :class:`~tests.llama_stack.datasets.Dataset` instance.
    """
    # Sentinel lets us distinguish "no indirect param at all" from a param
    # that happens to be falsy/None, mirroring the hasattr() check.
    _missing = object()
    value = getattr(request, "param", _missing)
    if value is _missing:
        raise pytest.UsageError(
            "The `dataset` fixture must be indirect-parametrized with a Dataset instance "
            "(e.g. @pytest.mark.parametrize('dataset', [MY_DATASET], indirect=True)). "
            "Without indirect parametrization, `request.param` is missing."
        )
    if not isinstance(value, Dataset):
        raise pytest.UsageError(
            "The `dataset` fixture must be indirect-parametrized with a "
            f"tests.llama_stack.datasets.Dataset instance; got {type(value).__name__!r}."
        )
    return value
753+
754+
723755@pytest .fixture (scope = "class" )
724756def vector_store (
725757 unprivileged_llama_stack_client : LlamaStackClient ,
@@ -729,61 +761,78 @@ def vector_store(
729761 teardown_resources : bool ,
730762) -> Generator [VectorStore ]:
731763 """
732- Creates a vector store for testing and automatically cleans it up.
733-
734- You can have example documents ingested into the store automatically by passing a
735- non-empty ``doc_sources`` list in the indirect parametrization dict (URLs, files, or
736- directories under the repo root). Omit ``doc_sources`` when the test only needs an
737- empty store.
738-
739- Options when parametrizing with ``indirect=True``:
740-
741- * ``vector_io_provider`` (optional): backend id for the store; defaults to ``"milvus"``.
742- * ``doc_sources`` (optional): non-empty list of document sources to upload after creation.
743- Omitted, empty, or absent means no uploads. Each entry may be:
744-
745- * A remote URL (``http://`` or ``https://``)
746- * A repo-relative or absolute file path
747- * A directory path (all files in the directory are uploaded)
748-
749- Example:
764+ Fixture to provide a vector store instance for tests.
765+
766+ Given: A configured LlamaStackClient, an embedding model, and test parameters specifying
767+ vector store provider and a dataset or document sources.
768+ When: The fixture is invoked by a parameterized test class or function.
769+ Then: It creates (or reuses, in post-upgrade scenarios) a vector store with the specified
770+ vector I/O provider, optionally uploads a dataset or custom document sources, and ensures
771+ proper cleanup after the test if needed.
772+
773+ Parameter Usage:
774+ - vector_io_provider (str): The provider backend to use for the vector store (e.g., 'milvus',
775+ 'faiss', 'pgvector', 'qdrant-remote', etc.). Determines how vector data is persisted and queried.
776+ If not specified, defaults to 'milvus'.
777+ - dataset (Dataset): An instance of the Dataset class (see datasets.py) specifying the documents and
778+ ground-truth QA to upload to the vector store. Use this to quickly populate the store with a
779+ standard test corpus. Mutually exclusive with doc_sources.
780+ - doc_sources (list[str]): A list of document sources to upload to the vector store. Each entry may be:
781+ - A file path (repo-relative or absolute) to a single document.
782+ - A directory path, in which case all files within the directory will be uploaded.
783+ - A remote HTTPS URL to a document (e.g., "https://example.com/mydoc.pdf"), which will be downloaded
784+ and ingested.
785+ `doc_sources` is mutually exclusive with `dataset`.
786+
787+ Examples:
788+ # Example 1: Use dataset to populate the vector store
789+ @pytest.mark.parametrize(
790+ "vector_store",
791+ [
792+ pytest.param(
793+ {"vector_io_provider": "milvus", "dataset": IBM_2025_Q4_EARNINGS},
794+ id="milvus-with-IBM-earnings-dataset",
795+ ),
796+ ],
797+ indirect=True,
798+ )
750799
800+ # Example 2: Upload local documents by file path
751801 @pytest.mark.parametrize(
752802 "vector_store",
753803 [
754804 pytest.param(
755805 {
756- "vector_io_provider": "milvus ",
806+ "vector_io_provider": "faiss ",
757807 "doc_sources": [
758- "https://www.ibm.com/downloads/documents/us-en/1550f7eea8c0ded6",
759- "tests/llama_stack/dataset/corpus/finance",
760- "tests/llama_stack/dataset/corpus/finance/ibm-4q25-earnings-press-release-unencrypted.pdf",
808+ "tests/llama_stack/dataset/corpus/finance/document1.pdf",
809+ "tests/llama_stack/dataset/corpus/finance/document2.pdf",
761810 ],
762811 },
763- id="doc_sources:url+folder+file ",
812+ id="faiss-with-explicit-documents ",
764813 ),
765814 ],
766815 indirect=True,
767816 )
768817
769- Post-upgrade runs reuse the existing store; uploads run only in the create path when
770- ``doc_sources`` is non-empty (documents from the pre-upgrade run are reused otherwise).
771-
772- Args:
773- unprivileged_llama_stack_client: The configured LlamaStackClient
774- llama_stack_models: Model information including embedding model details
775- request: Pytest fixture request carrying optional param dict
776- pytestconfig: Pytest config (post-upgrade reuses store, no create/upload path)
777- teardown_resources: Whether to delete the store after the class
778-
779818 Yields:
780- Vector store object that can be used in tests
819+ VectorStore: The created or reused vector store ready for ingestion/search tests.
820+
821+ Raises:
822+ ValueError: If the required vector store is missing in a post-upgrade scenario, or if
823+ both ``dataset`` and ``doc_sources`` are set in params (mutually exclusive).
824+ Exception: If vector store creation or file upload fails, attempts cleanup.
781825 """
782826
783827 params_raw = getattr (request , "param" , None )
784828 params : dict [str , Any ] = dict (params_raw ) if isinstance (params_raw , dict ) else {"vector_io_provider" : "milvus" }
785829 vector_io_provider = str (params .get ("vector_io_provider" ) or "milvus" )
786- doc_sources = params .get ("doc_sources" )
830+ dataset : Dataset | None = params .get ("dataset" )
831+ doc_sources : list [str ] | None = params .get ("doc_sources" )
832+ if dataset is not None and doc_sources is not None :
833+ raise ValueError (
834+ 'vector_store fixture params must set at most one of "dataset" or "doc_sources"; both were provided.'
835+ )
787836
788837 if pytestconfig .option .post_upgrade :
789838 stores = unprivileged_llama_stack_client .vector_stores .list ().data
@@ -805,27 +854,27 @@ def vector_store(
805854 )
806855 LOGGER .info (f"vector_store successfully created (provider_id={ vector_io_provider } , id={ vector_store .id } )" )
807856
808- if doc_sources :
857+ if dataset or doc_sources :
809858 try :
810- vector_store_upload_doc_sources (
811- doc_sources = doc_sources ,
812- llama_stack_client = unprivileged_llama_stack_client ,
813- vector_store = vector_store ,
814- vector_io_provider = vector_io_provider ,
815- )
859+ if dataset :
860+ vector_store_upload_dataset (
861+ dataset = dataset ,
862+ llama_stack_client = unprivileged_llama_stack_client ,
863+ vector_store = vector_store ,
864+ )
865+ elif doc_sources :
866+ vector_store_upload_doc_sources (
867+ doc_sources = doc_sources ,
868+ llama_stack_client = unprivileged_llama_stack_client ,
869+ vector_store = vector_store ,
870+ vector_io_provider = vector_io_provider ,
871+ )
816872 except Exception :
817873 try :
818874 unprivileged_llama_stack_client .vector_stores .delete (vector_store_id = vector_store .id )
819- LOGGER .info (
820- "Deleted vector store %s after failed doc_sources ingestion" ,
821- vector_store .id ,
822- )
875+ LOGGER .info (f"Deleted vector store { vector_store .id } after failed document ingestion" )
823876 except Exception as del_exc : # noqa: BLE001
824- LOGGER .warning (
825- "Failed to delete vector store %s after ingestion error: %s" ,
826- vector_store .id ,
827- del_exc ,
828- )
877+ LOGGER .warning (f"Failed to delete vector store { vector_store .id } after ingestion error: { del_exc } " )
829878 raise
830879
831880 yield vector_store
0 commit comments