11import os
22from collections .abc import Callable , Generator
3+ from pathlib import Path
34from typing import Any
45
56import httpx
1920from simple_logger .logger import get_logger
2021
2122from tests .llama_stack .constants import (
23+ LLAMA_STACK_DISTRIBUTION_SECRET_DATA ,
24+ LLS_CORE_EMBEDDING_MODEL ,
25+ LLS_CORE_EMBEDDING_PROVIDER_MODEL_ID ,
26+ LLS_CORE_INFERENCE_MODEL ,
27+ LLS_CORE_VLLM_EMBEDDING_MAX_TOKENS ,
28+ LLS_CORE_VLLM_EMBEDDING_TLS_VERIFY ,
29+ LLS_CORE_VLLM_EMBEDDING_URL ,
30+ LLS_CORE_VLLM_MAX_TOKENS ,
31+ LLS_CORE_VLLM_TLS_VERIFY ,
32+ LLS_CORE_VLLM_URL ,
2233 LLS_OPENSHIFT_MINIMAL_VERSION ,
34+ POSTGRES_IMAGE ,
35+ UPGRADE_DISTRIBUTION_NAME ,
2336 ModelInfo ,
2437)
2538from tests .llama_stack .utils import (
2639 create_llama_stack_distribution ,
40+ vector_store_create_file_from_path ,
2741 vector_store_create_file_from_url ,
2842 wait_for_llama_stack_client_ready ,
2943 wait_for_unique_llama_stack_pod ,
3751
# Skip this whole module when the cluster has no outbound connectivity
# (tests below pull images and fetch documents from the internet).
pytestmark = pytest.mark.skip_on_disconnected

# Postgres image used by the vector-io backend; pinned by digest so runs are
# reproducible. Override via LLS_VECTOR_IO_POSTGRES_IMAGE.
POSTGRES_IMAGE = os.getenv(
    "LLS_VECTOR_IO_POSTGRES_IMAGE",
    (
        "registry.redhat.io/rhel9/postgresql-15@sha256:"
        "90ec347a35ab8a5d530c8d09f5347b13cc71df04f3b994bfa8b1a409b1171d59"  # postgres 15 # pragma: allowlist secret
    ),
)

# Credentials for the test Postgres instance (defaults are test-only values).
POSTGRESQL_USER = os.getenv("LLS_VECTOR_IO_POSTGRESQL_USER", "ps_user")
POSTGRESQL_PASSWORD = os.getenv("LLS_VECTOR_IO_POSTGRESQL_PASSWORD", "ps_password")

# Core vLLM inference endpoint configuration. Empty defaults mean these must be
# supplied by the environment for inference tests to target a real model server.
LLS_CORE_INFERENCE_MODEL = os.getenv("LLS_CORE_INFERENCE_MODEL", "")
LLS_CORE_VLLM_URL = os.getenv("LLS_CORE_VLLM_URL", "")
LLS_CORE_VLLM_API_TOKEN = os.getenv("LLS_CORE_VLLM_API_TOKEN", "")
LLS_CORE_VLLM_MAX_TOKENS = os.getenv("LLS_CORE_VLLM_MAX_TOKENS", "16384")
LLS_CORE_VLLM_TLS_VERIFY = os.getenv("LLS_CORE_VLLM_TLS_VERIFY", "true")  # kept as a string; presumably parsed downstream — confirm

# Embedding endpoint configuration. The URL default is a placeholder
# (example.com) and the token default "fake" is a dummy value; real runs are
# expected to override these via the environment.
LLS_CORE_EMBEDDING_MODEL = os.getenv("LLS_CORE_EMBEDDING_MODEL", "nomic-embed-text-v1-5")
LLS_CORE_EMBEDDING_PROVIDER_MODEL_ID = os.getenv("LLS_CORE_EMBEDDING_PROVIDER_MODEL_ID", "nomic-embed-text-v1-5")
LLS_CORE_VLLM_EMBEDDING_URL = os.getenv(
    "LLS_CORE_VLLM_EMBEDDING_URL", "https://nomic-embed-text-v1-5.example.com:443/v1"
)
LLS_CORE_VLLM_EMBEDDING_API_TOKEN = os.getenv("LLS_CORE_VLLM_EMBEDDING_API_TOKEN", "fake")
LLS_CORE_VLLM_EMBEDDING_MAX_TOKENS = os.getenv("LLS_CORE_VLLM_EMBEDDING_MAX_TOKENS", "8192")
LLS_CORE_VLLM_EMBEDDING_TLS_VERIFY = os.getenv("LLS_CORE_VLLM_EMBEDDING_TLS_VERIFY", "true")

# AWS credentials read from the standard variable names; empty when not set.
LLS_CORE_AWS_ACCESS_KEY_ID = os.getenv("AWS_ACCESS_KEY_ID", "")
LLS_CORE_AWS_SECRET_ACCESS_KEY = os.getenv("AWS_SECRET_ACCESS_KEY", "")

# Key/value payload for the llama-stack distribution's secret.
# NOTE(review): keys look like Kubernetes Secret data keys consumed by the
# distribution deployment — confirm against create_llama_stack_distribution.
LLAMA_STACK_DISTRIBUTION_SECRET_DATA = {
    "postgres-user": POSTGRESQL_USER,
    "postgres-password": POSTGRESQL_PASSWORD,
    "vllm-api-token": LLS_CORE_VLLM_API_TOKEN,
    "vllm-embedding-api-token": LLS_CORE_VLLM_EMBEDDING_API_TOKEN,
    "aws-access-key-id": LLS_CORE_AWS_ACCESS_KEY_ID,
    "aws-secret-access-key": LLS_CORE_AWS_SECRET_ACCESS_KEY,
}

# Public IBM 4Q25 earnings press-release PDF used as a sample document for
# vector-store search tests.
IBM_EARNINGS_DOC_URL = "https://www.ibm.com/downloads/documents/us-en/1550f7eea8c0ded6"

# Fixed name so pre- and post-upgrade runs address the same distribution object.
UPGRADE_DISTRIBUTION_NAME = "llama-stack-distribution-upgrade"
8254
8355@pytest .fixture (scope = "class" )
8456def distribution_name (pytestconfig : pytest .Config ) -> str :
@@ -803,27 +775,64 @@ def vector_store(
803775 """
804776 Creates a vector store for testing and automatically cleans it up.
805777
806- This fixture creates a vector store, yields it to the test,
807- and ensures it's deleted after the test completes (whether it passes or fails).
778+ You can have example documents ingested into the store automatically by passing a
779+ non-empty ``doc_sources`` list in the indirect parametrization dict (URLs, files, or
780+ directories under the repo root). Omit ``doc_sources`` when the test only needs an
781+ empty store.
782+
783+ Options when parametrizing with ``indirect=True``:
784+
785+ * ``vector_io_provider`` (optional): backend id for the store; defaults to ``"milvus"``.
786+ * ``doc_sources`` (optional): non-empty list of document sources to upload after creation.
787+ Omitted, empty, or absent means no uploads. Each entry may be:
788+
789+ * A remote URL (``http://`` or ``https://``)
790+ * A repo-relative or absolute file path
791+ * A directory path (all files in the directory are uploaded)
792+
793+ Example:
794+
795+ @pytest.mark.parametrize(
796+ "vector_store",
797+ [
798+ pytest.param(
799+ {
800+ "vector_io_provider": "milvus",
801+ "doc_sources": [
802+ "https://www.ibm.com/downloads/documents/us-en/1550f7eea8c0ded6",
803+ "tests/llama_stack/dataset/corpus/finance",
804+ "tests/llama_stack/dataset/corpus/finance/ibm-4q25-earnings-press-release-unencrypted.pdf",
805+ ],
806+ },
807+ id="doc_sources:url+folder+file",
808+ ),
809+ ],
810+ indirect=True,
811+ )
812+
813+ Post-upgrade runs reuse the existing store; uploads run only in the create path when
814+ ``doc_sources`` is non-empty (documents from the pre-upgrade run are reused otherwise).
808815
809816 Args:
810- llama_stack_client : The configured LlamaStackClient
817+ unprivileged_llama_stack_client : The configured LlamaStackClient
811818 llama_stack_models: Model information including embedding model details
819+ request: Pytest fixture request carrying optional param dict
820+ pytestconfig: Pytest config (post-upgrade reuses store, no create/upload path)
821+ teardown_resources: Whether to delete the store after the class
812822
813823 Yields:
814824 Vector store object that can be used in tests
815825 """
816826
817- params = getattr (request , "param" , {"vector_io_provider" : "milvus" })
818- vector_io_provider = str (params .get ("vector_io_provider" ))
827+ params_raw = getattr (request , "param" , None )
828+ params : dict [str , Any ] = dict (params_raw ) if isinstance (params_raw , dict ) else {"vector_io_provider" : "milvus" }
829+ vector_io_provider = str (params .get ("vector_io_provider" ) or "milvus" )
830+ doc_sources = params .get ("doc_sources" )
819831
820832 if pytestconfig .option .post_upgrade :
833+ stores = unprivileged_llama_stack_client .vector_stores .list ().data
821834 vector_store = next (
822- (
823- vs
824- for vs in unprivileged_llama_stack_client .vector_stores .list ().data
825- if getattr (vs , "name" , "" ) == "test_vector_store"
826- ),
835+ (vs for vs in stores if getattr (vs , "name" , "" ) == "test_vector_store" ),
827836 None ,
828837 )
829838 if not vector_store :
@@ -840,6 +849,47 @@ def vector_store(
840849 )
841850 LOGGER .info (f"vector_store successfully created (provider_id={ vector_io_provider } , id={ vector_store .id } )" )
842851
852+ if doc_sources :
853+ if not isinstance (doc_sources , list ):
854+ raise TypeError (f"doc_sources must be a list[str], got { type (doc_sources ).__name__ } " )
855+ LOGGER .info (
856+ "Uploading doc_sources to vector_store (provider_id=%s, id=%s): %s" ,
857+ vector_io_provider ,
858+ vector_store .id ,
859+ doc_sources ,
860+ )
861+ for source in doc_sources :
862+ if source .startswith (("http://" , "https://" )):
863+ vector_store_create_file_from_url (
864+ url = source ,
865+ llama_stack_client = unprivileged_llama_stack_client ,
866+ vector_store = vector_store ,
867+ )
868+ else :
869+ source_path = Path (source ) # noqa: FCN001
870+ if not source_path .is_absolute ():
871+ source_path = Path (request .config .rootdir ) / source_path # noqa: FCN001
872+
873+ if source_path .is_dir ():
874+ files = sorted (source_path .iterdir ())
875+ if not files :
876+ raise FileNotFoundError (f"No files found in directory: { source_path } " )
877+ for file_path in files :
878+ if file_path .is_file ():
879+ vector_store_create_file_from_path (
880+ file_path = file_path ,
881+ llama_stack_client = unprivileged_llama_stack_client ,
882+ vector_store = vector_store ,
883+ )
884+ elif source_path .is_file ():
885+ vector_store_create_file_from_path (
886+ file_path = source_path ,
887+ llama_stack_client = unprivileged_llama_stack_client ,
888+ vector_store = vector_store ,
889+ )
890+ else :
891+ raise FileNotFoundError (f"Document source not found: { source_path } " )
892+
843893 yield vector_store
844894
845895 if teardown_resources :
@@ -850,36 +900,6 @@ def vector_store(
850900 LOGGER .warning (f"Failed to delete vector store { vector_store .id } : { e } " )
851901
852902
@pytest.fixture(scope="class")
def vector_store_with_example_docs(
    unprivileged_llama_stack_client: LlamaStackClient, vector_store: VectorStore, pytestconfig: pytest.Config
) -> Generator[VectorStore]:
    """
    Yields the class-scoped vector store with the IBM fourth-quarter 2025 earnings report uploaded.

    Depends on the ``vector_store`` fixture and uploads the IBM earnings document
    (``IBM_EARNINGS_DOC_URL``) so tests can exercise vector, keyword, and hybrid search.
    In post-upgrade runs no upload is performed: documents ingested during the
    pre-upgrade run are reused as-is. This fixture performs no teardown of its own;
    deletion of the store (and with it the uploaded file) is handled by the
    underlying ``vector_store`` fixture.

    Args:
        unprivileged_llama_stack_client: The configured LlamaStackClient
        vector_store: The vector store fixture to upload files to
        pytestconfig: Pytest config; the post-upgrade option selects the reuse path

    Yields:
        Vector store object with the IBM earnings report document available
    """
    if pytestconfig.option.post_upgrade:
        # The pre-upgrade run already ingested the document; uploading again
        # would create a duplicate file in the store.
        LOGGER.info("Post-upgrade run: reusing vector store docs without uploading new files")
    else:
        vector_store_create_file_from_url(
            url=IBM_EARNINGS_DOC_URL,
            llama_stack_client=unprivileged_llama_stack_client,
            vector_store=vector_store,
        )

    yield vector_store
882-
883903@pytest .fixture (scope = "class" )
884904def unprivileged_postgres_service (
885905 pytestconfig : pytest .Config ,
0 commit comments