Skip to content

Commit bee86b6

Browse files
authored
Merge branch 'main' into aysnc_signing
2 parents 91af61f + 67b285b commit bee86b6

File tree

17 files changed

+937
-405
lines changed

17 files changed

+937
-405
lines changed

tests/fixtures/vector_io.py

Lines changed: 0 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -476,21 +476,6 @@ def get_pgvector_deployment_template() -> dict[str, Any]:
476476
},
477477
{"name": "PGDATA", "value": "/var/lib/postgresql/data/pgdata"},
478478
],
479-
"lifecycle": {
480-
"postStart": {
481-
"exec": {
482-
"command": [
483-
"/bin/sh",
484-
"-c",
485-
(
486-
"sleep 5\n"
487-
f"PGPASSWORD={PGVECTOR_PASSWORD} psql -h localhost -U {PGVECTOR_USER} "
488-
'-d pgvector -c "CREATE EXTENSION IF NOT EXISTS vector;" || true'
489-
),
490-
]
491-
}
492-
}
493-
},
494479
"volumeMounts": [{"name": "pgdata", "mountPath": "/var/lib/postgresql/data"}],
495480
}
496481
],

tests/llama_stack/conftest.py

Lines changed: 100 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -34,8 +34,10 @@
3434
UPGRADE_DISTRIBUTION_NAME,
3535
ModelInfo,
3636
)
37+
from tests.llama_stack.datasets import Dataset
3738
from tests.llama_stack.utils import (
3839
create_llama_stack_distribution,
40+
vector_store_upload_dataset,
3941
vector_store_upload_doc_sources,
4042
wait_for_llama_stack_client_ready,
4143
wait_for_unique_llama_stack_pod,
@@ -720,6 +722,36 @@ def llama_stack_models(unprivileged_llama_stack_client: LlamaStackClient) -> Mod
720722
return ModelInfo(model_id=model_id, embedding_model=embedding_model, embedding_dimension=embedding_dimension)
721723

722724

725+
@pytest.fixture(scope="class")
726+
def dataset(request: FixtureRequest) -> Dataset:
727+
"""Return the Dataset passed via indirect parametrize.
728+
729+
This exists as a standalone fixture so that test methods can access the
730+
Dataset (e.g. for QA ground-truth queries) without hardcoding it.
731+
732+
Note: we use this fixture instead of a plain pytest parameter to avoid
733+
fixture-dependency problems that prevented Llama Stack-dependent resources,
734+
such as databases or secrets, from being created at the right time.
735+
736+
Raises:
737+
pytest.UsageError: If the fixture is not indirect-parametrized or the
738+
parameter is not a :class:`~tests.llama_stack.datasets.Dataset` instance.
739+
"""
740+
if not hasattr(request, "param"):
741+
raise pytest.UsageError(
742+
"The `dataset` fixture must be indirect-parametrized with a Dataset instance "
743+
"(e.g. @pytest.mark.parametrize('dataset', [MY_DATASET], indirect=True)). "
744+
"Without indirect parametrization, `request.param` is missing."
745+
)
746+
param = request.param
747+
if not isinstance(param, Dataset):
748+
raise pytest.UsageError(
749+
"The `dataset` fixture must be indirect-parametrized with a "
750+
f"tests.llama_stack.datasets.Dataset instance; got {type(param).__name__!r}."
751+
)
752+
return param
753+
754+
723755
@pytest.fixture(scope="class")
724756
def vector_store(
725757
unprivileged_llama_stack_client: LlamaStackClient,
@@ -729,61 +761,78 @@ def vector_store(
729761
teardown_resources: bool,
730762
) -> Generator[VectorStore]:
731763
"""
732-
Creates a vector store for testing and automatically cleans it up.
733-
734-
You can have example documents ingested into the store automatically by passing a
735-
non-empty ``doc_sources`` list in the indirect parametrization dict (URLs, files, or
736-
directories under the repo root). Omit ``doc_sources`` when the test only needs an
737-
empty store.
738-
739-
Options when parametrizing with ``indirect=True``:
740-
741-
* ``vector_io_provider`` (optional): backend id for the store; defaults to ``"milvus"``.
742-
* ``doc_sources`` (optional): non-empty list of document sources to upload after creation.
743-
Omitted, empty, or absent means no uploads. Each entry may be:
744-
745-
* A remote URL (``http://`` or ``https://``)
746-
* A repo-relative or absolute file path
747-
* A directory path (all files in the directory are uploaded)
748-
749-
Example:
764+
Fixture to provide a vector store instance for tests.
765+
766+
Given: A configured LlamaStackClient, an embedding model, and test parameters specifying
767+
vector store provider and a dataset or document sources.
768+
When: The fixture is invoked by a parameterized test class or function.
769+
Then: It creates (or reuses, in post-upgrade scenarios) a vector store with the specified
770+
vector I/O provider, optionally uploads a dataset or custom document sources, and ensures
771+
proper cleanup after the test if needed.
772+
773+
Parameter Usage:
774+
- vector_io_provider (str): The provider backend to use for the vector store (e.g., 'milvus',
775+
'faiss', 'pgvector', 'qdrant-remote', etc.). Determines how vector data is persisted and queried.
776+
If not specified, defaults to 'milvus'.
777+
- dataset (Dataset): An instance of the Dataset class (see datasets.py) specifying the documents and
778+
ground-truth QA to upload to the vector store. Use this to quickly populate the store with a
779+
standard test corpus. Mutually exclusive with doc_sources.
780+
- doc_sources (list[str]): A list of document sources to upload to the vector store. Each entry may be:
781+
- A file path (repo-relative or absolute) to a single document.
782+
- A directory path, in which case all files within the directory will be uploaded.
783+
- A remote HTTPS URL to a document (e.g., "https://example.com/mydoc.pdf"), which will be downloaded
784+
and ingested.
785+
`doc_sources` is mutually exclusive with `dataset`.
786+
787+
Examples:
788+
# Example 1: Use dataset to populate the vector store
789+
@pytest.mark.parametrize(
790+
"vector_store",
791+
[
792+
pytest.param(
793+
{"vector_io_provider": "milvus", "dataset": IBM_2025_Q4_EARNINGS},
794+
id="milvus-with-IBM-earnings-dataset",
795+
),
796+
],
797+
indirect=True,
798+
)
750799
800+
# Example 2: Upload local documents by file path
751801
@pytest.mark.parametrize(
752802
"vector_store",
753803
[
754804
pytest.param(
755805
{
756-
"vector_io_provider": "milvus",
806+
"vector_io_provider": "faiss",
757807
"doc_sources": [
758-
"https://www.ibm.com/downloads/documents/us-en/1550f7eea8c0ded6",
759-
"tests/llama_stack/dataset/corpus/finance",
760-
"tests/llama_stack/dataset/corpus/finance/ibm-4q25-earnings-press-release-unencrypted.pdf",
808+
"tests/llama_stack/dataset/corpus/finance/document1.pdf",
809+
"tests/llama_stack/dataset/corpus/finance/document2.pdf",
761810
],
762811
},
763-
id="doc_sources:url+folder+file",
812+
id="faiss-with-explicit-documents",
764813
),
765814
],
766815
indirect=True,
767816
)
768817
769-
Post-upgrade runs reuse the existing store; uploads run only in the create path when
770-
``doc_sources`` is non-empty (documents from the pre-upgrade run are reused otherwise).
771-
772-
Args:
773-
unprivileged_llama_stack_client: The configured LlamaStackClient
774-
llama_stack_models: Model information including embedding model details
775-
request: Pytest fixture request carrying optional param dict
776-
pytestconfig: Pytest config (post-upgrade reuses store, no create/upload path)
777-
teardown_resources: Whether to delete the store after the class
778-
779818
Yields:
780-
Vector store object that can be used in tests
819+
VectorStore: The created or reused vector store ready for ingestion/search tests.
820+
821+
Raises:
822+
ValueError: If the required vector store is missing in a post-upgrade scenario, or if
823+
both ``dataset`` and ``doc_sources`` are set in params (mutually exclusive).
824+
Exception: If vector store creation or document upload fails; after a failed upload, the fixture attempts to delete the newly created store before re-raising.
781825
"""
782826

783827
params_raw = getattr(request, "param", None)
784828
params: dict[str, Any] = dict(params_raw) if isinstance(params_raw, dict) else {"vector_io_provider": "milvus"}
785829
vector_io_provider = str(params.get("vector_io_provider") or "milvus")
786-
doc_sources = params.get("doc_sources")
830+
dataset: Dataset | None = params.get("dataset")
831+
doc_sources: list[str] | None = params.get("doc_sources")
832+
if dataset is not None and doc_sources is not None:
833+
raise ValueError(
834+
'vector_store fixture params must set at most one of "dataset" or "doc_sources"; both were provided.'
835+
)
787836

788837
if pytestconfig.option.post_upgrade:
789838
stores = unprivileged_llama_stack_client.vector_stores.list().data
@@ -805,27 +854,27 @@ def vector_store(
805854
)
806855
LOGGER.info(f"vector_store successfully created (provider_id={vector_io_provider}, id={vector_store.id})")
807856

808-
if doc_sources:
857+
if dataset or doc_sources:
809858
try:
810-
vector_store_upload_doc_sources(
811-
doc_sources=doc_sources,
812-
llama_stack_client=unprivileged_llama_stack_client,
813-
vector_store=vector_store,
814-
vector_io_provider=vector_io_provider,
815-
)
859+
if dataset:
860+
vector_store_upload_dataset(
861+
dataset=dataset,
862+
llama_stack_client=unprivileged_llama_stack_client,
863+
vector_store=vector_store,
864+
)
865+
elif doc_sources:
866+
vector_store_upload_doc_sources(
867+
doc_sources=doc_sources,
868+
llama_stack_client=unprivileged_llama_stack_client,
869+
vector_store=vector_store,
870+
vector_io_provider=vector_io_provider,
871+
)
816872
except Exception:
817873
try:
818874
unprivileged_llama_stack_client.vector_stores.delete(vector_store_id=vector_store.id)
819-
LOGGER.info(
820-
"Deleted vector store %s after failed doc_sources ingestion",
821-
vector_store.id,
822-
)
875+
LOGGER.info(f"Deleted vector store {vector_store.id} after failed document ingestion")
823876
except Exception as del_exc: # noqa: BLE001
824-
LOGGER.warning(
825-
"Failed to delete vector store %s after ingestion error: %s",
826-
vector_store.id,
827-
del_exc,
828-
)
877+
LOGGER.warning(f"Failed to delete vector store {vector_store.id} after ingestion error: {del_exc}")
829878
raise
830879

831880
yield vector_store

tests/llama_stack/constants.py

Lines changed: 0 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -69,31 +69,3 @@ class ModelInfo(NamedTuple):
6969
}
7070

7171
UPGRADE_DISTRIBUTION_NAME = "llama-stack-distribution-upgrade"
72-
73-
IBM_2025_Q4_EARNINGS_DOC_ENCRYPTED = "tests/llama_stack/dataset/corpus/pdf-testing/ibm-4q25-press-release-encrypted.pdf"
74-
IBM_2025_Q4_EARNINGS_DOC_UNENCRYPTED = (
75-
"tests/llama_stack/dataset/corpus/finance/ibm-4q25-earnings-press-release-unencrypted.pdf"
76-
)
77-
IBM_EARNINGS_SEARCH_QUERIES_BY_MODE: dict[str, list[str]] = {
78-
"vector": [
79-
"How did IBM perform financially in the fourth quarter of 2025?",
80-
"What were the main drivers of revenue growth?",
81-
"What is the company outlook for 2026?",
82-
"How did profit margins change year over year?",
83-
"What did leadership say about generative AI and growth?",
84-
],
85-
"keyword": [
86-
"What was free cash flow in the fourth quarter?",
87-
"What was Consulting revenue and segment profit margin?",
88-
"What was Software revenue and constant currency growth?",
89-
"What was diluted earnings per share for continuing operations?",
90-
"What are full-year 2026 expectations for revenue and free cash flow?",
91-
],
92-
"hybrid": [
93-
"What was IBM free cash flow and what does the company expect for 2026?",
94-
"What were segment results for Software and Infrastructure revenue?",
95-
"What was GAAP gross profit margin and pre-tax income?",
96-
"What did James Kavanaugh say about 2025 results and 2026 prospects?",
97-
"What was Consulting revenue and segment profit margin?",
98-
],
99-
}

0 commit comments

Comments
 (0)