Skip to content

Commit 6be3cb8

Browse files
authored
feat(llama_stack): centralize vector/RAG config and shared helpers (#1266)
* feat(llama_stack): centralize vector/RAG config and shared helpers - Fix automation for product bug https://redhat.atlassian.net/browse/RHAIENG-3816 - Move Postgres, vLLM, embedding, and AWS-related defaults into constants (env overrides) - Add IBM 2025 earnings PDFs (encrypted/unencrypted) and finance query sets per search mode - Add vector_store_create_and_poll, file-from-URL/path helpers, and upload assertions in utils - Replace vector_store_with_example_docs with a doc_sources parameter on the vector_store fixture - Refactor conftest and vector store + upgrade RAG tests to use the new constants and helpers Signed-off-by: Jorge Garcia Oncins <jgarciao@redhat.com> Made-with: Cursor Signed-off-by: Jorge Garcia Oncins <jgarciao@redhat.com> * fix: enhance doc_sources handling in vector_store fixture - Improved error handling for doc_sources input, ensuring it is a list and paths are validated against the repository root. - Added logging for successful and failed ingestion of document sources. - Streamlined the process for uploading files from URLs and local paths, including directory handling. Signed-off-by: Jorge Garcia Oncins <jgarciao@redhat.com> * fix: delete unused constant Signed-off-by: Jorge Garcia Oncins <jgarciao@redhat.com> --------- Signed-off-by: Jorge Garcia Oncins <jgarciao@redhat.com>
1 parent f381daa commit 6be3cb8

11 files changed

+463
-148
lines changed

tests/llama_stack/conftest.py

Lines changed: 85 additions & 83 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import os
22
from collections.abc import Callable, Generator
3+
from pathlib import Path
34
from typing import Any
45

56
import httpx
@@ -19,12 +20,24 @@
1920
from simple_logger.logger import get_logger
2021

2122
from tests.llama_stack.constants import (
23+
LLAMA_STACK_DISTRIBUTION_SECRET_DATA,
24+
LLS_CORE_EMBEDDING_MODEL,
25+
LLS_CORE_EMBEDDING_PROVIDER_MODEL_ID,
26+
LLS_CORE_INFERENCE_MODEL,
27+
LLS_CORE_VLLM_EMBEDDING_MAX_TOKENS,
28+
LLS_CORE_VLLM_EMBEDDING_TLS_VERIFY,
29+
LLS_CORE_VLLM_EMBEDDING_URL,
30+
LLS_CORE_VLLM_MAX_TOKENS,
31+
LLS_CORE_VLLM_TLS_VERIFY,
32+
LLS_CORE_VLLM_URL,
2233
LLS_OPENSHIFT_MINIMAL_VERSION,
34+
POSTGRES_IMAGE,
35+
UPGRADE_DISTRIBUTION_NAME,
2336
ModelInfo,
2437
)
2538
from tests.llama_stack.utils import (
2639
create_llama_stack_distribution,
27-
vector_store_create_file_from_url,
40+
vector_store_upload_doc_sources,
2841
wait_for_llama_stack_client_ready,
2942
wait_for_unique_llama_stack_pod,
3043
)
@@ -37,48 +50,6 @@
3750

3851
pytestmark = pytest.mark.skip_on_disconnected
3952

40-
POSTGRES_IMAGE = os.getenv(
41-
"LLS_VECTOR_IO_POSTGRES_IMAGE",
42-
(
43-
"registry.redhat.io/rhel9/postgresql-15@sha256:"
44-
"90ec347a35ab8a5d530c8d09f5347b13cc71df04f3b994bfa8b1a409b1171d59" # postgres 15 # pragma: allowlist secret
45-
),
46-
)
47-
48-
POSTGRESQL_USER = os.getenv("LLS_VECTOR_IO_POSTGRESQL_USER", "ps_user")
49-
POSTGRESQL_PASSWORD = os.getenv("LLS_VECTOR_IO_POSTGRESQL_PASSWORD", "ps_password")
50-
51-
LLS_CORE_INFERENCE_MODEL = os.getenv("LLS_CORE_INFERENCE_MODEL", "")
52-
LLS_CORE_VLLM_URL = os.getenv("LLS_CORE_VLLM_URL", "")
53-
LLS_CORE_VLLM_API_TOKEN = os.getenv("LLS_CORE_VLLM_API_TOKEN", "")
54-
LLS_CORE_VLLM_MAX_TOKENS = os.getenv("LLS_CORE_VLLM_MAX_TOKENS", "16384")
55-
LLS_CORE_VLLM_TLS_VERIFY = os.getenv("LLS_CORE_VLLM_TLS_VERIFY", "true")
56-
57-
LLS_CORE_EMBEDDING_MODEL = os.getenv("LLS_CORE_EMBEDDING_MODEL", "nomic-embed-text-v1-5")
58-
LLS_CORE_EMBEDDING_PROVIDER_MODEL_ID = os.getenv("LLS_CORE_EMBEDDING_PROVIDER_MODEL_ID", "nomic-embed-text-v1-5")
59-
LLS_CORE_VLLM_EMBEDDING_URL = os.getenv(
60-
"LLS_CORE_VLLM_EMBEDDING_URL", "https://nomic-embed-text-v1-5.example.com:443/v1"
61-
)
62-
LLS_CORE_VLLM_EMBEDDING_API_TOKEN = os.getenv("LLS_CORE_VLLM_EMBEDDING_API_TOKEN", "fake")
63-
LLS_CORE_VLLM_EMBEDDING_MAX_TOKENS = os.getenv("LLS_CORE_VLLM_EMBEDDING_MAX_TOKENS", "8192")
64-
LLS_CORE_VLLM_EMBEDDING_TLS_VERIFY = os.getenv("LLS_CORE_VLLM_EMBEDDING_TLS_VERIFY", "true")
65-
66-
LLS_CORE_AWS_ACCESS_KEY_ID = os.getenv("AWS_ACCESS_KEY_ID", "")
67-
LLS_CORE_AWS_SECRET_ACCESS_KEY = os.getenv("AWS_SECRET_ACCESS_KEY", "")
68-
69-
LLAMA_STACK_DISTRIBUTION_SECRET_DATA = {
70-
"postgres-user": POSTGRESQL_USER,
71-
"postgres-password": POSTGRESQL_PASSWORD,
72-
"vllm-api-token": LLS_CORE_VLLM_API_TOKEN,
73-
"vllm-embedding-api-token": LLS_CORE_VLLM_EMBEDDING_API_TOKEN,
74-
"aws-access-key-id": LLS_CORE_AWS_ACCESS_KEY_ID,
75-
"aws-secret-access-key": LLS_CORE_AWS_SECRET_ACCESS_KEY,
76-
}
77-
78-
IBM_EARNINGS_DOC_URL = "https://www.ibm.com/downloads/documents/us-en/1550f7eea8c0ded6"
79-
80-
UPGRADE_DISTRIBUTION_NAME = "llama-stack-distribution-upgrade"
81-
8253

8354
@pytest.fixture(scope="class")
8455
def distribution_name(pytestconfig: pytest.Config) -> str:
@@ -761,27 +732,64 @@ def vector_store(
761732
"""
762733
Creates a vector store for testing and automatically cleans it up.
763734
764-
This fixture creates a vector store, yields it to the test,
765-
and ensures it's deleted after the test completes (whether it passes or fails).
735+
You can have example documents ingested into the store automatically by passing a
736+
non-empty ``doc_sources`` list in the indirect parametrization dict (URLs, files, or
737+
directories under the repo root). Omit ``doc_sources`` when the test only needs an
738+
empty store.
739+
740+
Options when parametrizing with ``indirect=True``:
741+
742+
* ``vector_io_provider`` (optional): backend id for the store; defaults to ``"milvus"``.
743+
* ``doc_sources`` (optional): non-empty list of document sources to upload after creation.
744+
Omitted, empty, or absent means no uploads. Each entry may be:
745+
746+
* A remote URL (``http://`` or ``https://``)
747+
* A repo-relative or absolute file path
748+
* A directory path (all files in the directory are uploaded)
749+
750+
Example:
751+
752+
@pytest.mark.parametrize(
753+
"vector_store",
754+
[
755+
pytest.param(
756+
{
757+
"vector_io_provider": "milvus",
758+
"doc_sources": [
759+
"https://www.ibm.com/downloads/documents/us-en/1550f7eea8c0ded6",
760+
"tests/llama_stack/dataset/corpus/finance",
761+
"tests/llama_stack/dataset/corpus/finance/ibm-4q25-earnings-press-release-unencrypted.pdf",
762+
],
763+
},
764+
id="doc_sources:url+folder+file",
765+
),
766+
],
767+
indirect=True,
768+
)
769+
770+
Post-upgrade runs reuse the existing store; uploads run only in the create path when
771+
``doc_sources`` is non-empty (documents from the pre-upgrade run are reused otherwise).
766772
767773
Args:
768-
llama_stack_client: The configured LlamaStackClient
774+
unprivileged_llama_stack_client: The configured LlamaStackClient
769775
llama_stack_models: Model information including embedding model details
776+
request: Pytest fixture request carrying optional param dict
777+
pytestconfig: Pytest config (post-upgrade reuses store, no create/upload path)
778+
teardown_resources: Whether to delete the store after the class
770779
771780
Yields:
772781
Vector store object that can be used in tests
773782
"""
774783

775-
params = getattr(request, "param", {"vector_io_provider": "milvus"})
776-
vector_io_provider = str(params.get("vector_io_provider"))
784+
params_raw = getattr(request, "param", None)
785+
params: dict[str, Any] = dict(params_raw) if isinstance(params_raw, dict) else {"vector_io_provider": "milvus"}
786+
vector_io_provider = str(params.get("vector_io_provider") or "milvus")
787+
doc_sources = params.get("doc_sources")
777788

778789
if pytestconfig.option.post_upgrade:
790+
stores = unprivileged_llama_stack_client.vector_stores.list().data
779791
vector_store = next(
780-
(
781-
vs
782-
for vs in unprivileged_llama_stack_client.vector_stores.list().data
783-
if getattr(vs, "name", "") == "test_vector_store"
784-
),
792+
(vs for vs in stores if getattr(vs, "name", "") == "test_vector_store"),
785793
None,
786794
)
787795
if not vector_store:
@@ -798,6 +806,30 @@ def vector_store(
798806
)
799807
LOGGER.info(f"vector_store successfully created (provider_id={vector_io_provider}, id={vector_store.id})")
800808

809+
if doc_sources:
810+
try:
811+
vector_store_upload_doc_sources(
812+
doc_sources=doc_sources,
813+
repo_root=Path(request.config.rootdir).resolve(),
814+
llama_stack_client=unprivileged_llama_stack_client,
815+
vector_store=vector_store,
816+
vector_io_provider=vector_io_provider,
817+
)
818+
except Exception:
819+
try:
820+
unprivileged_llama_stack_client.vector_stores.delete(vector_store_id=vector_store.id)
821+
LOGGER.info(
822+
"Deleted vector store %s after failed doc_sources ingestion",
823+
vector_store.id,
824+
)
825+
except Exception as del_exc: # noqa: BLE001
826+
LOGGER.warning(
827+
"Failed to delete vector store %s after ingestion error: %s",
828+
vector_store.id,
829+
del_exc,
830+
)
831+
raise
832+
801833
yield vector_store
802834

803835
if teardown_resources:
@@ -808,36 +840,6 @@ def vector_store(
808840
LOGGER.warning(f"Failed to delete vector store {vector_store.id}: {e}")
809841

810842

811-
@pytest.fixture(scope="class")
812-
def vector_store_with_example_docs(
813-
unprivileged_llama_stack_client: LlamaStackClient, vector_store: VectorStore, pytestconfig: pytest.Config
814-
) -> Generator[VectorStore]:
815-
"""
816-
Creates a vector store with the IBM fourth-quarter 2025 earnings report uploaded.
817-
818-
This fixture depends on the vector_store fixture and uploads the IBM earnings
819-
document to the vector store for testing vector, keyword, and hybrid search.
820-
The file is automatically cleaned up after the test completes.
821-
822-
Args:
823-
unprivileged_llama_stack_client: The configured LlamaStackClient
824-
vector_store: The vector store fixture to upload files to
825-
826-
Yields:
827-
Vector store object with uploaded IBM earnings report document
828-
"""
829-
if pytestconfig.option.post_upgrade:
830-
LOGGER.info("Post-upgrade run: reusing vector store docs without uploading new files")
831-
else:
832-
vector_store_create_file_from_url(
833-
url=IBM_EARNINGS_DOC_URL,
834-
llama_stack_client=unprivileged_llama_stack_client,
835-
vector_store=vector_store,
836-
)
837-
838-
yield vector_store
839-
840-
841843
@pytest.fixture(scope="class")
842844
def unprivileged_postgres_service(
843845
pytestconfig: pytest.Config,

tests/llama_stack/constants.py

Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import os
12
from enum import Enum
23
from typing import NamedTuple
34

@@ -29,3 +30,70 @@ class ModelInfo(NamedTuple):
2930

3031
LLS_CORE_POD_FILTER: str = "app=llama-stack"
3132
LLS_OPENSHIFT_MINIMAL_VERSION: VersionInfo = semver.VersionInfo.parse("4.17.0")
33+
34+
POSTGRES_IMAGE = os.getenv(
35+
"LLS_VECTOR_IO_POSTGRES_IMAGE",
36+
(
37+
"registry.redhat.io/rhel9/postgresql-15@sha256:"
38+
"90ec347a35ab8a5d530c8d09f5347b13cc71df04f3b994bfa8b1a409b1171d59" # postgres 15 # pragma: allowlist secret
39+
),
40+
)
41+
POSTGRESQL_USER = os.getenv("LLS_VECTOR_IO_POSTGRESQL_USER", "ps_user")
42+
POSTGRESQL_PASSWORD = os.getenv("LLS_VECTOR_IO_POSTGRESQL_PASSWORD", "ps_password")
43+
44+
LLS_CORE_INFERENCE_MODEL = os.getenv("LLS_CORE_INFERENCE_MODEL", "")
45+
LLS_CORE_VLLM_URL = os.getenv("LLS_CORE_VLLM_URL", "")
46+
LLS_CORE_VLLM_API_TOKEN = os.getenv("LLS_CORE_VLLM_API_TOKEN", "")
47+
LLS_CORE_VLLM_MAX_TOKENS = os.getenv("LLS_CORE_VLLM_MAX_TOKENS", "16384")
48+
LLS_CORE_VLLM_TLS_VERIFY = os.getenv("LLS_CORE_VLLM_TLS_VERIFY", "true")
49+
50+
LLS_CORE_EMBEDDING_MODEL = os.getenv("LLS_CORE_EMBEDDING_MODEL", "nomic-embed-text-v1-5")
51+
LLS_CORE_EMBEDDING_PROVIDER_MODEL_ID = os.getenv("LLS_CORE_EMBEDDING_PROVIDER_MODEL_ID", "nomic-embed-text-v1-5")
52+
LLS_CORE_VLLM_EMBEDDING_URL = os.getenv(
53+
"LLS_CORE_VLLM_EMBEDDING_URL", "https://nomic-embed-text-v1-5.example.com:443/v1"
54+
)
55+
LLS_CORE_VLLM_EMBEDDING_API_TOKEN = os.getenv("LLS_CORE_VLLM_EMBEDDING_API_TOKEN", "fake")
56+
LLS_CORE_VLLM_EMBEDDING_MAX_TOKENS = os.getenv("LLS_CORE_VLLM_EMBEDDING_MAX_TOKENS", "8192")
57+
LLS_CORE_VLLM_EMBEDDING_TLS_VERIFY = os.getenv("LLS_CORE_VLLM_EMBEDDING_TLS_VERIFY", "true")
58+
59+
LLS_CORE_AWS_ACCESS_KEY_ID = os.getenv("AWS_ACCESS_KEY_ID", "")
60+
LLS_CORE_AWS_SECRET_ACCESS_KEY = os.getenv("AWS_SECRET_ACCESS_KEY", "")
61+
62+
LLAMA_STACK_DISTRIBUTION_SECRET_DATA = {
63+
"postgres-user": POSTGRESQL_USER,
64+
"postgres-password": POSTGRESQL_PASSWORD,
65+
"vllm-api-token": LLS_CORE_VLLM_API_TOKEN,
66+
"vllm-embedding-api-token": LLS_CORE_VLLM_EMBEDDING_API_TOKEN,
67+
"aws-access-key-id": LLS_CORE_AWS_ACCESS_KEY_ID,
68+
"aws-secret-access-key": LLS_CORE_AWS_SECRET_ACCESS_KEY,
69+
}
70+
71+
UPGRADE_DISTRIBUTION_NAME = "llama-stack-distribution-upgrade"
72+
73+
IBM_2025_Q4_EARNINGS_DOC_ENCRYPTED = "tests/llama_stack/dataset/corpus/pdf-testing/ibm-4q25-press-release-encrypted.pdf"
74+
IBM_2025_Q4_EARNINGS_DOC_UNENCRYPTED = (
75+
"tests/llama_stack/dataset/corpus/finance/ibm-4q25-earnings-press-release-unencrypted.pdf"
76+
)
77+
IBM_EARNINGS_SEARCH_QUERIES_BY_MODE: dict[str, list[str]] = {
78+
"vector": [
79+
"How did IBM perform financially in the fourth quarter of 2025?",
80+
"What were the main drivers of revenue growth?",
81+
"What is the company outlook for 2026?",
82+
"How did profit margins change year over year?",
83+
"What did leadership say about generative AI and growth?",
84+
],
85+
"keyword": [
86+
"What was free cash flow in the fourth quarter?",
87+
"What was Consulting revenue and segment profit margin?",
88+
"What was Software revenue and constant currency growth?",
89+
"What was diluted earnings per share for continuing operations?",
90+
"What are full-year 2026 expectations for revenue and free cash flow?",
91+
],
92+
"hybrid": [
93+
"What was IBM free cash flow and what does the company expect for 2026?",
94+
"What were segment results for Software and Infrastructure revenue?",
95+
"What was GAAP gross profit margin and pre-tax income?",
96+
"What did James Kavanaugh say about 2025 results and 2026 prospects?",
97+
"What was Consulting revenue and segment profit margin?",
98+
],
99+
}
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
# Llama Stack test fixtures (internal)
2+
3+
These files are for **internal Open Data Hub / OpenShift AI integration tests** only. We use them to hit **[Llama Stack](https://github.com/meta-llama/llama-stack) vector store APIs**—think ingestion, indexing, search, and the plumbing around that—not as a shipped dataset or for model training.
4+
5+
## IBM finance PDFs (`corpus/finance/`)
6+
7+
The PDFs here are IBM **quarterly earnings press releases** (the same material IBM posts for investors). If you need to replace or refresh them, download the official PDFs from IBM’s site:
8+
9+
[Quarterly earnings announcements](https://www.ibm.com/investor/financial-reporting/quarterly-earnings) (choose year and quarter, then open the press release PDF).
10+
11+
## PDF edge cases (`corpus/pdf-testing/`)
12+
13+
This folder is for **weird PDFs on purpose**: password-protected files, digitally signed ones (e.g. PAdES), and similar cases so we can test how ingestion and parsers behave when the file is not a plain “print to PDF” document.
14+
15+
## Small print
16+
17+
Not for external distribution as a “dataset.” PDFs stay under their publishers’ terms; don’t reuse them outside this test context without checking those terms.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.

0 commit comments

Comments
 (0)