Skip to content
Merged
Show file tree
Hide file tree
Changes from 10 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 0 additions & 4 deletions tests/fixtures/vector_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,10 +113,6 @@ def _factory(provider_name: str) -> list[dict[str, Any]]:
env_vars.append({"name": "MILVUS_CONSISTENCY_LEVEL", "value": "Bounded"})
elif provider_name == "faiss":
env_vars.append({"name": "ENABLE_FAISS", "value": "faiss"})
env_vars.append({
"name": "FAISS_KVSTORE_DB_PATH",
"value": "/opt/app-root/src/.llama/distributions/rh/sqlite_vec.db",
})
elif provider_name == "pgvector":
request.getfixturevalue(argname="pgvector_service")
env_vars.append({"name": "ENABLE_PGVECTOR", "value": "true"})
Expand Down
71 changes: 63 additions & 8 deletions tests/llama_stack/conftest.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
import os
from collections.abc import Callable, Generator
from typing import Any

Expand All @@ -18,8 +17,11 @@
from ocp_resources.service import Service
from semver import Version

import utilities
from tests.llama_stack.constants import (
HTTPS_PROXY,
LLAMA_STACK_DISTRIBUTION_SECRET_DATA,
LLS_CLIENT_VERIFY_SSL,
LLS_CORE_EMBEDDING_MODEL,
LLS_CORE_EMBEDDING_PROVIDER_MODEL_ID,
LLS_CORE_INFERENCE_MODEL,
Expand Down Expand Up @@ -71,13 +73,20 @@ def enabled_llama_stack_operator(dsc_resource: DataScienceCluster) -> Generator[
yield dsc


@pytest.fixture(scope="class")
def is_disconnected_cluster(admin_client: DynamicClient) -> bool:
    """Return True when the target cluster is disconnected (air-gapped)."""
    disconnected: bool = utilities.infra.is_disconnected_cluster(client=admin_client)
    return disconnected


@pytest.fixture(scope="class")
def llama_stack_server_config(
request: FixtureRequest,
pytestconfig: pytest.Config,
distribution_name: str,
vector_io_provider_deployment_config_factory: Callable[[str], list[dict[str, str]]],
files_provider_config_factory: Callable[[str], list[dict[str, str]]],
is_disconnected_cluster: bool,
) -> dict[str, Any]:
"""
Generate server configuration for LlamaStack distribution deployment and deploy vector I/O provider resources.
Expand All @@ -94,6 +103,7 @@ def llama_stack_server_config(
and return their configuration environment variables
files_provider_config_factory: Factory function to configure files storage providers
and return their configuration environment variables
is_disconnected_cluster: Whether the target cluster is disconnected (air-gapped)

Returns:
Dict containing server configuration with the following structure:
Expand Down Expand Up @@ -141,7 +151,10 @@ def test_with_remote_milvus(llama_stack_server_config):
"""

env_vars = []
tls_config: dict[str, Any] | None = None
params = getattr(request, "param", {})
cpu_requests = "2"
cpu_limits = "4"

# INFERENCE_MODEL
if params.get("inference_model"):
Expand Down Expand Up @@ -191,8 +204,21 @@ def test_with_remote_milvus(llama_stack_server_config):
env_vars.append({"name": "VLLM_EMBEDDING_MAX_TOKENS", "value": LLS_CORE_VLLM_EMBEDDING_MAX_TOKENS})
env_vars.append({"name": "VLLM_EMBEDDING_TLS_VERIFY", "value": LLS_CORE_VLLM_EMBEDDING_TLS_VERIFY})
elif embedding_provider == "sentence-transformers":
# Increase CPU limits to prevent timeouts when inserting files into vector stores
cpu_requests = "4"
cpu_limits = "8"

# Enable sentence-transformers embedding model
env_vars.append({"name": "ENABLE_SENTENCE_TRANSFORMERS", "value": "true"})
env_vars.append({"name": "EMBEDDING_PROVIDER", "value": "sentence-transformers"})

if is_disconnected_cluster:
# Workaround to fix sentence-transformer embeddings on disconnected (RHAIENG-1624)
env_vars.append({"name": "SENTENCE_TRANSFORMERS_HOME", "value": "/opt/app-root/src/.cache/huggingface/hub"})
env_vars.append({"name": "HF_HUB_OFFLINE", "value": "1"})
env_vars.append({"name": "TRANSFORMERS_OFFLINE", "value": "1"})
env_vars.append({"name": "HF_DATASETS_OFFLINE", "value": "1"})

else:
raise ValueError(f"Unsupported embeddings provider: {embedding_provider}")

Expand Down Expand Up @@ -229,11 +255,35 @@ def test_with_remote_milvus(llama_stack_server_config):
env_vars_vector_io = vector_io_provider_deployment_config_factory(provider_name=vector_io_provider)
env_vars.extend(env_vars_vector_io)

if is_disconnected_cluster and HTTPS_PROXY:
LOGGER.info(f"Setting proxy and tlsconfig configuration (https_proxy:{HTTPS_PROXY})")
env_vars.append({"name": "HTTPS_PROXY", "value": HTTPS_PROXY})

# The operator sets SSL_CERT_FILE automatically when tlsConfig.caBundle is
# configured, but the `requests` library (used by tiktoken to download
# tokenizer data) ignores SSL_CERT_FILE and only checks REQUESTS_CA_BUNDLE.
# Without this, tiktoken fails with SSL CERTIFICATE_VERIFY_FAILED when the
# proxy uses a self-signed certificate (e.g. in disconnected clusters).
env_vars.append({
"name": "REQUESTS_CA_BUNDLE",
"value": "/etc/ssl/certs/ca-bundle/ca-bundle.crt",
})

tls_config = {
"caBundle": {
"configMapName": "odh-trusted-ca-bundle",
"configMapKeys": [
"ca-bundle.crt", # CNO-injected cluster CAs
"odh-ca-bundle.crt", # User-specified custom CAs
],
},
}

server_config: dict[str, Any] = {
"containerSpec": {
"resources": {
"requests": {"cpu": "1", "memory": "3Gi"},
"limits": {"cpu": "3", "memory": "6Gi"},
"requests": {"cpu": cpu_requests, "memory": "3Gi"},
"limits": {"cpu": cpu_limits, "memory": "6Gi"},
},
"env": env_vars,
"name": "llama-stack",
Expand All @@ -242,9 +292,15 @@ def test_with_remote_milvus(llama_stack_server_config):
"distribution": {"name": "rh-dev"},
}

if tls_config:
server_config["tlsConfig"] = tls_config

if params.get("llama_stack_storage_size"):
storage_size = params.get("llama_stack_storage_size")
server_config["storage"] = {"size": storage_size}
if is_disconnected_cluster:
LOGGER.warning("Skipping storage_size configuration on disconnected clusters due to known bug RHAIENG-1819")
else:
storage_size = params.get("llama_stack_storage_size")
server_config["storage"] = {"size": storage_size}

return server_config

Expand Down Expand Up @@ -593,14 +649,13 @@ def llama_stack_test_route(
def _create_llama_stack_client(
route: Route,
) -> Generator[LlamaStackClient, Any, Any]:
# LLS_CLIENT_VERIFY_SSL is false by default to be able to test with Self-Signed certificates
verifySSL = os.getenv("LLS_CLIENT_VERIFY_SSL", "false").lower() == "true"
http_client = httpx.Client(verify=verifySSL, timeout=240)
http_client = httpx.Client(verify=LLS_CLIENT_VERIFY_SSL, timeout=300)
try:
client = LlamaStackClient(
base_url=f"https://{route.host}",
max_retries=3,
http_client=http_client,
timeout=300,
)
wait_for_llama_stack_client_ready(client=client)
existing_file_ids = {f.id for f in client.files.list().data}
Expand Down
4 changes: 4 additions & 0 deletions tests/llama_stack/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,10 @@ class ModelInfo(NamedTuple):
embedding_dimension: int # API returns integer (e.g., 768)


# Outbound HTTPS proxy endpoint (from SQUID_HTTPS_PROXY); empty string when the
# env var is unset — callers treat empty as "no proxy configured".
HTTPS_PROXY: str = os.getenv("SQUID_HTTPS_PROXY", "")

# LLS_CLIENT_VERIFY_SSL is false by default to be able to test with Self-Signed certificates
LLS_CLIENT_VERIFY_SSL: bool = os.getenv("LLS_CLIENT_VERIFY_SSL", "false").lower() == "true"
# Kubernetes label selector used to find llama-stack pods.
LLS_CORE_POD_FILTER: str = "app=llama-stack"
# Minimum OpenShift version supported by these tests.
LLS_OPENSHIFT_MINIMAL_VERSION: VersionInfo = semver.VersionInfo.parse("4.17.0")

Expand Down
7 changes: 5 additions & 2 deletions tests/llama_stack/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,10 @@ def vector_store_create_and_poll(
TimeoutError: If wait_timeout is reached while status is still in_progress.
"""
vs_file = llama_stack_client.vector_stores.files.create(
vector_store_id=vector_store_id, file_id=file_id, attributes=attributes
vector_store_id=vector_store_id,
file_id=file_id,
timeout=240, # Increased timeout for slow processing (e.g., sentence-transformers)
attributes=dict(attributes) if attributes else attributes,
)
terminal_statuses = ("completed", "failed", "cancelled")
deadline = time.monotonic() + wait_timeout
Expand Down Expand Up @@ -155,7 +158,7 @@ def create_llama_stack_distribution(


@retry(
wait_timeout=60,
wait_timeout=240,
sleep=5,
exceptions_dict={ResourceNotFoundError: [], UnexpectedResourceCountError: []},
)
Expand Down