Skip to content
Open
4 changes: 0 additions & 4 deletions tests/fixtures/vector_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,10 +113,6 @@ def _factory(provider_name: str) -> list[dict[str, Any]]:
env_vars.append({"name": "MILVUS_CONSISTENCY_LEVEL", "value": "Bounded"})
elif provider_name == "faiss":
env_vars.append({"name": "ENABLE_FAISS", "value": "faiss"})
env_vars.append({
"name": "FAISS_KVSTORE_DB_PATH",
"value": "/opt/app-root/src/.llama/distributions/rh/sqlite_vec.db",
})
elif provider_name == "pgvector":
request.getfixturevalue(argname="pgvector_service")
env_vars.append({"name": "ENABLE_PGVECTOR", "value": "true"})
Expand Down
71 changes: 63 additions & 8 deletions tests/llama_stack/conftest.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
import os
from collections.abc import Callable, Generator
from typing import Any

Expand All @@ -18,8 +17,11 @@
from ocp_resources.service import Service
from semver import Version

import utilities
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🔴 Critical

🧩 Analysis chain

🏁 Script executed:

#!/bin/bash
set -euo pipefail

echo "Checking whether the utilities package exposes the infra submodule:"
fd -a '__init__.py' utilities | while read -r file; do
  echo "== $file =="
  rg -nC2 '\binfra\b|__getattr__|__all__' "$file" || true
done

Repository: opendatahub-io/opendatahub-tests

Length of output: 367


🏁 Script executed:

#!/bin/bash
set -euo pipefail

echo "=== Contents of utilities/__init__.py ==="
cat -n utilities/__init__.py

echo ""
echo "=== Check for utilities/infra module/package ==="
fd -a 'infra' utilities

echo ""
echo "=== Actual usage in conftest.py around line 20 and 76-79 ==="
sed -n '15,85p' tests/llama_stack/conftest.py | cat -n

Repository: opendatahub-io/opendatahub-tests

Length of output: 3119


🏁 Script executed:

#!/bin/bash
set -euo pipefail

echo "=== Full import section of conftest.py ==="
head -n 50 tests/llama_stack/conftest.py | cat -n

echo ""
echo "=== Search for 'import utilities.infra' or 'from utilities import infra' ==="
rg 'import utilities\.infra|from utilities import infra' tests/llama_stack/conftest.py

echo ""
echo "=== Check if is_disconnected_cluster fixture is actually used ==="
rg 'is_disconnected_cluster' tests/llama_stack/conftest.py

Repository: opendatahub-io/opendatahub-tests

Length of output: 2309


🏁 Script executed:

#!/bin/bash
set -euo pipefail

echo "=== Search for uses of is_disconnected_cluster fixture ==="
rg 'is_disconnected_cluster' tests/llama_stack/ -l

echo ""
echo "=== Count usages ==="
rg 'is_disconnected_cluster' tests/llama_stack/ | wc -l

Repository: opendatahub-io/opendatahub-tests

Length of output: 188


🏁 Script executed:

#!/bin/bash
set -euo pipefail

echo "=== Search for test functions using is_disconnected_cluster as parameter ==="
rg 'def test.*is_disconnected_cluster' tests/llama_stack/

echo ""
echo "=== Show all lines with is_disconnected_cluster in conftest.py ==="
rg -n 'is_disconnected_cluster' tests/llama_stack/conftest.py

Repository: opendatahub-io/opendatahub-tests

Length of output: 152


🏁 Script executed:

#!/bin/bash
set -euo pipefail

echo "=== All lines with is_disconnected_cluster in conftest.py with context ==="
rg -n 'is_disconnected_cluster' tests/llama_stack/conftest.py -B2 -A2

Repository: opendatahub-io/opendatahub-tests

Length of output: 1824


🏁 Script executed:

#!/bin/bash
set -euo pipefail

echo "=== Check if utilities/infra.py has is_disconnected_cluster function ==="
rg -n 'def is_disconnected_cluster' utilities/infra.py

Repository: opendatahub-io/opendatahub-tests

Length of output: 212


Fix the broken utilities.infra import that causes an AttributeError at fixture evaluation.

Line 79 accesses utilities.infra.is_disconnected_cluster(...) but import utilities (line 20) does not import the infra submodule—utilities/__init__.py is empty. This causes AttributeError: module 'utilities' has no attribute 'infra' when the fixture is evaluated by llama_stack_server_config (line 89).

Required fix
-import utilities
+from utilities.infra import is_disconnected_cluster as detect_disconnected_cluster
@@
 `@pytest.fixture`(scope="class")
 def is_disconnected_cluster(admin_client: DynamicClient) -> bool:
     """Whether the target cluster is disconnected (air-gapped)."""
-    return utilities.infra.is_disconnected_cluster(client=admin_client)
+    return detect_disconnected_cluster(client=admin_client)
🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@tests/llama_stack/conftest.py` at line 20, The test module imports the
package but not its infra submodule, causing AttributeError when the
llama_stack_server_config fixture calls utilities.infra.is_disconnected_cluster;
explicitly import the submodule instead of the bare package (e.g., replace the
top-level "import utilities" with an explicit "from utilities import infra" or
"import utilities.infra as infra") and update the fixture references
(llama_stack_server_config and any direct calls to utilities.infra) to use the
imported infra symbol so utilities.infra is available at evaluation time.

from tests.llama_stack.constants import (
HTTPS_PROXY,
LLAMA_STACK_DISTRIBUTION_SECRET_DATA,
LLS_CLIENT_VERIFY_SSL,
LLS_CORE_EMBEDDING_MODEL,
LLS_CORE_EMBEDDING_PROVIDER_MODEL_ID,
LLS_CORE_INFERENCE_MODEL,
Expand Down Expand Up @@ -71,13 +73,20 @@ def enabled_llama_stack_operator(dsc_resource: DataScienceCluster) -> Generator[
yield dsc


@pytest.fixture(scope="class")
def is_disconnected_cluster(admin_client: DynamicClient) -> bool:
"""Whether the target cluster is disconnected (air-gapped)."""
return utilities.infra.is_disconnected_cluster(client=admin_client)


@pytest.fixture(scope="class")
def llama_stack_server_config(
request: FixtureRequest,
pytestconfig: pytest.Config,
distribution_name: str,
vector_io_provider_deployment_config_factory: Callable[[str], list[dict[str, str]]],
files_provider_config_factory: Callable[[str], list[dict[str, str]]],
is_disconnected_cluster: bool,
) -> dict[str, Any]:
"""
Generate server configuration for LlamaStack distribution deployment and deploy vector I/O provider resources.
Expand All @@ -94,6 +103,7 @@ def llama_stack_server_config(
and return their configuration environment variables
files_provider_config_factory: Factory function to configure files storage providers
and return their configuration environment variables
is_disconnected_cluster: Whether the target cluster is disconnected (air-gapped)

Returns:
Dict containing server configuration with the following structure:
Expand Down Expand Up @@ -141,7 +151,10 @@ def test_with_remote_milvus(llama_stack_server_config):
"""

env_vars = []
tls_config: dict[str, Any] | None = None
params = getattr(request, "param", {})
cpu_requests = "2"
cpu_limits = "4"

# INFERENCE_MODEL
if params.get("inference_model"):
Expand Down Expand Up @@ -191,8 +204,21 @@ def test_with_remote_milvus(llama_stack_server_config):
env_vars.append({"name": "VLLM_EMBEDDING_MAX_TOKENS", "value": LLS_CORE_VLLM_EMBEDDING_MAX_TOKENS})
env_vars.append({"name": "VLLM_EMBEDDING_TLS_VERIFY", "value": LLS_CORE_VLLM_EMBEDDING_TLS_VERIFY})
elif embedding_provider == "sentence-transformers":
# Increase CPU limits to prevent timeouts when inserting files into vector stores
cpu_requests = "4"
cpu_limits = "8"

# Enable sentence-transformers embedding model
env_vars.append({"name": "ENABLE_SENTENCE_TRANSFORMERS", "value": "true"})
env_vars.append({"name": "EMBEDDING_PROVIDER", "value": "sentence-transformers"})

if is_disconnected_cluster:
# Workaround to fix sentence-transformer embeddings on disconnected (RHAIENG-1624)
env_vars.append({"name": "SENTENCE_TRANSFORMERS_HOME", "value": "/opt/app-root/src/.cache/huggingface/hub"})
env_vars.append({"name": "HF_HUB_OFFLINE", "value": "1"})
env_vars.append({"name": "TRANSFORMERS_OFFLINE", "value": "1"})
env_vars.append({"name": "HF_DATASETS_OFFLINE", "value": "1"})

else:
raise ValueError(f"Unsupported embeddings provider: {embedding_provider}")

Expand Down Expand Up @@ -229,11 +255,35 @@ def test_with_remote_milvus(llama_stack_server_config):
env_vars_vector_io = vector_io_provider_deployment_config_factory(provider_name=vector_io_provider)
env_vars.extend(env_vars_vector_io)

if is_disconnected_cluster and HTTPS_PROXY:
LOGGER.info(f"Setting proxy and tlsconfig configuration (https_proxy:{HTTPS_PROXY})")
env_vars.append({"name": "HTTPS_PROXY", "value": HTTPS_PROXY})
Comment on lines +258 to +260
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

🧩 Analysis chain

🏁 Script executed:

#!/bin/bash
set -euo pipefail

rg -nC2 '"HTTPS_PROXY"|"NO_PROXY"|"https_proxy"|"no_proxy"|LLS_CORE_VLLM_URL|LLS_CORE_VLLM_EMBEDDING_URL' tests/llama_stack

Repository: opendatahub-io/opendatahub-tests

Length of output: 3832


🏁 Script executed:

#!/bin/bash
# Get the full context around lines 258-260 in conftest.py
head -n 280 tests/llama_stack/conftest.py | tail -n 40

Repository: opendatahub-io/opendatahub-tests

Length of output: 2080


🏁 Script executed:

#!/bin/bash
# Search for NO_PROXY/no_proxy in conftest.py
rg -n 'NO_PROXY|no_proxy' tests/llama_stack/conftest.py

Repository: opendatahub-io/opendatahub-tests

Length of output: 58


🏁 Script executed:

#!/bin/bash
# Check what HTTPS_PROXY constant value is
rg -n 'HTTPS_PROXY\s*=' tests/llama_stack/conftest.py

Repository: opendatahub-io/opendatahub-tests

Length of output: 58


🏁 Script executed:

#!/bin/bash
# Check imports in conftest.py to find HTTPS_PROXY
head -n 50 tests/llama_stack/conftest.py | grep -n HTTPS_PROXY

Repository: opendatahub-io/opendatahub-tests

Length of output: 95


🏁 Script executed:

#!/bin/bash
# Check all references to LLS_CORE_VLLM_URL in constants.py
grep -n "LLS_CORE_VLLM_URL" tests/llama_stack/constants.py

Repository: opendatahub-io/opendatahub-tests

Length of output: 133


🏁 Script executed:

#!/bin/bash
# Get the full constants.py to understand defaults
cat tests/llama_stack/constants.py | head -n 65

Repository: opendatahub-io/opendatahub-tests

Length of output: 2579


Add a NO_PROXY exemption when setting HTTPS_PROXY for disconnected clusters.

When HTTPS_PROXY is set, all outbound HTTPS requests use the proxy. LLS_CORE_VLLM_EMBEDDING_URL defaults to https://nomic-embed-text-v1-5.example.com:443/v1, and LLS_CORE_VLLM_URL may also be HTTPS. Both will be routed through the proxy without a NO_PROXY exemption, causing connectivity failures in disconnected clusters where the proxy cannot reach these internal endpoints. Set NO_PROXY to "localhost,127.0.0.1" or the appropriate exempt hosts alongside HTTPS_PROXY.

🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@tests/llama_stack/conftest.py` around lines 258 - 260, When
is_disconnected_cluster is true and HTTPS_PROXY is added to env_vars, also
append a NO_PROXY entry to exempt local/internal hosts so embedded/internal
HTTPS endpoints (LLS_CORE_VLLM_EMBEDDING_URL, LLS_CORE_VLLM_URL) are bypassed by
the proxy; update the block that logs and appends HTTPS_PROXY (referencing
LOGGER, HTTPS_PROXY, and env_vars) to also append {"name": "NO_PROXY", "value":
"localhost,127.0.0.1"} (or configurable exempt hosts) and include this in the
log message.


# The operator sets SSL_CERT_FILE automatically when tlsConfig.caBundle is
# configured, but the `requests` library (used by tiktoken to download
# tokenizer data) ignores SSL_CERT_FILE and only checks REQUESTS_CA_BUNDLE.
# Without this, tiktoken fails with SSL CERTIFICATE_VERIFY_FAILED when the
# proxy uses a self-signed certificate (e.g. in disconnected clusters).
env_vars.append({
"name": "REQUESTS_CA_BUNDLE",
"value": "/etc/ssl/certs/ca-bundle/ca-bundle.crt",
})

tls_config = {
"caBundle": {
"configMapName": "odh-trusted-ca-bundle",
"configMapKeys": [
"ca-bundle.crt", # CNO-injected cluster CAs
"odh-ca-bundle.crt", # User-specified custom CAs
],
},
}

server_config: dict[str, Any] = {
"containerSpec": {
"resources": {
"requests": {"cpu": "1", "memory": "3Gi"},
"limits": {"cpu": "3", "memory": "6Gi"},
"requests": {"cpu": cpu_requests, "memory": "3Gi"},
"limits": {"cpu": cpu_limits, "memory": "6Gi"},
},
"env": env_vars,
"name": "llama-stack",
Expand All @@ -242,9 +292,15 @@ def test_with_remote_milvus(llama_stack_server_config):
"distribution": {"name": "rh-dev"},
}

if tls_config:
server_config["tlsConfig"] = tls_config

if params.get("llama_stack_storage_size"):
storage_size = params.get("llama_stack_storage_size")
server_config["storage"] = {"size": storage_size}
if is_disconnected_cluster:
LOGGER.warning("Skipping storage_size configuration on disconnected clusters due to known bug RHAIENG-1819")
else:
storage_size = params.get("llama_stack_storage_size")
server_config["storage"] = {"size": storage_size}

return server_config

Expand Down Expand Up @@ -593,14 +649,13 @@ def llama_stack_test_route(
def _create_llama_stack_client(
route: Route,
) -> Generator[LlamaStackClient, Any, Any]:
# LLS_CLIENT_VERIFY_SSL is false by default to be able to test with Self-Signed certificates
verifySSL = os.getenv("LLS_CLIENT_VERIFY_SSL", "false").lower() == "true"
http_client = httpx.Client(verify=verifySSL, timeout=240)
http_client = httpx.Client(verify=LLS_CLIENT_VERIFY_SSL, timeout=300)
try:
client = LlamaStackClient(
base_url=f"https://{route.host}",
max_retries=3,
http_client=http_client,
timeout=300,
)
wait_for_llama_stack_client_ready(client=client)
existing_file_ids = {f.id for f in client.files.list().data}
Expand Down
4 changes: 4 additions & 0 deletions tests/llama_stack/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,10 @@ class ModelInfo(NamedTuple):
embedding_dimension: int # API returns integer (e.g., 768)


HTTPS_PROXY: str = os.getenv("SQUID_HTTPS_PROXY", "")

# LLS_CLIENT_VERIFY_SSL is false by default to be able to test with Self-Signed certificates
LLS_CLIENT_VERIFY_SSL = os.getenv("LLS_CLIENT_VERIFY_SSL", "false").lower() == "true"
LLS_CORE_POD_FILTER: str = "app=llama-stack"
LLS_OPENSHIFT_MINIMAL_VERSION: VersionInfo = semver.VersionInfo.parse("4.17.0")

Expand Down
7 changes: 5 additions & 2 deletions tests/llama_stack/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,10 @@ def vector_store_create_and_poll(
TimeoutError: If wait_timeout is reached while status is still in_progress.
"""
vs_file = llama_stack_client.vector_stores.files.create(
vector_store_id=vector_store_id, file_id=file_id, attributes=attributes
vector_store_id=vector_store_id,
file_id=file_id,
timeout=240, # Increased timeout for slow processing (e.g., sentence-transformers)
attributes=dict(attributes) if attributes else attributes,
)
Comment on lines 105 to 110
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

🧩 Analysis chain

🏁 Script executed:

# First, locate the file
find . -name "utils.py" -path "*/llama_stack/*" | head -20

Repository: opendatahub-io/opendatahub-tests

Length of output: 138


🏁 Script executed:

# Read the relevant section of the file
cat -n tests/llama_stack/utils.py | sed -n '85,130p'

Repository: opendatahub-io/opendatahub-tests

Length of output: 2329


🏁 Script executed:

rg -n "vector_store_create_and_poll" tests/

Repository: opendatahub-io/opendatahub-tests

Length of output: 214


🏁 Script executed:

# Also check the function signature to see default wait_timeout
cat -n tests/llama_stack/utils.py | sed -n '75,98p'

Repository: opendatahub-io/opendatahub-tests

Length of output: 1187


🏁 Script executed:

cat -n tests/llama_stack/utils.py | sed -n '290,310p'

Repository: opendatahub-io/opendatahub-tests

Length of output: 1300


🏁 Script executed:

# Also check if there are any other uses or integration points
rg -B 5 -A 5 "vector_store_create_and_poll\(" tests/llama_stack/utils.py | head -40

Repository: opendatahub-io/opendatahub-tests

Length of output: 1049


🏁 Script executed:

# Check if there are other callers or integration tests that rely on the wait_timeout contract
rg -B 3 -A 3 "wait_timeout" tests/llama_stack/utils.py | head -50

Repository: opendatahub-io/opendatahub-tests

Length of output: 1799


🏁 Script executed:

# Verify the actual execution path and timing behavior
cat -n tests/llama_stack/utils.py | sed -n '112,122p'

Repository: opendatahub-io/opendatahub-tests

Length of output: 719


The request timeout must respect the wait_timeout contract.

The hard-coded timeout=240 in the create() call (line 108) is independent of wait_timeout, causing total execution to potentially exceed the documented budget. The deadline is set after create completes (line 112), allowing the function to run up to 240s + wait_timeout. With default wait_timeout=240, a caller gets up to 480s instead of the promised 240s.

Tie the request timeout to the polling timeout so total execution respects the wait_timeout contract.

Proposed fix
 def vector_store_create_and_poll(
     llama_stack_client: LlamaStackClient,
     vector_store_id: str,
     file_id: str,
     *,
     attributes: dict[str, str | int | float | bool] | None = None,
     poll_interval_sec: float = 5.0,
     wait_timeout: float = 240.0,
+    request_timeout: float | None = None,
 ) -> VectorStoreFile:
+    create_timeout = wait_timeout if request_timeout is None else min(request_timeout, wait_timeout)
     vs_file = llama_stack_client.vector_stores.files.create(
         vector_store_id=vector_store_id,
         file_id=file_id,
-        timeout=240,  # Increased timeout for slow processing (e.g., sentence-transformers)
+        timeout=create_timeout,
         attributes=dict(attributes) if attributes else attributes,
     )
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
vs_file = llama_stack_client.vector_stores.files.create(
vector_store_id=vector_store_id, file_id=file_id, attributes=attributes
vector_store_id=vector_store_id,
file_id=file_id,
timeout=240, # Increased timeout for slow processing (e.g., sentence-transformers)
attributes=dict(attributes) if attributes else attributes,
)
def vector_store_create_and_poll(
llama_stack_client: LlamaStackClient,
vector_store_id: str,
file_id: str,
*,
attributes: dict[str, str | int | float | bool] | None = None,
poll_interval_sec: float = 5.0,
wait_timeout: float = 240.0,
request_timeout: float | None = None,
) -> VectorStoreFile:
create_timeout = wait_timeout if request_timeout is None else min(request_timeout, wait_timeout)
vs_file = llama_stack_client.vector_stores.files.create(
vector_store_id=vector_store_id,
file_id=file_id,
timeout=create_timeout,
attributes=dict(attributes) if attributes else attributes,
)
🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@tests/llama_stack/utils.py` around lines 105 - 110, Replace the hard-coded
timeout=240 passed to llama_stack_client.vector_stores.files.create with a value
derived from the function's wait_timeout budget: record a start time
(time.monotonic()), compute request_timeout = max(1, int(wait_timeout -
(time.monotonic() - start))) before calling vector_stores.files.create, and pass
timeout=request_timeout (instead of 240); ensure you handle request_timeout <= 0
by raising or setting a minimal timeout so the subsequent polling/deadline logic
still enforces the overall wait_timeout contract for the operation involving
vector_store_id, file_id and attributes.

terminal_statuses = ("completed", "failed", "cancelled")
deadline = time.monotonic() + wait_timeout
Expand Down Expand Up @@ -155,7 +158,7 @@ def create_llama_stack_distribution(


@retry(
wait_timeout=60,
wait_timeout=240,
sleep=5,
exceptions_dict={ResourceNotFoundError: [], UnexpectedResourceCountError: []},
)
Expand Down