-
Notifications
You must be signed in to change notification settings - Fork 61
Support llama stack tests on disconnected using https_proxy #1336
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
11f2cf2
52f8ee2
f8c7071
522a7de
a2bbc60
55a5db7
6da6210
617e5e0
83257fd
9bb2a64
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,4 +1,3 @@ | ||
| import os | ||
| from collections.abc import Callable, Generator | ||
| from typing import Any | ||
|
|
||
|
|
@@ -18,8 +17,11 @@ | |
| from ocp_resources.service import Service | ||
| from semver import Version | ||
|
|
||
| import utilities | ||
| from tests.llama_stack.constants import ( | ||
| HTTPS_PROXY, | ||
| LLAMA_STACK_DISTRIBUTION_SECRET_DATA, | ||
| LLS_CLIENT_VERIFY_SSL, | ||
| LLS_CORE_EMBEDDING_MODEL, | ||
| LLS_CORE_EMBEDDING_PROVIDER_MODEL_ID, | ||
| LLS_CORE_INFERENCE_MODEL, | ||
|
|
@@ -71,13 +73,20 @@ def enabled_llama_stack_operator(dsc_resource: DataScienceCluster) -> Generator[ | |
| yield dsc | ||
|
|
||
|
|
||
| @pytest.fixture(scope="class") | ||
| def is_disconnected_cluster(admin_client: DynamicClient) -> bool: | ||
| """Whether the target cluster is disconnected (air-gapped).""" | ||
| return utilities.infra.is_disconnected_cluster(client=admin_client) | ||
|
|
||
|
|
||
| @pytest.fixture(scope="class") | ||
| def llama_stack_server_config( | ||
| request: FixtureRequest, | ||
| pytestconfig: pytest.Config, | ||
| distribution_name: str, | ||
| vector_io_provider_deployment_config_factory: Callable[[str], list[dict[str, str]]], | ||
| files_provider_config_factory: Callable[[str], list[dict[str, str]]], | ||
| is_disconnected_cluster: bool, | ||
| ) -> dict[str, Any]: | ||
| """ | ||
| Generate server configuration for LlamaStack distribution deployment and deploy vector I/O provider resources. | ||
|
|
@@ -94,6 +103,7 @@ def llama_stack_server_config( | |
| and return their configuration environment variables | ||
| files_provider_config_factory: Factory function to configure files storage providers | ||
| and return their configuration environment variables | ||
| is_disconnected_cluster: Whether the target cluster is disconnected (air-gapped) | ||
|
|
||
| Returns: | ||
| Dict containing server configuration with the following structure: | ||
|
|
@@ -141,7 +151,10 @@ def test_with_remote_milvus(llama_stack_server_config): | |
| """ | ||
|
|
||
| env_vars = [] | ||
| tls_config: dict[str, Any] | None = None | ||
| params = getattr(request, "param", {}) | ||
| cpu_requests = "2" | ||
| cpu_limits = "4" | ||
|
|
||
| # INFERENCE_MODEL | ||
| if params.get("inference_model"): | ||
|
|
@@ -191,8 +204,21 @@ def test_with_remote_milvus(llama_stack_server_config): | |
| env_vars.append({"name": "VLLM_EMBEDDING_MAX_TOKENS", "value": LLS_CORE_VLLM_EMBEDDING_MAX_TOKENS}) | ||
| env_vars.append({"name": "VLLM_EMBEDDING_TLS_VERIFY", "value": LLS_CORE_VLLM_EMBEDDING_TLS_VERIFY}) | ||
| elif embedding_provider == "sentence-transformers": | ||
| # Increase CPU limits to prevent timeouts when inserting files into vector stores | ||
| cpu_requests = "4" | ||
| cpu_limits = "8" | ||
|
|
||
| # Enable sentence-transformers embedding model | ||
| env_vars.append({"name": "ENABLE_SENTENCE_TRANSFORMERS", "value": "true"}) | ||
| env_vars.append({"name": "EMBEDDING_PROVIDER", "value": "sentence-transformers"}) | ||
|
|
||
| if is_disconnected_cluster: | ||
| # Workaround to fix sentence-transformer embeddings on disconnected (RHAIENG-1624) | ||
| env_vars.append({"name": "SENTENCE_TRANSFORMERS_HOME", "value": "/opt/app-root/src/.cache/huggingface/hub"}) | ||
| env_vars.append({"name": "HF_HUB_OFFLINE", "value": "1"}) | ||
| env_vars.append({"name": "TRANSFORMERS_OFFLINE", "value": "1"}) | ||
| env_vars.append({"name": "HF_DATASETS_OFFLINE", "value": "1"}) | ||
|
|
||
| else: | ||
| raise ValueError(f"Unsupported embeddings provider: {embedding_provider}") | ||
|
|
||
|
|
@@ -229,11 +255,35 @@ def test_with_remote_milvus(llama_stack_server_config): | |
| env_vars_vector_io = vector_io_provider_deployment_config_factory(provider_name=vector_io_provider) | ||
| env_vars.extend(env_vars_vector_io) | ||
|
|
||
| if is_disconnected_cluster and HTTPS_PROXY: | ||
| LOGGER.info(f"Setting proxy and tlsconfig configuration (https_proxy:{HTTPS_PROXY})") | ||
| env_vars.append({"name": "HTTPS_PROXY", "value": HTTPS_PROXY}) | ||
|
Comment on lines
+258
to
+260
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 🧩 Analysis chain🏁 Script executed: #!/bin/bash
set -euo pipefail
rg -nC2 '"HTTPS_PROXY"|"NO_PROXY"|"https_proxy"|"no_proxy"|LLS_CORE_VLLM_URL|LLS_CORE_VLLM_EMBEDDING_URL' tests/llama_stackRepository: opendatahub-io/opendatahub-tests Length of output: 3832 🏁 Script executed: #!/bin/bash
# Get the full context around lines 258-260 in conftest.py
head -n 280 tests/llama_stack/conftest.py | tail -n 40Repository: opendatahub-io/opendatahub-tests Length of output: 2080 🏁 Script executed: #!/bin/bash
# Search for NO_PROXY/no_proxy in conftest.py
rg -n 'NO_PROXY|no_proxy' tests/llama_stack/conftest.pyRepository: opendatahub-io/opendatahub-tests Length of output: 58 🏁 Script executed: #!/bin/bash
# Check what HTTPS_PROXY constant value is
rg -n 'HTTPS_PROXY\s*=' tests/llama_stack/conftest.pyRepository: opendatahub-io/opendatahub-tests Length of output: 58 🏁 Script executed: #!/bin/bash
# Check imports in conftest.py to find HTTPS_PROXY
head -n 50 tests/llama_stack/conftest.py | grep -n HTTPS_PROXYRepository: opendatahub-io/opendatahub-tests Length of output: 95 🏁 Script executed: #!/bin/bash
# Check all references to LLS_CORE_VLLM_URL in constants.py
grep -n "LLS_CORE_VLLM_URL" tests/llama_stack/constants.pyRepository: opendatahub-io/opendatahub-tests Length of output: 133 🏁 Script executed: #!/bin/bash
# Get the full constants.py to understand defaults
cat tests/llama_stack/constants.py | head -n 65Repository: opendatahub-io/opendatahub-tests Length of output: 2579 Add When 🤖 Prompt for AI Agents |
||
|
|
||
| # The operator sets SSL_CERT_FILE automatically when tlsConfig.caBundle is | ||
| # configured, but the `requests` library (used by tiktoken to download | ||
| # tokenizer data) ignores SSL_CERT_FILE and only checks REQUESTS_CA_BUNDLE. | ||
| # Without this, tiktoken fails with SSL CERTIFICATE_VERIFY_FAILED when the | ||
| # proxy uses a self-signed certificate (e.g. in disconnected clusters). | ||
| env_vars.append({ | ||
| "name": "REQUESTS_CA_BUNDLE", | ||
| "value": "/etc/ssl/certs/ca-bundle/ca-bundle.crt", | ||
| }) | ||
|
|
||
| tls_config = { | ||
| "caBundle": { | ||
| "configMapName": "odh-trusted-ca-bundle", | ||
| "configMapKeys": [ | ||
| "ca-bundle.crt", # CNO-injected cluster CAs | ||
| "odh-ca-bundle.crt", # User-specified custom CAs | ||
| ], | ||
| }, | ||
| } | ||
|
|
||
| server_config: dict[str, Any] = { | ||
| "containerSpec": { | ||
| "resources": { | ||
| "requests": {"cpu": "1", "memory": "3Gi"}, | ||
| "limits": {"cpu": "3", "memory": "6Gi"}, | ||
| "requests": {"cpu": cpu_requests, "memory": "3Gi"}, | ||
| "limits": {"cpu": cpu_limits, "memory": "6Gi"}, | ||
| }, | ||
| "env": env_vars, | ||
| "name": "llama-stack", | ||
|
|
@@ -242,9 +292,15 @@ def test_with_remote_milvus(llama_stack_server_config): | |
| "distribution": {"name": "rh-dev"}, | ||
| } | ||
|
|
||
| if tls_config: | ||
| server_config["tlsConfig"] = tls_config | ||
|
|
||
| if params.get("llama_stack_storage_size"): | ||
| storage_size = params.get("llama_stack_storage_size") | ||
| server_config["storage"] = {"size": storage_size} | ||
| if is_disconnected_cluster: | ||
| LOGGER.warning("Skipping storage_size configuration on disconnected clusters due to known bug RHAIENG-1819") | ||
| else: | ||
| storage_size = params.get("llama_stack_storage_size") | ||
| server_config["storage"] = {"size": storage_size} | ||
jgarciao marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
|
||
| return server_config | ||
|
|
||
|
|
@@ -593,14 +649,13 @@ def llama_stack_test_route( | |
| def _create_llama_stack_client( | ||
| route: Route, | ||
| ) -> Generator[LlamaStackClient, Any, Any]: | ||
| # LLS_CLIENT_VERIFY_SSL is false by default to be able to test with Self-Signed certificates | ||
| verifySSL = os.getenv("LLS_CLIENT_VERIFY_SSL", "false").lower() == "true" | ||
| http_client = httpx.Client(verify=verifySSL, timeout=240) | ||
| http_client = httpx.Client(verify=LLS_CLIENT_VERIFY_SSL, timeout=300) | ||
| try: | ||
| client = LlamaStackClient( | ||
| base_url=f"https://{route.host}", | ||
| max_retries=3, | ||
| http_client=http_client, | ||
| timeout=300, | ||
| ) | ||
| wait_for_llama_stack_client_ready(client=client) | ||
| existing_file_ids = {f.id for f in client.files.list().data} | ||
|
|
||
| Original file line number | Diff line number | Diff line change | ||||||||||||||||||||||||||||||||||||||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
|
|
@@ -103,7 +103,10 @@ def vector_store_create_and_poll( | |||||||||||||||||||||||||||||||||||||||||||||||||
| TimeoutError: If wait_timeout is reached while status is still in_progress. | ||||||||||||||||||||||||||||||||||||||||||||||||||
| """ | ||||||||||||||||||||||||||||||||||||||||||||||||||
| vs_file = llama_stack_client.vector_stores.files.create( | ||||||||||||||||||||||||||||||||||||||||||||||||||
| vector_store_id=vector_store_id, file_id=file_id, attributes=attributes | ||||||||||||||||||||||||||||||||||||||||||||||||||
| vector_store_id=vector_store_id, | ||||||||||||||||||||||||||||||||||||||||||||||||||
| file_id=file_id, | ||||||||||||||||||||||||||||||||||||||||||||||||||
| timeout=240, # Increased timeout for slow processing (e.g., sentence-transformers) | ||||||||||||||||||||||||||||||||||||||||||||||||||
| attributes=dict(attributes) if attributes else attributes, | ||||||||||||||||||||||||||||||||||||||||||||||||||
| ) | ||||||||||||||||||||||||||||||||||||||||||||||||||
|
Comment on lines
105
to
110
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 🧩 Analysis chain🏁 Script executed: # First, locate the file
find . -name "utils.py" -path "*/llama_stack/*" | head -20Repository: opendatahub-io/opendatahub-tests Length of output: 138 🏁 Script executed: # Read the relevant section of the file
cat -n tests/llama_stack/utils.py | sed -n '85,130p'Repository: opendatahub-io/opendatahub-tests Length of output: 2329 🏁 Script executed: rg -n "vector_store_create_and_poll" tests/Repository: opendatahub-io/opendatahub-tests Length of output: 214 🏁 Script executed: # Also check the function signature to see default wait_timeout
cat -n tests/llama_stack/utils.py | sed -n '75,98p'Repository: opendatahub-io/opendatahub-tests Length of output: 1187 🏁 Script executed: cat -n tests/llama_stack/utils.py | sed -n '290,310p'Repository: opendatahub-io/opendatahub-tests Length of output: 1300 🏁 Script executed: # Also check if there are any other uses or integration points
rg -B 5 -A 5 "vector_store_create_and_poll\(" tests/llama_stack/utils.py | head -40Repository: opendatahub-io/opendatahub-tests Length of output: 1049 🏁 Script executed: # Check if there are other callers or integration tests that rely on the wait_timeout contract
rg -B 3 -A 3 "wait_timeout" tests/llama_stack/utils.py | head -50Repository: opendatahub-io/opendatahub-tests Length of output: 1799 🏁 Script executed: # Verify the actual execution path and timing behavior
cat -n tests/llama_stack/utils.py | sed -n '112,122p'Repository: opendatahub-io/opendatahub-tests Length of output: 719 Request timeout must respect The hard-coded Tie the request timeout to the polling timeout so total execution respects the Proposed fix def vector_store_create_and_poll(
llama_stack_client: LlamaStackClient,
vector_store_id: str,
file_id: str,
*,
attributes: dict[str, str | int | float | bool] | None = None,
poll_interval_sec: float = 5.0,
wait_timeout: float = 240.0,
+ request_timeout: float | None = None,
) -> VectorStoreFile:
+ create_timeout = wait_timeout if request_timeout is None else min(request_timeout, wait_timeout)
vs_file = llama_stack_client.vector_stores.files.create(
vector_store_id=vector_store_id,
file_id=file_id,
- timeout=240, # Increased timeout for slow processing (e.g., sentence-transformers)
+ timeout=create_timeout,
attributes=dict(attributes) if attributes else attributes,
)📝 Committable suggestion
Suggested change
🤖 Prompt for AI Agents |
||||||||||||||||||||||||||||||||||||||||||||||||||
| terminal_statuses = ("completed", "failed", "cancelled") | ||||||||||||||||||||||||||||||||||||||||||||||||||
| deadline = time.monotonic() + wait_timeout | ||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
@@ -155,7 +158,7 @@ def create_llama_stack_distribution( | |||||||||||||||||||||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||||||||||||||||||||||
| @retry( | ||||||||||||||||||||||||||||||||||||||||||||||||||
| wait_timeout=60, | ||||||||||||||||||||||||||||||||||||||||||||||||||
| wait_timeout=240, | ||||||||||||||||||||||||||||||||||||||||||||||||||
| sleep=5, | ||||||||||||||||||||||||||||||||||||||||||||||||||
| exceptions_dict={ResourceNotFoundError: [], UnexpectedResourceCountError: []}, | ||||||||||||||||||||||||||||||||||||||||||||||||||
| ) | ||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||||||||||||||||||||||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
🧩 Analysis chain
🏁 Script executed:
Repository: opendatahub-io/opendatahub-tests
Length of output: 367
🏁 Script executed:
Repository: opendatahub-io/opendatahub-tests
Length of output: 3119
🏁 Script executed:
Repository: opendatahub-io/opendatahub-tests
Length of output: 2309
🏁 Script executed:
Repository: opendatahub-io/opendatahub-tests
Length of output: 188
🏁 Script executed:
Repository: opendatahub-io/opendatahub-tests
Length of output: 152
🏁 Script executed:
Repository: opendatahub-io/opendatahub-tests
Length of output: 1824
🏁 Script executed:
Repository: opendatahub-io/opendatahub-tests
Length of output: 212
Fix broken
utilities.infraimport causing AttributeError at fixture evaluation.Line 79 accesses
utilities.infra.is_disconnected_cluster(...)butimport utilities(line 20) does not import theinfrasubmodule—utilities/__init__.pyis empty. This causesAttributeError: module 'utilities' has no attribute 'infra'when the fixture is evaluated byllama_stack_server_config(line 89).Required fix
🤖 Prompt for AI Agents