Skip to content

Commit fbe1049

Browse files
authored
Merge branch 'main' into gaussian-credit-model-revert
2 parents 8377a01 + c54386e commit fbe1049

File tree

8 files changed

+225
-179
lines changed

8 files changed

+225
-179
lines changed

tests/fixtures/guardrails.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -52,11 +52,11 @@ def guardrails_orchestrator(
5252
metrics_endpoint = request.getfixturevalue(argname="otelcol_metrics_endpoint")
5353
traces_endpoint = request.getfixturevalue(argname="tempo_traces_endpoint")
5454
gorch_kwargs["otel_exporter"] = {
55-
"metricsEndpoint": metrics_endpoint,
56-
"metricsProtocol": "grpc",
57-
"otlpExport": "metrics,traces",
58-
"tracesEndpoint": traces_endpoint,
59-
"tracesProtocol": "grpc",
55+
"otlpProtocol": "grpc",
56+
"otlpMetricsEndpoint": metrics_endpoint,
57+
"otlpTracesEndpoint": traces_endpoint,
58+
"enableMetrics": True,
59+
"enableTracing": True,
6060
}
6161

6262
with GuardrailsOrchestrator(**gorch_kwargs) as gorch:

tests/model_explainability/guardrails/conftest.py

Lines changed: 25 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -158,19 +158,24 @@ def installed_tempo_operator(admin_client: DynamicClient, model_namespace: Names
158158
"""
159159
Installs the Tempo operator and waits for its deployment.
160160
"""
161-
operator_ns = Namespace(name="openshift-operators", ensure_exists=True)
161+
162+
operator_ns_name = "openshift-tempo-operator"
163+
operator_ns = Namespace(name=operator_ns_name)
164+
if not operator_ns.exists:
165+
operator_ns.create()
166+
162167
package_name = "tempo-product"
163168

164169
install_operator(
165170
admin_client=admin_client,
166-
target_namespaces=["openshift-operators"],
171+
target_namespaces=None,
167172
name=package_name,
168173
channel="stable",
169174
source="redhat-operators",
170175
operator_namespace=operator_ns.name,
171176
timeout=Timeout.TIMEOUT_15MIN,
172177
install_plan_approval="Automatic",
173-
starting_csv="tempo-operator.v0.18.0-1",
178+
starting_csv="tempo-operator.v0.18.0-2",
174179
)
175180

176181
deployment = Deployment(
@@ -199,8 +204,7 @@ def tempo_stack(
199204
minio_secret_otel: Secret,
200205
) -> Generator[Any, Any, None]:
201206
"""
202-
Create a TempoStack CR in the test namespace, configured to use the MinIO backend.
203-
Mirrors the structure and management pattern of the mariadb_operator_cr fixture.
207+
Create a TempoStack CR in the test namespace, configured to use MinIO backend.
204208
"""
205209
csv_prefix = "tempo-operator"
206210
tempo_name = "my-tempo-stack"
@@ -209,20 +213,21 @@ def tempo_stack(
209213
tempo_csv: ClusterServiceVersion = get_cluster_service_version(
210214
client=admin_client,
211215
prefix=csv_prefix,
212-
namespace="openshift-operators",
216+
namespace="openshift-tempo-operator",
213217
)
214218

215-
# Retrieve example CRs (ALM examples)
219+
# Retrieve ALM examples and pick TempoStack CR
216220
alm_examples: list[dict[str, Any]] = tempo_csv.get_alm_examples()
217-
218-
# Find the TempoStack kind example
219-
tempo_stack_dict: dict[str, Any] = next(
220-
example
221-
for example in alm_examples
222-
if example["kind"] == "TempoStack" and example["apiVersion"].startswith("tempo.grafana.com/")
221+
tempo_stack_dict = next(
222+
(
223+
example
224+
for example in alm_examples
225+
if example["kind"] == "TempoStack" and example["apiVersion"].startswith("tempo.grafana.com/")
226+
),
227+
None,
223228
)
224229
if not tempo_stack_dict:
225-
raise ResourceNotFoundError(f"No TempoStack dict found in alm_examples for CSV {tempo_csv.name}")
230+
raise ResourceNotFoundError(f"No TempoStack dict found in ALM examples for CSV {tempo_csv.name}")
226231

227232
# Customize metadata
228233
tempo_stack_dict["metadata"]["namespace"] = model_namespace.name
@@ -267,7 +272,10 @@ def installed_opentelemetry_operator(admin_client: DynamicClient) -> Generator[N
267272
"""
268273
Installs the Red Hat OpenTelemetry Operator and waits for its deployment.
269274
"""
270-
operator_ns = Namespace(name="openshift-operators", ensure_exists=True)
275+
operator_namespace = "openshift-opentelemetry-operator"
276+
operator_ns = Namespace(name=operator_namespace)
277+
if not operator_ns.exists:
278+
operator_ns.create()
271279

272280
package_name = "opentelemetry-product"
273281

@@ -317,7 +325,7 @@ def otel_collector(
317325
otel_csv: ClusterServiceVersion = get_cluster_service_version(
318326
client=admin_client,
319327
prefix="opentelemetry",
320-
namespace="openshift-operators",
328+
namespace="openshift-opentelemetry-operator",
321329
)
322330

323331
# Extract OpenTelemetryCollector CR example from ALM examples
@@ -568,7 +576,7 @@ def tempo_traces_service_portforward(admin_client, model_namespace, tempo_stack)
568576
oc -n <ns> port-forward svc/tempo-my-tempo-stack-query-frontend 16686:16686
569577
"""
570578
namespace = model_namespace.name
571-
service_name = f"tempo-{tempo_stack.name}-query-frontend" # tempo-my-tempo-stack-query-frontend"
579+
service_name = f"tempo-{tempo_stack.name}-query-frontend"
572580
local_port = 16686
573581
local_url = f"http://localhost:{local_port}"
574582

tests/model_explainability/guardrails/test_guardrails.py

Lines changed: 12 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,3 @@
1-
import http
2-
31
import pytest
42
import requests
53
import yaml
@@ -403,15 +401,18 @@ def test_guardrails_traces_in_tempo(
403401

404402
@retry(wait_timeout=Timeout.TIMEOUT_1MIN, sleep=5)
405403
def check_traces():
406-
response = requests.get(f"{tempo_traces_service_portforward}/api/traces?service=fms_guardrails_orchestr8")
407-
if response.status_code == http.HTTPStatus.OK:
408-
data = response.json()
409-
if data.get("data"): # non-empty list of traces
410-
return data
411-
return False
412-
413-
traces = check_traces()
414-
assert traces["data"], "No traces found in Tempo for Guardrails Orchestrator"
404+
services = requests.get(f"{tempo_traces_service_portforward}/api/services").json().get("data", [])
405+
406+
guardrails_services = [s for s in services if "guardrails" in s]
407+
if not guardrails_services:
408+
return False
409+
410+
svc = guardrails_services[0]
411+
412+
traces = requests.get(f"{tempo_traces_service_portforward}/api/traces?service={svc}").json()
413+
414+
if traces.get("data"):
415+
return traces
415416

416417

417418
@pytest.mark.parametrize(

tests/model_registry/model_catalog/test_default_model_catalog.py

Lines changed: 16 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@
77
from ocp_resources.deployment import Deployment
88
from simple_logger.logger import get_logger
99
from typing import Self, Any
10+
from timeout_sampler import TimeoutSampler
11+
from kubernetes.dynamic.exceptions import ResourceNotFoundError
1012

1113
from ocp_resources.pod import Pod
1214
from ocp_resources.config_map import ConfigMap
@@ -170,10 +172,20 @@ def test_model_catalog_default_catalog_sources(
170172
Validate specific user can access default model catalog source
171173
"""
172174
LOGGER.info("Attempting client connection with token")
173-
result = execute_get_command(
174-
url=f"{model_catalog_rest_url[0]}sources",
175-
headers=get_rest_headers(token=user_token_for_api_calls),
176-
)["items"]
175+
176+
url = f"{model_catalog_rest_url[0]}sources"
177+
headers = get_rest_headers(token=user_token_for_api_calls)
178+
179+
# Retry for up to 2 minutes to allow RBAC propagation
180+
# Accept ResourceNotFoundError (401) as a transient error during RBAC propagation
181+
sampler = TimeoutSampler(
182+
wait_timeout=120,
183+
sleep=5,
184+
func=lambda: execute_get_command(url=url, headers=headers)["items"],
185+
exceptions_dict={ResourceNotFoundError: []},
186+
)
187+
188+
result = next(iter(sampler))
177189
assert result
178190
items_to_validate = []
179191
if pytestconfig.option.pre_upgrade or pytestconfig.option.post_upgrade:

tests/model_registry/rbac/conftest.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -122,16 +122,16 @@ def created_role_binding_user(
122122
mr_access_role: Role,
123123
user_credentials_rbac: dict[str, str],
124124
) -> Generator[RoleBinding, None, None]:
125-
if is_byoidc:
126-
user_credentials_rbac["username"] = "mr-non-admin"
127-
LOGGER.info(f"Using user {user_credentials_rbac['username']}")
125+
# Determine the username to use without mutating the shared fixture
126+
username = "mr-non-admin" if is_byoidc else user_credentials_rbac["username"]
127+
LOGGER.info(f"Using user {username}")
128128
yield from create_role_binding(
129129
admin_client=admin_client,
130130
model_registry_namespace=model_registry_namespace,
131131
name="test-model-registry-access",
132132
mr_access_role=mr_access_role,
133133
subjects_kind="User",
134-
subjects_name=user_credentials_rbac["username"],
134+
subjects_name=username,
135135
)
136136

137137

tests/model_registry/rbac/test_mr_rbac.py

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -91,15 +91,17 @@ def test_user_added_to_group(
9191
1. After adding the user to the appropriate group, they gain access
9292
"""
9393
# Wait for access to be granted
94-
user_credentials_rbac["username"] = "mr-user1"
94+
# Create a copy to avoid mutating the shared fixture
95+
creds_copy = user_credentials_rbac.copy()
96+
creds_copy["username"] = "mr-user1"
9597
sampler = TimeoutSampler(
9698
wait_timeout=240,
9799
sleep=5,
98100
func=assert_positive_mr_registry,
99101
model_registry_instance_rest_endpoint=model_registry_instance_rest_endpoint[0],
100102
token=get_openshift_token()
101103
if not is_byoidc
102-
else get_mr_user_token(admin_client=admin_client, user_credentials_rbac=user_credentials_rbac),
104+
else get_mr_user_token(admin_client=admin_client, user_credentials_rbac=creds_copy),
103105
)
104106
for _ in sampler:
105107
break # Break after first successful iteration
@@ -145,13 +147,15 @@ def test_add_single_user_role_binding(
145147
2. The user can access the Model Registry after being granted access
146148
"""
147149
if is_byoidc:
148-
user_credentials_rbac["username"] = "mr-non-admin"
150+
# Create a copy to avoid mutating the shared fixture
151+
creds_copy = user_credentials_rbac.copy()
152+
creds_copy["username"] = "mr-non-admin"
149153
sampler = TimeoutSampler(
150154
wait_timeout=120,
151155
sleep=5,
152156
func=assert_positive_mr_registry,
153157
model_registry_instance_rest_endpoint=model_registry_instance_rest_endpoint[0],
154-
token=get_mr_user_token(admin_client=admin_client, user_credentials_rbac=user_credentials_rbac),
158+
token=get_mr_user_token(admin_client=admin_client, user_credentials_rbac=creds_copy),
155159
)
156160
for _ in sampler:
157161
break # Break after first successful iteration

tests/model_registry/rbac/test_mr_rbac_sa.py

Lines changed: 38 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,13 @@
11
# AI Disclaimer: Google Gemini 2.5 pro has been used to generate a majority of this code, with human review and editing.
22
import pytest
3-
from typing import Self
3+
from typing import Any, Self
44
from simple_logger.logger import get_logger
55
from model_registry import ModelRegistry as ModelRegistryClient
66
from tests.model_registry.rbac.utils import build_mr_client_args
77
from utilities.infra import create_inference_token
8-
from mr_openapi.exceptions import ForbiddenException
8+
from mr_openapi.exceptions import ForbiddenException, UnauthorizedException
99
from ocp_resources.service_account import ServiceAccount
10+
from timeout_sampler import TimeoutSampler, retry
1011

1112
LOGGER = get_logger(name=__name__)
1213

@@ -44,12 +45,11 @@ def test_service_account_access_denied(
4445
)
4546
LOGGER.debug(f"Attempting client connection with args: {client_args}")
4647

47-
# Expect an exception related to HTTP 403
48-
with pytest.raises(ForbiddenException) as exc_info:
49-
_ = ModelRegistryClient(**client_args)
48+
# Retry for up to 2 minutes if we get UnauthorizedException (401) during kube-rbac-proxy initialization
49+
# Expect ForbiddenException (403) once kube-rbac-proxy is fully initialized
50+
http_error = _try_connection_expect_forbidden(client_args=client_args)
5051

5152
# Verify the status code from the caught exception
52-
http_error = exc_info.value
5353
assert http_error.body is not None, "HTTPError should have a response object"
5454
LOGGER.info(f"Received expected HTTP error: Status Code {http_error.status}")
5555
assert http_error.status == 403, f"Expected HTTP 403 Forbidden, but got {http_error.status}"
@@ -69,21 +69,44 @@ def test_service_account_access_granted(
6969
LOGGER.info(f"Targeting Model Registry REST endpoint: {model_registry_instance_rest_endpoint[0]}")
7070
LOGGER.info("Applied RBAC Role/Binding via fixtures. Expecting access GRANT.")
7171

72-
# Create a fresh token to bypass OAuth proxy cache from previous test
72+
# Create a fresh token to bypass kube-rbac-proxy cache from previous test
7373
fresh_token = create_inference_token(model_service_account=service_account)
74+
client_args = build_mr_client_args(
75+
rest_endpoint=model_registry_instance_rest_endpoint[0], token=fresh_token, author="rbac-test-granted"
76+
)
77+
LOGGER.debug(f"Attempting client connection with args: {client_args}")
78+
79+
# Retry for up to 2 minutes to allow RBAC propagation
80+
# Accept UnauthorizedException (401) as a transient error during RBAC propagation
81+
sampler = TimeoutSampler(
82+
wait_timeout=120,
83+
sleep=5,
84+
func=lambda: ModelRegistryClient(**client_args),
85+
exceptions_dict={UnauthorizedException: []},
86+
)
7487

7588
try:
76-
client_args = build_mr_client_args(
77-
rest_endpoint=model_registry_instance_rest_endpoint[0], token=fresh_token, author="rbac-test-granted"
78-
)
79-
LOGGER.debug(f"Attempting client connection with args: {client_args}")
80-
mr_client_success = ModelRegistryClient(**client_args)
89+
# Get the first successful result
90+
mr_client_success = next(iter(sampler))
8191
assert mr_client_success is not None, "Client initialization failed after granting permissions"
8292
LOGGER.info("Client instantiated successfully after granting permissions.")
83-
8493
except Exception as e:
85-
# If we get an exception here, it's unexpected, especially 403
86-
LOGGER.error(f"Received unexpected general error after granting access: {e}", exc_info=True)
94+
LOGGER.error(f"Failed to access Model Registry after granting permissions: {e}", exc_info=True)
8795
raise
8896

8997
LOGGER.info("--- RBAC Test Completed Successfully ---")
98+
99+
100+
@retry(wait_timeout=120, sleep=5, exceptions_dict={UnauthorizedException: []})
101+
def _try_connection_expect_forbidden(client_args: dict[str, Any]) -> ForbiddenException:
102+
"""
103+
Attempts to create a ModelRegistryClient and expects ForbiddenException.
104+
Retries on UnauthorizedException (401) during kube-rbac-proxy initialization.
105+
Returns the ForbiddenException when received.
106+
"""
107+
try:
108+
ModelRegistryClient(**client_args)
109+
raise AssertionError("Expected ForbiddenException but client connection succeeded")
110+
except ForbiddenException as e:
111+
# This is what we want - 403 Forbidden
112+
return e

0 commit comments

Comments
 (0)