Skip to content

Commit 2eefbd1

Browse files
committed
Configure OpenTelemetry on existing tests
modified: tests/model_explainability/guardrails/conftest.py modified: tests/model_explainability/guardrails/test_guardrails.py modified: pyproject.toml modified: tests/model_explainability/guardrails/conftest.py modified: tests/model_explainability/guardrails/test_guardrails.py
1 parent 97aac18 commit 2eefbd1

File tree

3 files changed

+243
-3
lines changed

3 files changed

+243
-3
lines changed

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,7 @@ dependencies = [
6262
"timeout-sampler>=1.0.6",
6363
"shortuuid>=1.0.13",
6464
"jira>=3.8.0",
65-
"openshift-python-wrapper>=11.0.57",
65+
"openshift-python-wrapper>=11.0.92",
6666
"semver>=3.0.4",
6767
"sqlalchemy>=2.0.40",
6868
"pytest-order>=1.3.0",

tests/model_explainability/guardrails/conftest.py

Lines changed: 203 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,25 +6,32 @@
66
import pytest
77
from _pytest.fixtures import FixtureRequest
88
from kubernetes.dynamic import DynamicClient
9+
from kubernetes.dynamic.exceptions import ResourceNotFoundError
10+
from ocp_resources.cluster_service_version import ClusterServiceVersion
911
from ocp_resources.config_map import ConfigMap
1012
from ocp_resources.deployment import Deployment
13+
from ocp_resources.exceptions import MissingRequiredArgumentError
1114
from ocp_resources.guardrails_orchestrator import GuardrailsOrchestrator
1215
from ocp_resources.inference_service import InferenceService
1316
from ocp_resources.namespace import Namespace
1417
from ocp_resources.pod import Pod
15-
from ocp_resources.resource import ResourceEditor
18+
from ocp_resources.resource import ResourceEditor, NamespacedResource
1619
from ocp_resources.route import Route
1720
from ocp_resources.secret import Secret
1821
from ocp_resources.serving_runtime import ServingRuntime
22+
from ocp_resources.subscription import Subscription
23+
from ocp_utilities.operators import install_operator, uninstall_operator
1924
from pytest_testconfig import py_config
25+
from timeout_sampler import TimeoutSampler
2026

2127
from utilities.certificates_utils import create_ca_bundle_file
2228
from utilities.constants import (
2329
KServeDeploymentType,
2430
Labels,
31+
Timeout, OPENSHIFT_OPERATORS,
2532
)
2633
from utilities.inference_utils import create_isvc
27-
34+
from utilities.operator_utils import get_cluster_service_version
2835

2936
GUARDRAILS_ORCHESTRATOR_NAME = "guardrails-orchestrator"
3037

@@ -351,3 +358,197 @@ def hap_detector_route(
351358
service=hap_detector_isvc.name,
352359
wait_for_resource=True,
353360
)
361+
362+
@pytest.fixture(scope="class")
def installed_opentelemetry_operator(admin_client: DynamicClient) -> Generator[None, Any, None]:
    """
    Install the OpenTelemetry operator and wait for its controller deployment.

    The operator is installed from the "redhat-operators" catalog source into
    the "openshift-operators" namespace, pinned to a starting CSV. On teardown
    the operator is uninstalled; the namespace itself is left in place
    (``clean_up_namespace=False``).

    Args:
        admin_client: Client used for the install/uninstall calls and for the
            deployment lookup.

    Yields:
        None. The fixture is used for its side effect only.
    """
    operator_ns = Namespace(name="openshift-operators", ensure_exists=True)
    package_name = "opentelemetry-operator"

    install_operator(
        admin_client=admin_client,
        target_namespaces=[operator_ns.name],
        name=package_name,
        channel="stable",
        source="redhat-operators",
        operator_namespace=operator_ns.name,
        timeout=Timeout.TIMEOUT_15MIN,
        install_plan_approval="Automatic",
        starting_csv="opentelemetry-operator.v0.127.0-2",
    )

    # Once the install call has returned, always uninstall: without the
    # try/finally a failure while waiting for the deployment would skip the
    # yield and leave the operator behind on the cluster.
    try:
        deployment = Deployment(
            client=admin_client,
            namespace=operator_ns.name,
            name="opentelemetry-operator-controller-manager",
            wait_for_resource=True,
        )
        deployment.wait_for_replicas()

        yield
    finally:
        uninstall_operator(
            admin_client=admin_client,
            name=package_name,
            operator_namespace=operator_ns.name,
            clean_up_namespace=False,
        )
399+
400+
@pytest.fixture(scope="class")
def otel_operator_cr(
    admin_client: DynamicClient,
    installed_opentelemetry_operator: None,
    model_namespace: Namespace,
) -> Generator[OpenTelemetryCollector, Any, Any]:
    """
    Create an OpenTelemetryCollector CR in the test namespace.

    The CR body is taken from the ALM examples of the ClusterServiceVersion
    whose name starts with "opentelemetry", with its namespace overridden to
    the test namespace. The fixture yields once the CR reports the
    "Available" condition.

    Args:
        admin_client: Client used to look up the ClusterServiceVersion.
        installed_opentelemetry_operator: Requested only so that the operator
            fixture runs first.
        model_namespace: Namespace the collector CR is created in.

    Yields:
        The created OpenTelemetryCollector resource.

    Raises:
        ResourceNotFoundError: If the CSV's ALM examples contain no
            OpenTelemetryCollector entry.
    """
    # NOTE(review): the CSV is looked up in the model namespace here, while
    # jaeger_instance looks its CSV up in OPENSHIFT_OPERATORS. Confirm the
    # OpenTelemetry CSV is actually visible in the model namespace.
    otel_csv: ClusterServiceVersion = get_cluster_service_version(
        client=admin_client,
        prefix="opentelemetry",
        namespace=model_namespace.name,
    )

    alm_examples: list[dict[str, Any]] = otel_csv.get_alm_examples()
    # Give next() a default: without one a missing example raises
    # StopIteration and the explicit error below can never be reached.
    otel_cr_dict: dict[str, Any] | None = next(
        (example for example in alm_examples if example.get("kind") == "OpenTelemetryCollector"),
        None,
    )

    if not otel_cr_dict:
        raise ResourceNotFoundError(
            f"No OpenTelemetryCollector dict found in alm_examples for CSV {otel_csv.name}"
        )

    otel_cr_dict["metadata"]["namespace"] = model_namespace.name

    with OpenTelemetryCollector(kind_dict=otel_cr_dict) as otel_cr:
        otel_cr.wait_for_condition(
            condition="Available",
            status=OpenTelemetryCollector.Condition.Status.TRUE,
            timeout=Timeout.TIMEOUT_10MIN,
        )
        yield otel_cr
432+
433+
@pytest.fixture(scope="class")
def installed_jaeger_operator(
    admin_client: DynamicClient, model_namespace: Namespace
) -> Generator[None, Any, None]:
    """
    Install the Jaeger operator and wait for its deployment.

    The "jaeger-product" package is installed from the "redhat-operators"
    catalog source into the "openshift-operators" namespace, pinned to a
    starting CSV. On teardown the operator is uninstalled; the namespace
    itself is left in place (``clean_up_namespace=False``).

    Args:
        admin_client: Client used for the install/uninstall calls and for the
            deployment lookup.
        model_namespace: Not used in the body; requesting it makes pytest set
            up the test namespace before this fixture.

    Yields:
        None. The fixture is used for its side effect only.
    """
    operator_ns = Namespace(name="openshift-operators", ensure_exists=True)
    package_name = "jaeger-product"

    install_operator(
        admin_client=admin_client,
        target_namespaces=[operator_ns.name],
        name=package_name,
        channel="stable",
        source="redhat-operators",
        operator_namespace=operator_ns.name,
        timeout=Timeout.TIMEOUT_15MIN,
        install_plan_approval="Automatic",
        starting_csv="jaeger-operator.v1.65.0-4",
    )

    # Once the install call has returned, always uninstall: without the
    # try/finally a failure while waiting for the deployment would skip the
    # yield and leave the operator behind on the cluster.
    try:
        deployment = Deployment(
            client=admin_client,
            namespace=operator_ns.name,
            name="jaeger-operator",
            wait_for_resource=True,
        )
        deployment.wait_for_replicas()

        yield
    finally:
        uninstall_operator(
            admin_client=admin_client,
            name=package_name,
            operator_namespace=operator_ns.name,
            clean_up_namespace=False,
        )
471+
472+
473+
@pytest.fixture(scope="class")
def jaeger_instance(
    admin_client: DynamicClient,
    installed_jaeger_operator: None,
    model_namespace: Namespace,
) -> Generator[Jaeger, Any, None]:
    """
    Create a Jaeger instance in the test namespace.

    The CR body is taken from the ALM examples of the Jaeger operator's
    ClusterServiceVersion, with its namespace overridden to the test
    namespace and its name set to "simplest". The fixture yields once the
    pods labelled with the instance name are ready.

    Args:
        admin_client: Client used for the CSV lookup and the pod queries.
        installed_jaeger_operator: Requested only so that the operator
            fixture runs first.
        model_namespace: Namespace the Jaeger CR is created in.

    Yields:
        The created Jaeger resource.

    Raises:
        ResourceNotFoundError: If the CSV's ALM examples contain no Jaeger
            entry.
    """
    # The CSV name is jaeger-operator.v1.65.0-4, so the prefix should be `jaeger-operator`.
    csv_prefix = "jaeger-operator"

    jaeger_csv: ClusterServiceVersion = get_cluster_service_version(
        client=admin_client,
        prefix=csv_prefix,
        namespace=OPENSHIFT_OPERATORS,
    )

    alm_examples: list[dict[str, Any]] = jaeger_csv.get_alm_examples()
    # Give next() a default: without one a missing example raises
    # StopIteration and the explicit error below can never be reached.
    jaeger_dict: dict[str, Any] | None = next(
        (example for example in alm_examples if example.get("kind") == "Jaeger"),
        None,
    )

    if not jaeger_dict:
        raise ResourceNotFoundError(
            f"No Jaeger dict found in alm_examples for CSV {jaeger_csv.name}"
        )

    jaeger_dict["metadata"]["namespace"] = model_namespace.name
    jaeger_dict["metadata"]["name"] = "simplest"

    with Jaeger(kind_dict=jaeger_dict) as jaeger:
        wait_for_jaeger_pods(
            client=admin_client,
            jaeger_name=jaeger.name,
            namespace=model_namespace.name,
        )
        yield jaeger
510+
511+
def wait_for_jaeger_operator_deployments(namespace: str) -> None:
    """
    Block until the "jaeger-operator" deployment has its replicas available.

    Args:
        namespace: Namespace in which the operator deployment is looked up.
    """
    Deployment(name="jaeger-operator", namespace=namespace).wait_for_replicas()
519+
520+
521+
def wait_for_jaeger_pods(
    client: DynamicClient,
    jaeger_name: str,
    namespace: str,
    timeout: int = Timeout.TIMEOUT_15MIN,
) -> None:
    """
    Wait until the pods belonging to a Jaeger instance exist and are ready.

    Pods are selected by the ``app.kubernetes.io/instance=<jaeger_name>``
    label. The function first polls until at least one such pod is listed,
    then lists the pods again and waits for each one's READY condition.

    Args:
        client: Client used for the pod queries.
        jaeger_name: Name of the Jaeger instance, used as the label value.
        namespace: Namespace in which the pods are searched.
        timeout: Upper bound, passed to the sampler, for the first matching
            pod to appear.
    """
    selector = f"app.kubernetes.io/instance={jaeger_name}"

    def _list_instance_pods() -> list[Pod]:
        return list(
            Pod.get(
                dyn_client=client,
                namespace=namespace,
                label_selector=selector,
            )
        )

    # Phase 1: poll once per second until the label selector matches something.
    for pods_present in TimeoutSampler(
        wait_timeout=timeout, sleep=1, func=lambda: bool(_list_instance_pods())
    ):
        if pods_present:
            break

    # Phase 2: query again and wait for readiness on every pod returned.
    for instance_pod in _list_instance_pods():
        instance_pod.wait_for_condition(
            condition=Pod.Condition.READY,
            status="True",
        )

tests/model_explainability/guardrails/test_guardrails.py

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,16 @@
11
import http
2+
import time
23
from typing import Dict, Any
34

45
import pytest
56
import requests
67
import yaml
8+
from ocp_resources.pod import Pod
79
from simple_logger.logger import get_logger
810
from timeout_sampler import retry
911

1012
from tests.model_explainability.constants import MNT_MODELS
13+
from tests.model_explainability.guardrails.conftest import wait_for_jaeger_pods
1114
from tests.model_explainability.guardrails.constants import (
1215
QWEN_ISVC_NAME,
1316
CHAT_GENERATION_CONFIG,
@@ -197,6 +200,41 @@ def test_guardrails_builtin_detectors_unsuitable_output(
197200
response=response, detector_id="regex", detection_name="email_address", detection_type="pii"
198201
)
199202

203+
def test_guardrails_traces_in_jaeger(
    self,
    admin_client,
    jaeger_instance,
    otel_operator_cr,
    model_namespace,
    minio_pod,
    minio_data_connection,
    orchestrator_config,
    guardrails_orchestrator,
    guardrails_gateway_config,
):
    """
    Ensure that OpenTelemetry traces from Guardrails Orchestrator are collected in Jaeger.
    Equivalent to clicking 'Find Traces' in the Jaeger UI.

    The Jaeger query API is polled for up to one minute until it answers
    200 OK with a non-empty "data" list.
    """
    # Jaeger query service URL (in-cluster).
    # NOTE(review): a *.svc host name presumably only resolves from inside
    # the cluster - confirm the test runner can reach it.
    jaeger_query_service = f"http://{jaeger_instance.name}-query.{model_namespace.name}.svc:16686/api/traces"

    # NOTE(review): the query filters on service "jaeger-all-in-one"; confirm
    # this is the service name the orchestrator's traces are reported under.
    @retry(wait_timeout=Timeout.TIMEOUT_1MIN, sleep=5)
    def check_traces():
        # An explicit timeout keeps one stuck connection from hanging the
        # test. No fixed sleep is needed up front: the retry keeps polling
        # until traces show up.
        response = requests.get(
            jaeger_query_service,
            params={"service": "jaeger-all-in-one"},
            timeout=10,
        )
        if response.status_code == http.HTTPStatus.OK:
            data = response.json()
            if data.get("data"):  # non-empty list of traces
                return data
        return False

    traces = check_traces()
    assert traces["data"], "No traces found in Jaeger for service jaeger-all-in-one"
237+
200238
@pytest.mark.parametrize(
201239
"message, url_path",
202240
[
@@ -481,3 +519,4 @@ def test_guardrails_several_detector_negative_detection(
481519
)
482520

483521
verify_negative_detection_response(response=response)
522+

0 commit comments

Comments
 (0)