Skip to content

Commit 857b3db

Browse files
committed
Configure OpenTelemetry on existing tests
modified: tests/model_explainability/guardrails/conftest.py modified: tests/model_explainability/guardrails/test_guardrails.py
1 parent 97aac18 commit 857b3db

File tree

2 files changed

+259
-2
lines changed

2 files changed

+259
-2
lines changed

tests/model_explainability/guardrails/conftest.py

Lines changed: 202 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,25 +6,31 @@
66
import pytest
77
from _pytest.fixtures import FixtureRequest
88
from kubernetes.dynamic import DynamicClient
9+
from kubernetes.dynamic.exceptions import ResourceNotFoundError
10+
from ocp_resources.cluster_service_version import ClusterServiceVersion
911
from ocp_resources.config_map import ConfigMap
1012
from ocp_resources.deployment import Deployment
1113
from ocp_resources.guardrails_orchestrator import GuardrailsOrchestrator
1214
from ocp_resources.inference_service import InferenceService
1315
from ocp_resources.namespace import Namespace
1416
from ocp_resources.pod import Pod
15-
from ocp_resources.resource import ResourceEditor
17+
from ocp_resources.resource import ResourceEditor, NamespacedResource
1618
from ocp_resources.route import Route
1719
from ocp_resources.secret import Secret
1820
from ocp_resources.serving_runtime import ServingRuntime
21+
from ocp_resources.subscription import Subscription
22+
from ocp_utilities.operators import install_operator, uninstall_operator
1923
from pytest_testconfig import py_config
24+
from timeout_sampler import TimeoutSampler
2025

2126
from utilities.certificates_utils import create_ca_bundle_file
2227
from utilities.constants import (
2328
KServeDeploymentType,
2429
Labels,
30+
Timeout,
2531
)
2632
from utilities.inference_utils import create_isvc
27-
33+
from utilities.operator_utils import get_cluster_service_version
2834

2935
GUARDRAILS_ORCHESTRATOR_NAME = "guardrails-orchestrator"
3036

@@ -351,3 +357,197 @@ def hap_detector_route(
351357
service=hap_detector_isvc.name,
352358
wait_for_resource=True,
353359
)
360+
361+
362+
class OpenTelemetryCollector(NamespacedResource):
    """
    OpenTelemetryCollector is the Schema for the OpenTelemetry Collectors.

    Thin wrapper so tests can manage OpenTelemetryCollector CRs; the CRD is
    provided by the OpenTelemetry operator under the ``opentelemetry.io`` group.
    """

    # Removed the no-op __init__ that only forwarded to super(); the base
    # NamespacedResource constructor is used directly, same call signature.
    api_group: str = "opentelemetry.io"
370+
371+
372+
class OpenTelemetryOperator(NamespacedResource):
    """
    OpenTelemetryOperator is the Schema for the opentelemetryoperators API.

    Thin wrapper over NamespacedResource for CRs in the ``opentelemetry.io`` group.
    """

    # Removed the no-op __init__ that only forwarded to super(); the base
    # NamespacedResource constructor is used directly, same call signature.
    api_group: str = "opentelemetry.io"
381+
382+
@pytest.fixture(scope="class")
def installed_opentelemetry_operator(admin_client: DynamicClient, model_namespace: Namespace) -> Generator[None, Any, None]:
    """
    Install the Red Hat OpenTelemetry operator in the same namespace as the test
    so CRs like OpenTelemetryCollector/Instrumentation are watched properly.

    Installation is skipped when a Subscription named "opentelemetry-operator"
    already exists in the namespace. The operator is uninstalled on teardown.
    """
    operator_ns = model_namespace
    operator_name = "opentelemetry-operator"

    otel_subscription = Subscription(
        client=admin_client,
        namespace=operator_ns.name,
        name=operator_name,
    )

    if not otel_subscription.exists:
        install_operator(
            admin_client=admin_client,
            target_namespaces=[operator_ns.name],
            name=operator_name,
            channel="stable",
            source="redhat-operators",
            operator_namespace=operator_ns.name,
            timeout=Timeout.TIMEOUT_15MIN,
            install_plan_approval="Automatic",
            # Pinned CSV so test runs stay reproducible across catalog updates.
            starting_csv="opentelemetry-operator.v0.127.0-2",
        )

    # Readiness is gated on the operator's controller-manager deployment.
    deployment = Deployment(
        client=admin_client,
        namespace=operator_ns.name,
        name="opentelemetry-operator-controller-manager",
        wait_for_resource=True,
    )
    deployment.wait_for_replicas()

    yield

    # NOTE(review): clean_up_namespace=True deletes the operator namespace,
    # which here is the test's model_namespace — confirm this does not clash
    # with the model_namespace fixture's own teardown.
    uninstall_operator(
        admin_client=admin_client,
        name=operator_name,
        operator_namespace=operator_ns.name,
        clean_up_namespace=True,
    )
426+
427+
@pytest.fixture(scope="class")
def otel_operator_cr(
    admin_client: DynamicClient,
    installed_opentelemetry_operator: None,
    model_namespace: Namespace,  # use the test namespace
) -> Generator[OpenTelemetryCollector, Any, Any]:
    """
    Create an OpenTelemetryCollector CR in the test namespace based on the ALM
    examples shipped in the operator's CSV.

    Yields:
        OpenTelemetryCollector: the created CR, after it reports Available.

    Raises:
        ResourceNotFoundError: if the CSV carries no OpenTelemetryCollector example.
    """
    otel_csv: ClusterServiceVersion = get_cluster_service_version(
        client=admin_client,
        prefix="opentelemetry",
        namespace=model_namespace.name,  # fetch CSV from the same namespace as operator
    )

    alm_examples: list[dict[str, Any]] = otel_csv.get_alm_examples()
    # Use a default of None so the explicit ResourceNotFoundError below is
    # reachable; a bare next() would raise StopIteration instead.
    otel_cr_dict: dict[str, Any] | None = next(
        (example for example in alm_examples if example["kind"] == "OpenTelemetryCollector"),
        None,
    )

    if not otel_cr_dict:
        raise ResourceNotFoundError(f"No OpenTelemetryCollector dict found in alm_examples for CSV {otel_csv.name}")

    otel_cr_dict["metadata"]["namespace"] = model_namespace.name  # create CR in test namespace

    with OpenTelemetryCollector(kind_dict=otel_cr_dict) as otel_cr:
        otel_cr.wait_for_condition(
            condition="Available",
            status=OpenTelemetryCollector.Condition.Status.TRUE,
            timeout=Timeout.TIMEOUT_10MIN,
        )
        yield otel_cr
457+
458+
459+
class Jaeger(NamespacedResource):
    """
    Jaeger instance CR for Red Hat OpenShift distributed tracing platform.

    The Jaeger CRD is served under the ``jaegertracing.io`` API group
    (resource: ``jaegers.jaegertracing.io``).
    """

    # Fixed: the previous value "io.jaegertracing.openshift.v1" was the
    # reversed package path with a version fused in, not the CRD's API group.
    api_group: str = "jaegertracing.io"
467+
468+
@pytest.fixture(scope="session")
def installed_jaeger_operator(admin_client: DynamicClient) -> Generator[None, Any, None]:
    """
    Install Red Hat OpenShift distributed tracing platform (Jaeger operator).

    The Subscription is created only when absent; a Subscription this fixture
    created is removed on teardown so the cluster is left as it was found.
    """
    operator_ns = Namespace(name="openshift-distributed-tracing", ensure_exists=True)
    operator_name = "jaeger-product"

    jaeger_subscription = Subscription(
        client=admin_client,
        namespace=operator_ns.name,
        name=operator_name,
        source="redhat-operators",
        channel="stable",
        # Pinned CSV so test runs stay reproducible across catalog updates.
        starting_csv="jaeger-product.v1.65.0-4",
        # Fixed: ocp_resources.Subscription takes snake_case kwargs; the
        # previous camelCase installPlanApproval= was not a valid parameter.
        install_plan_approval="Automatic",
    )

    created = False
    if not jaeger_subscription.exists:
        jaeger_subscription.create()
        created = True
        # Do not yield until the operator deployment is actually running,
        # otherwise dependent fixtures race the CSV installation.
        wait_for_jaeger_operator_deployments(namespace=operator_ns.name)

    yield

    if created:
        # Only remove what this fixture created; leave pre-existing installs alone.
        jaeger_subscription.clean_up()
488+
489+
@pytest.fixture(scope="class")
def jaeger_instance(
    admin_client: DynamicClient,
    installed_jaeger_operator: None,  # ensure the Jaeger operator is installed first
    model_namespace: Namespace,
) -> Generator[Jaeger, Any, None]:
    """
    Create a Jaeger instance in the test namespace using the all-in-one strategy.

    Yields:
        Jaeger: the created instance, after its pods are ready.

    Raises:
        ResourceNotFoundError: if the operator CSV carries no Jaeger ALM example.
    """
    jaeger_csv: ClusterServiceVersion = get_cluster_service_version(
        client=admin_client,
        prefix="jaeger",
        namespace="openshift-distributed-tracing",
    )
    alm_examples: list[dict[str, Any]] = jaeger_csv.get_alm_examples()
    # Use a default of None so the explicit ResourceNotFoundError below is
    # reachable; a bare next() would raise StopIteration instead.
    jaeger_dict: dict[str, Any] | None = next(
        (example for example in alm_examples if example["kind"] == "Jaeger"),
        None,
    )

    if not jaeger_dict:
        raise ResourceNotFoundError(f"No Jaeger dict found in alm_examples for CSV {jaeger_csv.name}")

    jaeger_dict["metadata"]["namespace"] = model_namespace.name
    jaeger_dict["metadata"]["name"] = "simplest"
    jaeger_dict["spec"]["strategy"] = "allInOne"

    with Jaeger(kind_dict=jaeger_dict) as jaeger:
        wait_for_jaeger_pods(
            client=admin_client,
            jaeger_name=jaeger.name,
            namespace=model_namespace.name,
        )
        yield jaeger
516+
517+
def wait_for_jaeger_operator_deployments(namespace: str) -> None:
    """
    Block until the Jaeger operator deployment in *namespace* has ready replicas.
    """
    # The operator ships a single deployment named "jaeger-operator".
    operator = Deployment(name="jaeger-operator", namespace=namespace)
    operator.wait_for_replicas()
525+
526+
527+
def wait_for_jaeger_pods(client: DynamicClient, jaeger_name: str, namespace: str, timeout: int = Timeout.TIMEOUT_15MIN) -> None:
    """
    Wait for pods created by a Jaeger instance to appear and become ready.

    Args:
        client: dynamic client used to list pods.
        jaeger_name: name of the Jaeger CR; pods are matched via the
            app.kubernetes.io/instance label.
        namespace: namespace to search for the pods.
        timeout: overall wait budget, applied both to pod discovery and to
            each pod's readiness wait.
    """

    def _get_jaeger_pods() -> list[Pod]:
        # Plain list() instead of a pass-through comprehension.
        return list(
            Pod.get(
                dyn_client=client,
                namespace=namespace,
                label_selector=f"app.kubernetes.io/instance={jaeger_name}",
            )
        )

    # Phase 1: wait until at least one pod for the instance exists.
    sampler = TimeoutSampler(wait_timeout=timeout, sleep=1, func=lambda: bool(_get_jaeger_pods()))
    for sample in sampler:
        if sample:
            break

    # Phase 2: wait for every discovered pod to report Ready.
    for pod in _get_jaeger_pods():
        pod.wait_for_condition(
            condition=Pod.Condition.READY,
            status="True",
            # Fixed: the timeout parameter was previously accepted but never
            # forwarded to the per-pod readiness wait.
            timeout=timeout,
        )
553+

tests/model_explainability/guardrails/test_guardrails.py

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,10 +4,12 @@
44
import pytest
55
import requests
66
import yaml
7+
from ocp_resources.pod import Pod
78
from simple_logger.logger import get_logger
89
from timeout_sampler import retry
910

1011
from tests.model_explainability.constants import MNT_MODELS
12+
from tests.model_explainability.guardrails.conftest import wait_for_jaeger_pods
1113
from tests.model_explainability.guardrails.constants import (
1214
QWEN_ISVC_NAME,
1315
CHAT_GENERATION_CONFIG,
@@ -481,3 +483,58 @@ def test_guardrails_several_detector_negative_detection(
481483
)
482484

483485
verify_negative_detection_response(response=response)
486+
487+
@pytest.mark.parametrize(
    "model_namespace, orchestrator_config, guardrails_orchestrator",
    [
        pytest.param(
            # Test namespace; fixtures are resolved indirectly from these dicts.
            {"name": "test-guardrails-opentelemetry"},
            {
                "orchestrator_config_data": {
                    "config.yaml": yaml.dump({
                        "chat_generation": CHAT_GENERATION_CONFIG,
                        "detectors": BUILTIN_DETECTOR_CONFIG,
                    })
                },
            },
            {"enable_built_in_detectors": False, "enable_guardrails_gateway": False},
        )
    ],
    indirect=True,
)
@pytest.mark.rawdeployment
class TestGuardrailsOrchestratorWithOpenTelemetry:
    """
    Tests that Guardrails Orchestrator can be instrumented with OpenTelemetry.
    """

    def test_guardrails_with_opentelemetry(
        self,
        guardrails_orchestrator,
        otel_operator_cr,
        jaeger_instance,
    ):
        """
        Verify the orchestrator and Jaeger pods come up with the OpenTelemetry
        collector CR present.

        NOTE(review): this only checks pod readiness — it does not assert that
        any trace/span actually reaches Jaeger; consider querying the Jaeger
        API after a guardrails request to close the loop.
        NOTE(review): `Timeout` is referenced below but is not among the
        imports this change adds — confirm it is already imported in this file.
        """
        orchestrator = guardrails_orchestrator
        assert orchestrator.exists

        # Wait for all orchestrator pods to be ready
        # (assumes orchestrator pods carry an app=<orchestrator-name> label — TODO confirm)
        pods = Pod.get(
            dyn_client=orchestrator.client,
            namespace=orchestrator.namespace,
            label_selector=f"app={orchestrator.name}",
        )

        for pod in pods:
            pod.wait_for_condition(
                condition=Pod.Condition.READY,
                status="True",
                timeout=Timeout.TIMEOUT_10MIN
            )

        # Wait for Jaeger instance pods to be ready
        wait_for_jaeger_pods(
            client=jaeger_instance.client,
            jaeger_name=jaeger_instance.name,
            namespace=jaeger_instance.namespace,
            timeout=Timeout.TIMEOUT_10MIN
        )

0 commit comments

Comments
 (0)