# Source: deepeval-multirun/examples/environment_config.py (MRLab12/deepeval-multirun, main branch)
"""
Example: Environment-based configuration.

This example demonstrates how to use environment variables to conditionally
enable multi-run evaluation in different environments (dev, staging, production).
"""

import os
from deepeval.test_case import LLMTestCase
from deepeval.metrics import AnswerRelevancyMetric
from deepeval import assert_test
from deepeval_multirun import (
    multirun_assert_test,
    should_use_multirun_evaluation,
    get_environment_info,
)


def smart_assert_test(test_case, metrics):
    """
    Dispatch an assertion to either the single-run or the multi-run evaluator.

    The choice is delegated to ``should_use_multirun_evaluation()``: when it
    returns a truthy value the test case and metrics are passed to
    ``multirun_assert_test``; otherwise they are passed to deepeval's plain
    ``assert_test``. A one-line notice naming the chosen mode is printed
    before the evaluation starts.

    Args:
        test_case: The test case forwarded unchanged to the chosen evaluator.
        metrics: The metrics forwarded unchanged to the chosen evaluator.
    """
    # Handle the standard path first and bail out, leaving the multi-run
    # path as the fall-through case.
    if not should_use_multirun_evaluation():
        print("Using standard single-run evaluation")
        assert_test(test_case, metrics)
        return

    print("Using multi-run evaluation")
    multirun_assert_test(test_case, metrics)


def test_with_environment_config():
    """Run one relevancy check through the environment-aware assertion helper.

    Prints whatever ``get_environment_info()`` returns (surrounded by blank
    lines), then builds a sample question/answer test case and hands it,
    together with a single ``AnswerRelevancyMetric`` (threshold 0.7), to
    ``smart_assert_test``.
    """
    # Show the active configuration before anything is evaluated.
    env_summary = get_environment_info()
    print(f"\n{env_summary}\n")

    answer = (
        "Machine learning is a subset of artificial intelligence that enables "
        "systems to learn and improve from experience without being explicitly programmed."
    )
    sample_case = LLMTestCase(
        input="What is machine learning?",
        actual_output=answer,
        expected_output="Machine learning is AI that learns from data",
    )
    relevancy = AnswerRelevancyMetric(threshold=0.7)

    smart_assert_test(sample_case, [relevancy])


def demo_different_environments():
    """Demonstrate behavior in different environments.

    Runs the same test case through ``smart_assert_test`` three times while
    rewriting environment variables to simulate:

    1. Development: ``ENABLE_MULTIRUN`` removed.
    2. Staging: ``ENABLE_MULTIRUN=true`` and ``ENVIRONMENT=staging``.
    3. Production: ``ENABLE_MULTIRUN=true``, ``ENVIRONMENT=production`` and
       explicit ``MULTIRUN_NUM_RUNS`` / ``MULTIRUN_PASS_THRESHOLD`` /
       ``MULTIRUN_RATE_LIMIT_DELAY`` values.

    Before each run the string returned by ``get_environment_info()`` is
    printed.

    The variables this demo touches are snapshotted on entry and put back in
    a ``finally`` block, so the process environment ends up exactly as the
    caller left it, even when one of the scenarios raises.
    """
    managed_keys = (
        "ENABLE_MULTIRUN",
        "ENVIRONMENT",
        "MULTIRUN_NUM_RUNS",
        "MULTIRUN_PASS_THRESHOLD",
        "MULTIRUN_RATE_LIMIT_DELAY",
    )
    # Remember any values the caller already had so they can be restored.
    saved_env = {key: os.environ[key] for key in managed_keys if key in os.environ}

    test_case = LLMTestCase(
        input="What is the speed of light?",
        actual_output="The speed of light in vacuum is approximately 299,792,458 meters per second.",
        expected_output="299,792,458 m/s",
    )

    metric = AnswerRelevancyMetric(threshold=0.7)

    try:
        print("\n" + "=" * 70)
        print("DEMO: Different Environment Configurations")
        print("=" * 70)

        # Scenario 1: Development (default - no multi-run)
        print("\n1️⃣  Development Environment (multi-run disabled)")
        print("-" * 70)
        os.environ.pop("ENABLE_MULTIRUN", None)
        print(f"Config: {get_environment_info()}")
        smart_assert_test(test_case, [metric])

        # Scenario 2: Staging (multi-run enabled with defaults)
        print("\n2️⃣  Staging Environment (multi-run enabled, default settings)")
        print("-" * 70)
        os.environ["ENABLE_MULTIRUN"] = "true"
        os.environ["ENVIRONMENT"] = "staging"
        print(f"Config: {get_environment_info()}")
        smart_assert_test(test_case, [metric])

        # Scenario 3: Production (multi-run enabled with custom settings)
        print("\n3️⃣  Production Environment (multi-run enabled, custom settings)")
        print("-" * 70)
        os.environ["ENABLE_MULTIRUN"] = "true"
        os.environ["ENVIRONMENT"] = "production"
        os.environ["MULTIRUN_NUM_RUNS"] = "7"
        os.environ["MULTIRUN_PASS_THRESHOLD"] = "5"
        os.environ["MULTIRUN_RATE_LIMIT_DELAY"] = "2.0"
        print(f"Config: {get_environment_info()}")
        smart_assert_test(test_case, [metric])
    finally:
        # Clean up: drop everything the demo set, then reinstate the
        # caller's original values. Runs even if a scenario raised, so demo
        # settings never leak into later code in the same process.
        for key in managed_keys:
            os.environ.pop(key, None)
        os.environ.update(saved_env)


if __name__ == "__main__":
    # Blank line followed by a 70-character rule, printed between sections.
    section_break = "\n" + "=" * 70

    # Part 1: evaluate once using whatever the current environment specifies.
    print("Testing with current environment configuration:")
    test_with_environment_config()

    # Part 2: walk through the simulated environment scenarios.
    print(section_break)
    print("\nDemonstrating different environment configurations:")
    demo_different_environments()

    # Closing banner, reached only if nothing above raised.
    print(section_break)
    print("✅ All environment-based tests completed!")