# forked from trustyai-explainability/guardrails-detectors
# .github/workflows/test-llm-judge.yaml
---
# Tier 1 CI: unit tests for the LLM Judge detector.
# Runs on pushes/PRs to the long-lived branches whenever the detector,
# its shared code, its tests, or this workflow file changes.
name: Tier 1 - LLM Judge unit tests

on:
  push:
    branches: [main, incubation, stable]
    paths:
      - 'detectors/llm_judge/**'
      - 'detectors/common/**'
      - 'tests/detectors/llm_judge/**'
      - 'tests/conftest.py'
      - '.github/workflows/test-llm-judge.yaml'
  pull_request:
    branches: [main, incubation, stable]
    paths:
      - 'detectors/llm_judge/**'
      - 'detectors/common/**'
      - 'tests/detectors/llm_judge/**'
      - 'tests/conftest.py'
      - '.github/workflows/test-llm-judge.yaml'

jobs:
  test-llm-judge:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        # Quoted so YAML does not read the version as a float.
        python-version: ["3.11"]
    permissions:
      contents: read
      checks: write
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      # Shared composite action: installs Python, requirements, and pre-commit.
      - name: Common test setup
        uses: ./.github/actions/test-setup
        with:
          component_name: 'llm-judge'
          requirements_files: 'detectors/common/requirements.txt detectors/common/requirements-dev.txt detectors/llm_judge/requirements.txt'
          precommit_paths: 'detectors/llm_judge tests/detectors/llm_judge detectors/common'
          python_version: ${{ matrix.python-version }}
          needs_system_deps: 'true'

      # Best-effort sanity check: reports import problems but never fails
      # the job (the package may legitimately be unusable without a GPU).
      # The quoted heredoc delimiter ('PYEOF') passes the Python source to
      # the interpreter verbatim, with no shell $-expansion.
      - name: Verify vllm-judge installation
        run: |
          python - <<'PYEOF'
          try:
              import vllm_judge
              print('vllm-judge import successful')
              print(f'vllm-judge version: {vllm_judge.__version__}')
          except Exception as e:
              print(f'Error importing vllm-judge: {e}')
              print('This may be expected if running without GPU resources')
          PYEOF

      - name: Run LLM Judge Tests
        timeout-minutes: 15
        run: |
          pytest tests/detectors/llm_judge/ \
            --cov=detectors.llm_judge \
            --cov-report=term-missing \
            -v \
            --tb=short

      # Import failure is fatal (exit 1); initialization failure is only a
      # note, since constructing the detector may require model access the
      # runner does not have.
      - name: Test LLM Judge detector initialization
        timeout-minutes: 5
        run: |
          python - <<'PYEOF'
          import sys
          try:
              from detectors.llm_judge.detector import LLMJudgeDetector
              print('LLMJudgeDetector import successful')
              # Test basic initialization (may fail without proper model access)
              try:
                  detector = LLMJudgeDetector()
                  print('LLMJudgeDetector initialization successful')
              except Exception as init_e:
                  print(f'Note: LLMJudgeDetector initialization failed (may require specific models): {init_e}')
          except Exception as e:
              print(f'Error testing LLM Judge detector: {e}')
              sys.exit(1)
          PYEOF
          echo "LLM Judge detector verification complete"