-
-
Notifications
You must be signed in to change notification settings - Fork 6.8k
Expand file tree
/
Copy pathhandler.py
More file actions
108 lines (90 loc) · 3.66 KB
/
handler.py
File metadata and controls
108 lines (90 loc) · 3.66 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
"""
Cohere Rerank Handler for Unified Guardrails
This module provides guardrail translation support for the rerank endpoint.
The handler processes only the 'query' parameter for guardrails.
"""
from typing import TYPE_CHECKING, Any, Optional
from litellm._logging import verbose_proxy_logger
from litellm.llms.base_llm.guardrail_translation.base_translation import BaseTranslation
from litellm.types.utils import GenericGuardrailAPIInputs
if TYPE_CHECKING:
from litellm.integrations.custom_guardrail import CustomGuardrail
from litellm.types.rerank import RerankResponse
class CohereRerankHandler(BaseTranslation):
    """
    Guardrail translation handler for rerank requests.

    Responsibilities:
      * Pre-call: run the configured guardrail over the request's 'query'
        string — and nothing else.
      * Post-call: no-op, since rerank responses carry relevance scores and
        indices rather than text.

    Documents are intentionally left untouched by guardrails: they are the
    corpus being searched, not user-supplied input.
    """

    async def process_input_messages(
        self,
        data: dict,
        guardrail_to_apply: "CustomGuardrail",
        litellm_logging_obj: Optional[Any] = None,
    ) -> Any:
        """
        Apply the guardrail to the request's 'query' field, in place.

        Args:
            data: Request payload; only the 'query' key (a string) is
                inspected, plus 'model' for context if present.
            guardrail_to_apply: Guardrail instance whose ``apply_guardrail``
                is awaited with the query text.
            litellm_logging_obj: Optional logging object forwarded to the
                guardrail call.

        Returns:
            The same ``data`` dict, with 'query' replaced by the
            guardrailed text (or left as-is when the guardrail returns no
            texts, or when 'query' is absent / not a string).
        """
        candidate = data.get("query")
        # Guard clause: nothing to do unless the query is a real string.
        # (isinstance already rejects None.)
        if not isinstance(candidate, str):
            verbose_proxy_logger.debug(
                "Rerank: No query to process or query is not a string"
            )
            return data

        guardrail_inputs = GenericGuardrailAPIInputs(texts=[candidate])
        # Pass the model name through when the caller supplied one, so the
        # guardrail can use it for context.
        model_name = data.get("model")
        if model_name:
            guardrail_inputs["model"] = model_name

        guardrail_result = await guardrail_to_apply.apply_guardrail(
            inputs=guardrail_inputs,
            request_data=data,
            input_type="request",
            logging_obj=litellm_logging_obj,
        )

        # Fall back to the original query if the guardrail returned no texts.
        sanitized_texts = guardrail_result.get("texts", [])
        data["query"] = sanitized_texts[0] if sanitized_texts else candidate
        verbose_proxy_logger.debug(
            "Rerank: Applied guardrail to query. "
            "Original length: %d, New length: %d",
            len(candidate),
            len(data["query"]),
        )
        return data

    async def process_output_response(
        self,
        response: "RerankResponse",
        guardrail_to_apply: "CustomGuardrail",
        litellm_logging_obj: Optional[Any] = None,
        user_api_key_dict: Optional[Any] = None,
        request_data: Optional[dict] = None,
    ) -> Any:
        """
        Return the rerank response unchanged.

        Rerank output is a ranking (relevance scores and indices), so there
        is no text for a guardrail to inspect or rewrite; every parameter
        other than ``response`` is accepted for interface compatibility and
        ignored.

        Args:
            response: Rerank response object with rankings.
            guardrail_to_apply: Guardrail instance (unused).
            litellm_logging_obj: Optional logging object (unused).
            user_api_key_dict: User API key metadata (unused).
            request_data: Original request payload (unused).

        Returns:
            The ``response`` object, unmodified.
        """
        verbose_proxy_logger.debug(
            "Rerank: Output processing not applicable "
            "(output contains relevance scores, not text)"
        )
        return response