
Commit b8b78f1

Support unified file id (managed files) for batches (#10650)
* refactor(managed_files.py): move enterprise feature into enterprise folder to prevent unexpected surprises
* refactor: safely handle enterprise hooks
* fix: fix ruff check errors
* fix(files_endpoints.py): cleanup enterprise code from OSS
* refactor: complete cleanup
* fix(managed_files.py): complete cleanup
* fix(managed_files.py): instrument to be able to update deployment values post-router selection and just before making llm call
* fix(managed_files.py): instrument to be able to update deployment values post-router selection and just before making llm call
* fix: fix linting error
* fix: fix linting error
1 parent fcaa4a9 commit b8b78f1

17 files changed: +370 -179 lines changed

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
import os
2+
from typing import Dict, Literal, Type, Union
3+
4+
from litellm.integrations.custom_logger import CustomLogger
5+
6+
from .managed_files import _PROXY_LiteLLMManagedFiles
7+
from .parallel_request_limiter_v2 import _PROXY_MaxParallelRequestsHandler
8+
9+
ENTERPRISE_PROXY_HOOKS: Dict[str, Type[CustomLogger]] = {
10+
"managed_files": _PROXY_LiteLLMManagedFiles,
11+
}
12+
13+
14+
## FEATURE FLAG HOOKS ##
15+
16+
if os.getenv("EXPERIMENTAL_MULTI_INSTANCE_RATE_LIMITING", "false").lower() == "true":
17+
ENTERPRISE_PROXY_HOOKS["max_parallel_requests"] = _PROXY_MaxParallelRequestsHandler
18+
19+
20+
def get_enterprise_proxy_hook(
21+
hook_name: Union[
22+
Literal[
23+
"managed_files",
24+
"max_parallel_requests",
25+
],
26+
str,
27+
]
28+
):
29+
"""
30+
Factory method to get a enterprise hook instance by name
31+
"""
32+
if hook_name not in ENTERPRISE_PROXY_HOOKS:
33+
raise ValueError(
34+
f"Unknown hook: {hook_name}. Available hooks: {list(ENTERPRISE_PROXY_HOOKS.keys())}"
35+
)
36+
return ENTERPRISE_PROXY_HOOKS[hook_name]
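For context, a minimal sketch of how this factory can be consumed (the module path of the new file is not shown on this page, so the import below is an assumption; everything else follows the code above):

    # assumed import path for the new enterprise hooks module
    from enterprise.enterprise_hooks import get_enterprise_proxy_hook

    hook_cls = get_enterprise_proxy_hook("managed_files")  # returns the class, not an instance
    print(hook_cls.__name__)  # _PROXY_LiteLLMManagedFiles

    try:
        get_enterprise_proxy_hook("not_a_hook")
    except ValueError as err:
        print(err)  # Unknown hook: not_a_hook. Available hooks: ['managed_files']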

litellm/proxy/hooks/managed_files.py renamed to enterprise/enterprise_hooks/managed_files.py

Lines changed: 41 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -4,14 +4,18 @@
44
import base64
55
import json
66
import uuid
7-
from abc import ABC, abstractmethod
87
from typing import TYPE_CHECKING, Any, Dict, List, Literal, Optional, Union, cast
98

109
from litellm import Router, verbose_logger
1110
from litellm.caching.caching import DualCache
1211
from litellm.integrations.custom_logger import CustomLogger
1312
from litellm.litellm_core_utils.prompt_templates.common_utils import extract_file_data
13+
from litellm.llms.base_llm.files.transformation import BaseFileEndpoints
1414
from litellm.proxy._types import CallTypes, LiteLLM_ManagedFileTable, UserAPIKeyAuth
15+
from litellm.proxy.openai_files_endpoints.common_utils import (
16+
_is_base64_encoded_unified_file_id,
17+
convert_b64_uid_to_unified_uid,
18+
)
1519
from litellm.types.llms.openai import (
1620
AllMessageValues,
1721
ChatCompletionFileObject,
@@ -36,29 +40,7 @@
3640
PrismaClient = Any
3741

3842

39-
class BaseFileEndpoints(ABC):
40-
@abstractmethod
41-
async def afile_retrieve(
42-
self,
43-
file_id: str,
44-
litellm_parent_otel_span: Optional[Span],
45-
) -> OpenAIFileObject:
46-
pass
47-
48-
@abstractmethod
49-
async def afile_list(
50-
self, custom_llm_provider: str, **data: dict
51-
) -> List[OpenAIFileObject]:
52-
pass
53-
54-
@abstractmethod
55-
async def afile_delete(
56-
self, custom_llm_provider: str, file_id: str, **data: dict
57-
) -> OpenAIFileObject:
58-
pass
59-
60-
61-
class _PROXY_LiteLLMManagedFiles(CustomLogger):
43+
class _PROXY_LiteLLMManagedFiles(CustomLogger, BaseFileEndpoints):
6244
# Class variables or attributes
6345
def __init__(
6446
self, internal_usage_cache: InternalUsageCache, prisma_client: PrismaClient
@@ -153,12 +135,14 @@ async def async_pre_call_hook(
153135
"audio_transcription",
154136
"pass_through_endpoint",
155137
"rerank",
138+
"acreate_batch",
156139
],
157140
) -> Union[Exception, str, Dict, None]:
158141
"""
159142
- Detect litellm_proxy/ file_id
160143
- add dictionary of mappings of litellm_proxy/ file_id -> provider_file_id => {litellm_proxy/file_id: {"model_id": id, "file_id": provider_file_id}}
161144
"""
145+
print("REACHES async_pre_call_hook, call_type:", call_type)
162146
if call_type == CallTypes.completion.value:
163147
messages = data.get("messages")
164148
if messages:
@@ -169,9 +153,37 @@ async def async_pre_call_hook(
169153
)
170154

171155
data["model_file_id_mapping"] = model_file_id_mapping
156+
elif call_type == CallTypes.acreate_batch.value:
157+
input_file_id = cast(Optional[str], data.get("input_file_id"))
158+
if input_file_id:
159+
model_file_id_mapping = await self.get_model_file_id_mapping(
160+
[input_file_id], user_api_key_dict.parent_otel_span
161+
)
172162

163+
data["model_file_id_mapping"] = model_file_id_mapping
173164
return data
174165

166+
async def async_pre_call_deployment_hook(
167+
self, kwargs: Dict[str, Any], call_type: Optional[CallTypes]
168+
) -> Optional[dict]:
169+
"""
170+
Allow modifying the request just before it's sent to the deployment.
171+
"""
172+
if call_type and call_type == CallTypes.acreate_batch:
173+
input_file_id = cast(Optional[str], kwargs.get("input_file_id"))
174+
model_file_id_mapping = cast(
175+
Optional[Dict[str, Dict[str, str]]], kwargs.get("model_file_id_mapping")
176+
)
177+
model_id = cast(Optional[str], kwargs.get("model_info", {}).get("id", None))
178+
mapped_file_id: Optional[str] = None
179+
if input_file_id and model_file_id_mapping and model_id:
180+
mapped_file_id = model_file_id_mapping.get(input_file_id, {}).get(
181+
model_id, None
182+
)
183+
if mapped_file_id:
184+
kwargs["input_file_id"] = mapped_file_id
185+
return kwargs
186+
175187
def get_file_ids_from_messages(self, messages: List[AllMessageValues]) -> List[str]:
176188
"""
177189
Gets file ids from messages
@@ -192,37 +204,6 @@ def get_file_ids_from_messages(self, messages: List[AllMessageValues]) -> List[s
192204
file_ids.append(file_id)
193205
return file_ids
194206

195-
@staticmethod
196-
def _convert_b64_uid_to_unified_uid(b64_uid: str) -> str:
197-
is_base64_unified_file_id = (
198-
_PROXY_LiteLLMManagedFiles._is_base64_encoded_unified_file_id(b64_uid)
199-
)
200-
if is_base64_unified_file_id:
201-
return is_base64_unified_file_id
202-
else:
203-
return b64_uid
204-
205-
@staticmethod
206-
def _is_base64_encoded_unified_file_id(b64_uid: str) -> Union[str, Literal[False]]:
207-
# Add padding back if needed
208-
padded = b64_uid + "=" * (-len(b64_uid) % 4)
209-
# Decode from base64
210-
try:
211-
decoded = base64.urlsafe_b64decode(padded).decode()
212-
if decoded.startswith(SpecialEnums.LITELM_MANAGED_FILE_ID_PREFIX.value):
213-
return decoded
214-
else:
215-
return False
216-
except Exception:
217-
return False
218-
219-
def convert_b64_uid_to_unified_uid(self, b64_uid: str) -> str:
220-
is_base64_unified_file_id = self._is_base64_encoded_unified_file_id(b64_uid)
221-
if is_base64_unified_file_id:
222-
return is_base64_unified_file_id
223-
else:
224-
return b64_uid
225-
226207
async def get_model_file_id_mapping(
227208
self, file_ids: List[str], litellm_parent_otel_span: Span
228209
) -> dict:
@@ -247,7 +228,7 @@ async def get_model_file_id_mapping(
247228

248229
for file_id in file_ids:
249230
## CHECK IF FILE ID IS MANAGED BY LITELM
250-
is_base64_unified_file_id = self._is_base64_encoded_unified_file_id(file_id)
231+
is_base64_unified_file_id = _is_base64_encoded_unified_file_id(file_id)
251232

252233
if is_base64_unified_file_id:
253234
litellm_managed_file_ids.append(file_id)
@@ -300,6 +281,7 @@ async def acreate_file(
300281
create_file_request=create_file_request,
301282
internal_usage_cache=self.internal_usage_cache,
302283
litellm_parent_otel_span=litellm_parent_otel_span,
284+
target_model_names_list=target_model_names_list,
303285
)
304286

305287
## STORE MODEL MAPPINGS IN DB
@@ -328,14 +310,15 @@ async def return_unified_file_id(
328310
create_file_request: CreateFileRequest,
329311
internal_usage_cache: InternalUsageCache,
330312
litellm_parent_otel_span: Span,
313+
target_model_names_list: List[str],
331314
) -> OpenAIFileObject:
332315
## GET THE FILE TYPE FROM THE CREATE FILE REQUEST
333316
file_data = extract_file_data(create_file_request["file"])
334317

335318
file_type = file_data["content_type"]
336319

337320
unified_file_id = SpecialEnums.LITELLM_MANAGED_FILE_COMPLETE_STR.value.format(
338-
file_type, str(uuid.uuid4())
321+
file_type, str(uuid.uuid4()), ",".join(target_model_names_list)
339322
)
340323

341324
# Convert to URL-safe base64 and strip padding
@@ -383,7 +366,7 @@ async def afile_delete(
383366
llm_router: Router,
384367
**data: Dict,
385368
) -> OpenAIFileObject:
386-
file_id = self.convert_b64_uid_to_unified_uid(file_id)
369+
file_id = convert_b64_uid_to_unified_uid(file_id)
387370
model_file_id_mapping = await self.get_model_file_id_mapping(
388371
[file_id], litellm_parent_otel_span
389372
)
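Taken together, the two hooks hand a per-deployment file-id mapping from request parsing to the moment the call is dispatched. A small sketch of that data flow for a batch request, using made-up ids (the real mapping is loaded from the managed-files table in get_model_file_id_mapping):

    # 1. async_pre_call_hook: the client sent a unified (base64) file id, and the hook
    #    attaches a mapping of unified id -> {deployment model_id: provider file id}
    data = {
        "input_file_id": "bGl0ZWxsbV9wcm94eS1leGFtcGxl",  # made-up unified id
        "model_file_id_mapping": {
            "bGl0ZWxsbV9wcm94eS1leGFtcGxl": {
                "deployment-uuid-1": "file-abc123",                      # e.g. OpenAI file id
                "deployment-uuid-2": "projects/p/locations/l/files/x",   # e.g. Vertex file id
            }
        },
    }

    # 2. async_pre_call_deployment_hook: after the router picks a deployment,
    #    the unified id is swapped for that deployment's provider file id
    chosen_model_id = "deployment-uuid-1"
    data["input_file_id"] = data["model_file_id_mapping"][data["input_file_id"]][chosen_model_id]
    assert data["input_file_id"] == "file-abc123"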

litellm/integrations/custom_logger.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
from litellm.types.llms.openai import AllMessageValues, ChatCompletionRequest
2222
from litellm.types.utils import (
2323
AdapterCompletionStreamWrapper,
24+
CallTypes,
2425
LLMResponseTypes,
2526
ModelResponse,
2627
ModelResponseStream,
@@ -127,6 +128,18 @@ async def async_filter_deployments(
127128
) -> List[dict]:
128129
return healthy_deployments
129130

131+
async def async_pre_call_deployment_hook(
132+
self, kwargs: Dict[str, Any], call_type: Optional[CallTypes]
133+
) -> Optional[dict]:
134+
"""
135+
Allow modifying the request just before it's sent to the deployment.
136+
137+
Use this instead of 'async_pre_call_hook' when you need to modify the request AFTER a deployment is selected, but BEFORE the request is sent.
138+
139+
Used in managed_files.py
140+
"""
141+
pass
142+
130143
async def async_pre_call_check(
131144
self, deployment: dict, parent_otel_span: Optional[Span]
132145
) -> Optional[dict]:
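A minimal sketch of overriding the new hook in a custom callback; the class name and the metadata tweak are illustrative, not part of this commit:

    from typing import Any, Dict, Optional

    from litellm.integrations.custom_logger import CustomLogger
    from litellm.types.utils import CallTypes


    class MyDeploymentHook(CustomLogger):
        async def async_pre_call_deployment_hook(
            self, kwargs: Dict[str, Any], call_type: Optional[CallTypes]
        ) -> Optional[dict]:
            # runs after the router has chosen a deployment, just before the LLM call
            if call_type == CallTypes.acreate_batch:
                kwargs.setdefault("metadata", {})["touched_by"] = "MyDeploymentHook"  # illustrative
            return kwargs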

litellm/litellm_core_utils/prompt_templates/common_utils.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -346,14 +346,14 @@ def get_format_from_file_id(file_id: Optional[str]) -> Optional[str]:
346346
unified_file_id = litellm_proxy:{};unified_id,{}
347347
If not a unified file id, returns 'file' as default format
348348
"""
349-
from litellm.proxy.hooks.managed_files import _PROXY_LiteLLMManagedFiles
349+
from litellm.proxy.openai_files_endpoints.common_utils import (
350+
convert_b64_uid_to_unified_uid,
351+
)
350352

351353
if not file_id:
352354
return None
353355
try:
354-
transformed_file_id = (
355-
_PROXY_LiteLLMManagedFiles._convert_b64_uid_to_unified_uid(file_id)
356-
)
356+
transformed_file_id = convert_b64_uid_to_unified_uid(file_id)
357357
if transformed_file_id.startswith(
358358
SpecialEnums.LITELM_MANAGED_FILE_ID_PREFIX.value
359359
):
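The helpers now imported from litellm.proxy.openai_files_endpoints.common_utils mirror the staticmethods deleted from managed_files.py above; the core of the check is a URL-safe base64 round-trip. A standalone sketch of that decode step (the prefix default here is illustrative; the real value comes from SpecialEnums.LITELM_MANAGED_FILE_ID_PREFIX):

    import base64
    from typing import Literal, Union


    def is_base64_encoded_unified_file_id(
        b64_uid: str, prefix: str = "litellm_proxy"
    ) -> Union[str, Literal[False]]:
        # restore the '=' padding that was stripped when the unified id was minted
        padded = b64_uid + "=" * (-len(b64_uid) % 4)
        try:
            decoded = base64.urlsafe_b64decode(padded).decode()
        except Exception:
            return False
        # a managed file id decodes to a string starting with the LiteLLM prefix
        return decoded if decoded.startswith(prefix) else False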

litellm/llms/base_llm/files/transformation.py

Lines changed: 57 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
1-
from abc import abstractmethod
2-
from typing import TYPE_CHECKING, Any, List, Optional, Union
1+
from abc import ABC, abstractmethod
2+
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Union
33

44
import httpx
55

@@ -8,17 +8,23 @@
88
CreateFileRequest,
99
OpenAICreateFileRequestOptionalParams,
1010
OpenAIFileObject,
11+
OpenAIFilesPurpose,
1112
)
1213
from litellm.types.utils import LlmProviders, ModelResponse
1314

1415
from ..chat.transformation import BaseConfig
1516

1617
if TYPE_CHECKING:
1718
from litellm.litellm_core_utils.litellm_logging import Logging as _LiteLLMLoggingObj
19+
from litellm.router import Router as _Router
1820

1921
LiteLLMLoggingObj = _LiteLLMLoggingObj
22+
Span = Any
23+
Router = _Router
2024
else:
2125
LiteLLMLoggingObj = Any
26+
Span = Any
27+
Router = Any
2228

2329

2430
class BaseFilesConfig(BaseConfig):
@@ -99,3 +105,52 @@ def transform_response(
99105
raise NotImplementedError(
100106
"AudioTranscriptionConfig does not need a response transformation for audio transcription models"
101107
)
108+
109+
110+
class BaseFileEndpoints(ABC):
111+
@abstractmethod
112+
async def acreate_file(
113+
self,
114+
create_file_request: CreateFileRequest,
115+
llm_router: Router,
116+
target_model_names_list: List[str],
117+
litellm_parent_otel_span: Span,
118+
) -> OpenAIFileObject:
119+
pass
120+
121+
@abstractmethod
122+
async def afile_retrieve(
123+
self,
124+
file_id: str,
125+
litellm_parent_otel_span: Optional[Span],
126+
) -> OpenAIFileObject:
127+
pass
128+
129+
@abstractmethod
130+
async def afile_list(
131+
self,
132+
purpose: Optional[OpenAIFilesPurpose],
133+
litellm_parent_otel_span: Optional[Span],
134+
**data: Dict,
135+
) -> List[OpenAIFileObject]:
136+
pass
137+
138+
@abstractmethod
139+
async def afile_delete(
140+
self,
141+
file_id: str,
142+
litellm_parent_otel_span: Optional[Span],
143+
llm_router: Router,
144+
**data: Dict,
145+
) -> OpenAIFileObject:
146+
pass
147+
148+
@abstractmethod
149+
async def afile_content(
150+
self,
151+
file_id: str,
152+
litellm_parent_otel_span: Optional[Span],
153+
llm_router: Router,
154+
**data: Dict,
155+
) -> str:
156+
pass
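With acreate_file and afile_content added, BaseFileEndpoints now describes the full files surface the managed-files hook implements, and abc enforces that contract. A toy sketch of the guard (the stub class is hypothetical):

    from litellm.llms.base_llm.files.transformation import BaseFileEndpoints


    class IncompleteFileEndpoints(BaseFileEndpoints):  # hypothetical: implements nothing
        pass


    try:
        IncompleteFileEndpoints()  # type: ignore[abstract]
    except TypeError as err:
        # abc refuses to instantiate until all five abstract coroutines are implemented
        print(err)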

litellm/proxy/_new_secret_config.yaml

Lines changed: 4 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1,24 +1,9 @@
11
model_list:
2-
- model_name: gpt-4o-mini-tts
2+
- model_name: "gemini-2.0-flash"
33
litellm_params:
4-
model: openai/gpt-4o-mini-tts
5-
api_key: os.environ/OPENAI_API_KEY
6-
- model_name: gpt-3.5-turbo
7-
litellm_params:
8-
model: azure/chatgpt-v-3
9-
api_base: https://openai-gpt-4-test-v-1.openai.azure.com/
10-
api_version: "2023-05-15"
11-
api_key: os.environ/AZURE_API_KEY
12-
- model_name: "gpt-4o-azure"
13-
litellm_params:
14-
model: azure/gpt-4o
15-
api_key: os.environ/AZURE_API_KEY
16-
api_base: os.environ/AZURE_API_BASE
17-
- model_name: fake-openai-endpoint
18-
litellm_params:
19-
model: openai/fake
20-
api_key: fake-key
21-
api_base: https://exampleopenaiendpoint-production.up.railway.app/
4+
model: vertex_ai/gemini-2.0-flash
5+
vertex_project: my-project-id
6+
vertex_location: us-central1
227
- model_name: "gpt-4o-mini-openai"
238
litellm_params:
249
model: gpt-4o-mini
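A hedged end-to-end sketch of what the commit enables against a proxy running this config: upload a file once and reference its unified id in a batch, letting the proxy swap in the provider-specific file id per deployment. The base URL, API key, and the target_model_names parameter are assumptions, not taken from this diff:

    from openai import OpenAI

    client = OpenAI(base_url="http://localhost:4000/v1", api_key="sk-1234")  # placeholder proxy URL/key

    # Upload once through the proxy; the returned id is the unified (base64) managed file id.
    uploaded = client.files.create(
        file=open("batch_input.jsonl", "rb"),
        purpose="batch",
        extra_body={"target_model_names": "gpt-4o-mini-openai,gemini-2.0-flash"},  # assumed parameter
    )

    # Reference the unified id when creating the batch; the new hooks map it to the
    # chosen deployment's file id just before the provider call.
    batch = client.batches.create(
        input_file_id=uploaded.id,
        endpoint="/v1/chat/completions",
        completion_window="24h",
    )
    print(batch.id)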
