BerriAI · tomukmatthews · May 8, 2025
diff --git a/tests/conftest.py b/tests/conftest.py
diff --git a/tests/large_text.py b/tests/large_text.py
diff --git a/tests/litellm/litellm_core_utils/test_token_counter.py b/tests/litellm/litellm_core_utils/test_token_counter.py
@@ -23,7 +23,6 @@
 from litellm import create_pretrained_tokenizer, decode, encode, get_modified_max_tokens
 from litellm import token_counter as token_counter_old
 from litellm.litellm_core_utils.token_counter import token_counter as token_counter_new
-from tests.large_text import text
 
 
 def token_counter_both_assert_same(**args):
@@ -284,15 +283,15 @@ def test_gpt_vision_token_counting():
         "mistral/mistral-tiny",
     ],
 )
-def test_load_test_token_counter(model):
+def test_load_test_token_counter(model, large_text):
     """
     Token count large prompt 100 times.
 
     Assert time taken is < 1.5s.
     """
     import tiktoken
 
-    messages = [{"role": "user", "content": text}] * 10
+    messages = [{"role": "user", "content": large_text}] * 10
 
     start_time = time.time()
     for _ in range(10):

diff --git a/tests/local_testing/test_router.py b/tests/local_testing/test_router.py
@@ -683,15 +683,15 @@ async def test2():
 
 @pytest.mark.asyncio
 @pytest.mark.parametrize("sync_mode", [True, False])
-async def test_async_router_context_window_fallback(sync_mode):
+async def test_async_router_context_window_fallback(sync_mode, large_text):
     """
     - Give a gpt-4 model group with different context windows (8192k vs. 128k)
     - Send a 10k prompt
     - Assert it works
     """
     import os
 
-    from large_text import text
+    text = large_text
 
     litellm.set_verbose = False
     litellm._turn_on_debug()
@@ -775,15 +775,17 @@ def test_router_rpm_pre_call_check():
         pytest.fail(f"Got unexpected exception on router! - {str(e)}")
 
 
-def test_router_context_window_check_pre_call_check_in_group_custom_model_info():
+def test_router_context_window_check_pre_call_check_in_group_custom_model_info(
+    large_text,
+):
     """
     - Give a gpt-3.5-turbo model group with different context windows (4k vs. 16k)
     - Send a 5k prompt
     - Assert it works
     """
     import os
 
-    from large_text import text
+    text = large_text
 
     litellm.set_verbose = False
 
@@ -829,15 +831,15 @@ def test_router_context_window_check_pre_call_check_in_group_custom_model_info()
         pytest.fail(f"Got unexpected exception on router! - {str(e)}")
 
 
-def test_router_context_window_check_pre_call_check():
+def test_router_context_window_check_pre_call_check(large_text):
     """
     - Give a gpt-3.5-turbo model group with different context windows (4k vs. 16k)
     - Send a 5k prompt
     - Assert it works
     """
     import os
 
-    from large_text import text
+    text = large_text
 
     litellm.set_verbose = False
 
@@ -883,15 +885,15 @@ def test_router_context_window_check_pre_call_check():
         pytest.fail(f"Got unexpected exception on router! - {str(e)}")
 
 
-def test_router_context_window_check_pre_call_check_out_group():
+def test_router_context_window_check_pre_call_check_out_group(large_text):
     """
     - Give 2 gpt-3.5-turbo model groups with different context windows (4k vs. 16k)
     - Send a 5k prompt
     - Assert it works
     """
     import os
 
-    from large_text import text
+    text = large_text
 
     litellm.set_verbose = False
 
@@ -1136,7 +1138,7 @@ async def test_aimg_gen_on_router():
                     "api_base": os.getenv("AZURE_SWEDEN_API_BASE"),
                     "api_key": os.getenv("AZURE_SWEDEN_API_KEY"),
                 },
-            }
+            },
         ]
         router = Router(model_list=model_list, num_retries=3)
         response = await router.aimage_generation(
@@ -2787,4 +2789,4 @@ def test_router_get_model_group_info():
     assert model_group_info is not None
     assert model_group_info.model_group == "gpt-4"
     assert model_group_info.input_cost_per_token > 0
-    assert model_group_info.output_cost_per_token > 0
+    assert model_group_info.output_cost_per_token > 0
diff --git a/tests/old_proxy_tests/tests/load_test_completion.py b/tests/old_proxy_tests/tests/load_test_completion.py
@@ -4,7 +4,6 @@
 from openai import AsyncOpenAI, AsyncAzureOpenAI
 import uuid
 import traceback
-from large_text import text
 from dotenv import load_dotenv
 from statistics import mean, median
 

diff --git a/tests/test_fallbacks.py b/tests/test_fallbacks.py
@@ -3,7 +3,6 @@
 import pytest
 import asyncio
 import aiohttp
-from large_text import text
 import time
 from typing import Optional
 
@@ -77,14 +76,14 @@ async def chat_completion(
 
 
 @pytest.mark.asyncio
-async def test_chat_completion():
+async def test_chat_completion(large_text):
     """
     make chat completion call with prompt > context window. expect it to work with fallback
     """
     async with aiohttp.ClientSession() as session:
         model = "gpt-3.5-turbo"
         messages = [
-            {"role": "system", "content": text},
+            {"role": "system", "content": large_text},
             {"role": "user", "content": "Who was Alexander?"},
         ]
         await chat_completion(
@@ -133,14 +132,14 @@ async def test_chat_completion_client_fallbacks(has_access):
 
 
 @pytest.mark.asyncio
-async def test_chat_completion_with_retries():
+async def test_chat_completion_with_retries(large_text):
     """
     make chat completion call with prompt > context window. expect it to work with fallback
     """
     async with aiohttp.ClientSession() as session:
         model = "fake-openai-endpoint-4"
         messages = [
-            {"role": "system", "content": text},
+            {"role": "system", "content": large_text},
             {"role": "user", "content": "Who was Alexander?"},
         ]
         response, headers = await chat_completion(
@@ -157,14 +156,14 @@ async def test_chat_completion_with_retries():
 
 
 @pytest.mark.asyncio
-async def test_chat_completion_with_fallbacks():
+async def test_chat_completion_with_fallbacks(large_text):
     """
     make chat completion call with prompt > context window. expect it to work with fallback
     """
     async with aiohttp.ClientSession() as session:
         model = "badly-configured-openai-endpoint"
         messages = [
-            {"role": "system", "content": text},
+            {"role": "system", "content": large_text},
             {"role": "user", "content": "Who was Alexander?"},
         ]
         response, headers = await chat_completion(
@@ -180,14 +179,14 @@ async def test_chat_completion_with_fallbacks():
 
 
 @pytest.mark.asyncio
-async def test_chat_completion_with_timeout():
+async def test_chat_completion_with_timeout(large_text):
     """
     make chat completion call with low timeout and `mock_timeout`: true. Expect it to fail and correct timeout to be set in headers.
     """
     async with aiohttp.ClientSession() as session:
         model = "fake-openai-endpoint-5"
         messages = [
-            {"role": "system", "content": text},
+            {"role": "system", "content": large_text},
             {"role": "user", "content": "Who was Alexander?"},
         ]
         start_time = time.time()
@@ -208,14 +207,14 @@ async def test_chat_completion_with_timeout():
 
 
 @pytest.mark.asyncio
-async def test_chat_completion_with_timeout_from_request():
+async def test_chat_completion_with_timeout_from_request(large_text):
     """
     make chat completion call with low timeout and `mock_timeout`: true. Expect it to fail and correct timeout to be set in headers.
     """
     async with aiohttp.ClientSession() as session:
         model = "fake-openai-endpoint-5"
         messages = [
-            {"role": "system", "content": text},
+            {"role": "system", "content": large_text},
             {"role": "user", "content": "Who was Alexander?"},
         ]
         extra_headers = {