Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion docs/my-website/blog/litellm_observatory/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ Our focus moving forward is on being the first to detect issues, even when they
The `TestOAIAzureRelease` test is designed to catch a class of bugs that only surface after sustained runtime:

- **Duration**: Runs continuously for 3 hours
- **Behavior**: Cycles through specified models (such as `gpt-4` and `gpt-3.5-turbo`), issuing requests continuously
- **Behavior**: Cycles through specified models (such as `gpt-4` and `gpt-4o`), issuing requests continuously
- **Why 3 Hours**: This helps catch issues where HTTP clients degrade or fail after extended use (for example, a bug observed in LiteLLM v1.81.3)
- **Pass / Fail Criteria**: The test passes if fewer than 1% of requests fail. If the failure rate exceeds 1%, the test fails and we are notified in Slack
- **Key Detail**: The same HTTP client is reused for the entire run, allowing us to detect lifecycle-related bugs that only appear under prolonged reuse
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -321,7 +321,7 @@ curl -X POST 'http://localhost:4000/{my_endpoint}' \
-H 'Content-Type: application/json' \
-H 'Authorization: Bearer your-api-key' \
-d '{
"model": "gpt-3.5-turbo",
"model": "gpt-4o",
"messages": [{"role": "user", "content": "Hello"}],
"guardrails": ["test"]
}'
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -131,9 +131,9 @@ Add to `config.yaml`:

```yaml
model_list:
- model_name: gpt-3.5-turbo
- model_name: gpt-4o
litellm_params:
model: openai/gpt-3.5-turbo
model: openai/gpt-4o
api_key: os.environ/OPENAI_API_KEY

prompts:
Expand Down
8 changes: 4 additions & 4 deletions docs/my-website/docs/budget_manager.md
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ if not budget_manager.is_valid_user(user):

# check if a given call can be made
if budget_manager.get_current_cost(user=user) <= budget_manager.get_total_budget(user):
response = completion(model="gpt-3.5-turbo", messages=[{"role": "user", "content": "Hey, how's it going?"}])
response = completion(model="gpt-4o", messages=[{"role": "user", "content": "Hey, how's it going?"}])
budget_manager.update_cost(completion_obj=response, user=user)
else:
response = "Sorry - no budget!"
Expand All @@ -72,7 +72,7 @@ budget_manager.create_budget(total_budget=10, user=user, duration="daily")

input_text = "hello world"
output_text = "it's a sunny day in san francisco"
model = "gpt-3.5-turbo"
model = "gpt-4o"

budget_manager.update_cost(user=user, model=model, input_text=input_text, output_text=output_text) # 👈
print(budget_manager.get_current_cost(user))
Expand Down Expand Up @@ -108,7 +108,7 @@ if not budget_manager.is_valid_user(user):

# check if a given call can be made
if budget_manager.get_current_cost(user=user) <= budget_manager.get_total_budget(user):
response = completion(model="gpt-3.5-turbo", messages=[{"role": "user", "content": "Hey, how's it going?"}])
response = completion(model="gpt-4o", messages=[{"role": "user", "content": "Hey, how's it going?"}])
budget_manager.update_cost(completion_obj=response, user=user)
else:
response = "Sorry - no budget!"
Expand Down Expand Up @@ -138,7 +138,7 @@ if not budget_manager.is_valid_user(user):

# check if a given call can be made
if budget_manager.get_current_cost(user=user) <= budget_manager.get_total_budget(user):
response = completion(model="gpt-3.5-turbo", messages=[{"role": "user", "content": "Hey, how's it going?"}])
response = completion(model="gpt-4o", messages=[{"role": "user", "content": "Hey, how's it going?"}])
budget_manager.update_cost(completion_obj=response, user=user)
else:
response = "Sorry - no budget!"
Expand Down
44 changes: 22 additions & 22 deletions docs/my-website/docs/caching/all_caches.md
Original file line number Diff line number Diff line change
Expand Up @@ -39,11 +39,11 @@ litellm.cache = Cache(type="redis", host=<host>, port=<port>, password=<password

# Make completion calls
response1 = completion(
model="gpt-3.5-turbo",
model="gpt-4o",
messages=[{"role": "user", "content": "Tell me a joke."}]
)
response2 = completion(
model="gpt-3.5-turbo",
model="gpt-4o",
messages=[{"role": "user", "content": "Tell me a joke."}]
)

Expand Down Expand Up @@ -77,11 +77,11 @@ litellm.cache = RedisClusterCache(

# Make completion calls
response1 = completion(
model="gpt-3.5-turbo",
model="gpt-4o",
messages=[{"role": "user", "content": "Tell me a joke."}]
)
response2 = completion(
model="gpt-3.5-turbo",
model="gpt-4o",
messages=[{"role": "user", "content": "Tell me a joke."}]
)

Expand Down Expand Up @@ -132,11 +132,11 @@ from litellm.caching.caching import Cache
litellm.cache = Cache(type="gcs", gcs_bucket_name="my-cache-bucket", gcs_path_service_account="/path/to/service_account.json")

response1 = completion(
model="gpt-3.5-turbo",
model="gpt-4o",
messages=[{"role": "user", "content": "Tell me a joke."}]
)
response2 = completion(
model="gpt-3.5-turbo",
model="gpt-4o",
messages=[{"role": "user", "content": "Tell me a joke."}]
)

Expand Down Expand Up @@ -170,11 +170,11 @@ litellm.cache = Cache(type="s3", s3_bucket_name="cache-bucket-litellm", s3_regio

# Make completion calls
response1 = completion(
model="gpt-3.5-turbo",
model="gpt-4o",
messages=[{"role": "user", "content": "Tell me a joke."}]
)
response2 = completion(
model="gpt-3.5-turbo",
model="gpt-4o",
messages=[{"role": "user", "content": "Tell me a joke."}]
)

Expand All @@ -201,11 +201,11 @@ litellm.cache = Cache(type="azure-blob", azure_account_url="https://example.blob

# Make completion calls
response1 = completion(
model="gpt-3.5-turbo",
model="gpt-4o",
messages=[{"role": "user", "content": "Tell me a joke."}]
)
response2 = completion(
model="gpt-3.5-turbo",
model="gpt-4o",
messages=[{"role": "user", "content": "Tell me a joke."}]
)

Expand Down Expand Up @@ -244,7 +244,7 @@ litellm.cache = Cache(
redis_semantic_cache_embedding_model="text-embedding-ada-002", # this model is passed to litellm.embedding(), any litellm.embedding() model is supported here
)
response1 = completion(
model="gpt-3.5-turbo",
model="gpt-4o",
messages=[
{
"role": "user",
Expand All @@ -258,7 +258,7 @@ print(f"response1: {response1}")
random_number = random.randint(1, 100000)

response2 = completion(
model="gpt-3.5-turbo",
model="gpt-4o",
messages=[
{
"role": "user",
Expand Down Expand Up @@ -301,7 +301,7 @@ litellm.cache = Cache(
)

response1 = completion(
model="gpt-3.5-turbo",
model="gpt-4o",
messages=[
{
"role": "user",
Expand All @@ -315,7 +315,7 @@ print(f"response1: {response1}")
random_number = random.randint(1, 100000)

response2 = completion(
model="gpt-3.5-turbo",
model="gpt-4o",
messages=[
{
"role": "user",
Expand Down Expand Up @@ -343,12 +343,12 @@ litellm.cache = Cache()

# Make completion calls
response1 = completion(
model="gpt-3.5-turbo",
model="gpt-4o",
messages=[{"role": "user", "content": "Tell me a joke."}],
caching=True
)
response2 = completion(
model="gpt-3.5-turbo",
model="gpt-4o",
messages=[{"role": "user", "content": "Tell me a joke."}],
caching=True
)
Expand Down Expand Up @@ -379,12 +379,12 @@ litellm.cache = Cache(type="disk")

# Make completion calls
response1 = completion(
model="gpt-3.5-turbo",
model="gpt-4o",
messages=[{"role": "user", "content": "Tell me a joke."}],
caching=True
)
response2 = completion(
model="gpt-3.5-turbo",
model="gpt-4o",
messages=[{"role": "user", "content": "Tell me a joke."}],
caching=True
)
Expand Down Expand Up @@ -416,7 +416,7 @@ Example usage `no-cache` - When `True`, Will not return a cached response

```python
response = litellm.completion(
model="gpt-3.5-turbo",
model="gpt-4o",
messages=[
{
"role": "user",
Expand All @@ -435,7 +435,7 @@ Example usage `no-store` - When `True`, Will not cache the response.

```python
response = litellm.completion(
model="gpt-3.5-turbo",
model="gpt-4o",
messages=[
{
"role": "user",
Expand All @@ -453,7 +453,7 @@ Example usage `ttl` - cache the response for 10 seconds

```python
response = litellm.completion(
model="gpt-3.5-turbo",
model="gpt-4o",
messages=[
{
"role": "user",
Expand All @@ -471,7 +471,7 @@ Example usage `s-maxage` - Will only accept cached responses for 60 seconds

```python
response = litellm.completion(
model="gpt-3.5-turbo",
model="gpt-4o",
messages=[
{
"role": "user",
Expand Down
8 changes: 4 additions & 4 deletions docs/my-website/docs/caching/caching_api.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,13 +11,13 @@ litellm.cache = Cache(type="hosted") # init cache to use api.litellm.ai

# Make completion calls
response1 = completion(
model="gpt-3.5-turbo",
model="gpt-4o",
messages=[{"role": "user", "content": "Tell me a joke."}],
caching=True
)

response2 = completion(
model="gpt-3.5-turbo",
model="gpt-4o",
messages=[{"role": "user", "content": "Tell me a joke."}],
caching=True
)
Expand Down Expand Up @@ -59,7 +59,7 @@ litellm.cache = Cache(type="hosted")

# Make completion calls
response1 = completion(
model="gpt-3.5-turbo",
model="gpt-4o",
messages=[{"role": "user", "content": "Tell me a joke."}],
stream=True,
caching=True)
Expand All @@ -69,7 +69,7 @@ for chunk in response1:
time.sleep(1) # cache is updated asynchronously

response2 = completion(
model="gpt-3.5-turbo",
model="gpt-4o",
messages=[{"role": "user", "content": "Tell me a joke."}],
stream=True,
caching=True)
Expand Down
8 changes: 4 additions & 4 deletions docs/my-website/docs/caching/local_caching.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,12 @@ litellm.cache = Cache()

# Make completion calls
response1 = completion(
model="gpt-3.5-turbo",
model="gpt-4o",
messages=[{"role": "user", "content": "Tell me a joke."}],
caching=True
)
response2 = completion(
model="gpt-3.5-turbo",
model="gpt-4o",
messages=[{"role": "user", "content": "Tell me a joke."}],
caching=True
)
Expand Down Expand Up @@ -55,14 +55,14 @@ litellm.cache = Cache()

# Make completion calls
response1 = completion(
model="gpt-3.5-turbo",
model="gpt-4o",
messages=[{"role": "user", "content": "Tell me a joke."}],
stream=True,
caching=True)
for chunk in response1:
print(chunk)
response2 = completion(
model="gpt-3.5-turbo",
model="gpt-4o",
messages=[{"role": "user", "content": "Tell me a joke."}],
stream=True,
caching=True)
Expand Down
4 changes: 2 additions & 2 deletions docs/my-website/docs/completion/audio.md
Original file line number Diff line number Diff line change
Expand Up @@ -216,8 +216,8 @@ Use `litellm.supports_audio_input(model="")` -> returns `True` if model can acce
assert litellm.supports_audio_output(model="gpt-4o-audio-preview") == True
assert litellm.supports_audio_input(model="gpt-4o-audio-preview") == True

assert litellm.supports_audio_output(model="gpt-3.5-turbo") == False
assert litellm.supports_audio_input(model="gpt-3.5-turbo") == False
assert litellm.supports_audio_output(model="gpt-4o") == False
assert litellm.supports_audio_input(model="gpt-4o") == False
```
</TabItem>

Expand Down
6 changes: 3 additions & 3 deletions docs/my-website/docs/completion/batching.md
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ os.environ['OPENAI_API_KEY'] = ""
os.environ['COHERE_API_KEY'] = ""

response = batch_completion_models(
models=["gpt-3.5-turbo", "claude-instant-1.2", "command-nightly"],
models=["gpt-4o", "claude-instant-1.2", "command-nightly"],
messages=[{"role": "user", "content": "Hey, how's it going"}]
)
print(response)
Expand Down Expand Up @@ -203,7 +203,7 @@ os.environ['OPENAI_API_KEY'] = ""
os.environ['COHERE_API_KEY'] = ""

responses = batch_completion_models_all_responses(
models=["gpt-3.5-turbo", "claude-instant-1.2", "command-nightly"],
models=["gpt-4o", "claude-instant-1.2", "command-nightly"],
messages=[{"role": "user", "content": "Hey, how's it going"}]
)
print(responses)
Expand Down Expand Up @@ -259,7 +259,7 @@ print(responses)
"id": "chatcmpl-80szFnKHzCxObW0RqCMw1hWW1Icrq",
"object": "chat.completion",
"created": 1695222061,
"model": "gpt-3.5-turbo-0613",
"model": "gpt-4o-2024-08-06",
"choices": [
{
"index": 0,
Expand Down
2 changes: 1 addition & 1 deletion docs/my-website/docs/completion/drop_params.md
Original file line number Diff line number Diff line change
Expand Up @@ -210,7 +210,7 @@ client = openai.OpenAI(
)

response = client.chat.completions.create(
model="gpt-3.5-turbo",
model="gpt-4o",
messages = [
{
"role": "user",
Expand Down
Loading
Loading