@@ -103,14 +103,17 @@ def _get_config(cls, model_name: str):
     @staticmethod
     def is_completion_exception_retryable(exception: Exception) -> tuple[bool, bool]:
         # First check for context length errors that need trimming
-        if isinstance(exception, openai.BadRequestError) and "maximum context length" in str(exception).lower():
+        if (
+            isinstance(exception, openai.BadRequestError)
+            and "maximum context length" in str(exception).lower()
+        ):
             return True, True  # Retry with message trimming
-
+
         # Original logic for other retryable errors
         is_retryable = isinstance(exception, openai.InternalServerError) or isinstance(
             exception, LlmStreamTimeoutError
         )
-
+
         return is_retryable, False  # Regular retry without trimming

     def generate_text(
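The second element of the returned tuple is what this diff adds: it tells the caller not merely to retry, but to trim the prompt before retrying. A minimal sketch of how a retry wrapper might consume that tuple (hypothetical: `generate_with_retry` and the `client` interface are assumptions, not part of this diff; `trim_messages_for_context_limit` is the helper added further down in this file):

```python
# Hypothetical caller, not part of this diff. Assumes `client` exposes the
# is_completion_exception_retryable staticmethod and a generate_text method.
def generate_with_retry(client, messages, max_attempts=3):
    for attempt in range(max_attempts):
        try:
            return client.generate_text(messages)
        except Exception as exc:
            is_retryable, should_trim = client.is_completion_exception_retryable(exc)
            if not is_retryable or attempt == max_attempts - 1:
                raise
            if should_trim:
                # Context-length failure: shrink the message list before retrying
                messages = trim_messages_for_context_limit(messages)
```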
@@ -460,9 +463,13 @@ def _get_config(cls, model_name: str):
     @staticmethod
     def is_completion_exception_retryable(exception: Exception) -> tuple[bool, bool]:
         # First check for context length errors that need trimming
-        if isinstance(exception, anthropic.AnthropicError) and "413" in str(exception) and "Prompt is too long" in str(exception):
+        if (
+            isinstance(exception, anthropic.AnthropicError)
+            and "413" in str(exception)
+            and "Prompt is too long" in str(exception)
+        ):
             return True, True  # Retry with message trimming
-
+
         # Original logic for other retryable errors
         retryable_errors = (
             "overloaded_error",
@@ -473,7 +480,7 @@ def is_completion_exception_retryable(exception: Exception) -> tuple[bool, bool]
             isinstance(exception, anthropic.AnthropicError)
             and any(error in str(exception) for error in retryable_errors)
         ) or isinstance(exception, LlmStreamTimeoutError)
-
+
         return is_retryable, False  # Regular retry without trimming

     @observe(as_type="generation", name="Anthropic Generation")
@@ -1508,35 +1515,35 @@ def trim_messages_for_context_limit(messages, preserve_first=2, preserve_last=3)
     """
     Trims messages from the middle of a list when they're too large for context windows.
     Preserves the first and last few messages to maintain conversation coherence.
-
+
     Args:
         messages: List of Message objects to trim
         preserve_first: Number of messages to preserve from the beginning
         preserve_last: Number of messages to preserve from the end
-
+
     Returns:
         A new list with fewer messages, with middle messages summarized
     """
     # Always preserve at least the first and last message
     preserve_first = max(preserve_first, 1)
     preserve_last = max(preserve_last, 1)
-
+
     if len(messages) <= preserve_first + preserve_last:
         return messages
-
+
     trimmed_messages = []
     trimmed_messages.extend(messages[:preserve_first])
-
+
     # Add a summary message in the middle
     middle_summary = Message(
         role="system",
-        content=f"{len(messages) - preserve_first - preserve_last} messages were removed to reduce context length."
+        content=f"{len(messages) - preserve_first - preserve_last} messages were removed to reduce context length.",
     )
     trimmed_messages.append(middle_summary)
-
+
     # Add the last few messages
     trimmed_messages.extend(messages[-preserve_last:])
-
+
     return trimmed_messages

 def construct_message_from_stream(
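To make the trimming arithmetic concrete, a quick usage sketch (the `Message` dataclass below is a stand-in assumption for the project's real class, which this snippet only assumes accepts `role`/`content` keyword arguments; `trim_messages_for_context_limit` is assumed to be in scope):

```python
# Illustrative stand-in for the project's Message type (an assumption:
# the real class accepts role/content keyword arguments, as the diff shows).
from dataclasses import dataclass

@dataclass
class Message:
    role: str
    content: str

messages = [Message(role="user", content=f"message {i}") for i in range(10)]
trimmed = trim_messages_for_context_limit(messages, preserve_first=2, preserve_last=3)

# 10 messages -> 2 preserved + 1 summary + 3 preserved = 6
assert len(trimmed) == 6
assert trimmed[2].content == "5 messages were removed to reduce context length."
```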