 import anthropic
 import tiktoken
-from azure.identity import get_bearer_token_provider
-from azure.identity import DefaultAzureCredential
+from azure.identity import DefaultAzureCredential, get_bearer_token_provider
+
 from eureka_ml_insights.secret_management import get_secret
@@ -288,7 +288,7 @@ def get_response(self, request):
             "response_time": response_time,
         }
         if "usage" in res:
-            return response_dict.update({"usage": res["usage"]})
+            response_dict.update({"usage": res["usage"]})
         return response_dict

     def handle_request_error(self, e):
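Note: `dict.update()` mutates the dict in place and returns `None`, so the old `return response_dict.update(...)` made `get_response` return `None` whenever a "usage" field was present. The fix updates the dict first and falls through to `return response_dict`. A minimal standalone illustration (plain Python, not repo code):

    d = {"model_output": "hi", "response_time": 0.5}
    result = d.update({"usage": {"total_tokens": 42}})
    print(result)  # None -- update() does not return the dict
    print(d)       # the dict itself now contains the "usage" entry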
@@ -395,6 +395,7 @@ def create_request(self, text_prompt, query_images=None, system_message=None, pr
         body = str.encode(json.dumps(data))
         return urllib.request.Request(self.url, body, self.headers)

+
 @dataclass
 class DeepseekR1ServerlessAzureRestEndpointModel(ServerlessAzureRestEndpointModel):
     # setting temperature to 0.6 as suggested in https://huggingface.co/deepseek-ai/DeepSeek-R1
@@ -410,7 +411,9 @@ def create_request(self, text_prompt, query_images=None, system_message=None, pr
         if previous_messages:
             messages.extend(previous_messages)
         if query_images:
-            raise NotImplementedError("Images are not supported for DeepseekR1ServerlessAzureRestEndpointModel endpoints.")
+            raise NotImplementedError(
+                "Images are not supported for DeepseekR1ServerlessAzureRestEndpointModel endpoints."
+            )
         messages.append({"role": "user", "content": text_prompt})
         data = {
             "messages": messages,
@@ -422,6 +425,7 @@ def create_request(self, text_prompt, query_images=None, system_message=None, pr
         body = str.encode(json.dumps(data))
         return urllib.request.Request(self.url, body, self.headers)

+
 @dataclass
 class OpenAICommonRequestResponseMixIn:
     """
@@ -470,7 +474,7 @@ def get_response(self, request):
             "response_time": response_time,
         }
         if "usage" in openai_response:
-            return response_dict.update({"usage": openai_response["usage"]})
+            response_dict.update({"usage": openai_response["usage"]})
         return response_dict

@@ -489,7 +493,7 @@ def get_client(self):

     def handle_request_error(self, e):
         # if the error is due to a content filter, there is no need to retry
-        if hasattr(e, 'code') and e.code == "content_filter":
+        if hasattr(e, "code") and e.code == "content_filter":
             logging.warning("Content filtered.")
             response = None
             return response, False, True
@@ -617,7 +621,7 @@ def get_response(self, request):
             "response_time": response_time,
         }
         if "usage" in openai_response:
-            return response_dict.update({"usage": openai_response["usage"]})
+            response_dict.update({"usage": openai_response["usage"]})
         return response_dict

@@ -706,6 +710,7 @@ def create_request(self, text_prompt, query_images=None, system_message=None, pr

     def get_response(self, request):
         start_time = time.time()
+        gemini_response = None
         try:
             gemini_response = self.model.generate_content(
                 request,
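Note: pre-initializing `gemini_response = None` matters because `generate_content` can raise before the name is ever bound; without the default, the `except` block's call to `handle_gemini_error(e, gemini_response)` would fail with `UnboundLocalError` and mask the original exception. A minimal standalone illustration (not repo code):

    def risky():
        raise RuntimeError("boom")

    response = None  # without this line, the except block raises UnboundLocalError
    try:
        response = risky()
    except Exception as e:
        print("failed:", e, "partial response:", response)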
@@ -717,9 +722,7 @@ def get_response(self, request):
             model_output = gemini_response.parts[0].text
             response_time = end_time - start_time
         except Exception as e:
-            is_non_transient_issue = self.handle_gemini_error(e, gemini_response)
-            if not is_non_transient_issue:
-                raise e
+            self.handle_gemini_error(e, gemini_response)

         response_dict = {
             "model_output": model_output,
@@ -755,7 +758,7 @@ def handle_gemini_error(self, e, gemini_response):
            logging.warning(
                f"Attempt failed due to explicitly blocked input prompt: {e} Block Reason {gemini_response.prompt_feedback.block_reason}"
            )
-            return True
+
        # Handling cases where the model implicitly blocks prompts and does not provide an explicit block reason for it but rather an empty content.
        # In these cases, there is no need to make a new attempt as the model will continue to implicitly block the request, do_return = True.
        # Note that, in some cases, the model may still provide a finish reason as shown here https://ai.google.dev/api/generate-content?authuser=2#FinishReason
@@ -771,11 +774,11 @@ def handle_gemini_error(self, e, gemini_response):
                logging.warning(
                    f"Safety Ratings for the first answer candidate are: {gemini_response.candidates[0].safety_ratings}"
                )
-            return True
-        # Any other case will be re attempted again, do_return = False.
-        return False
+
+        raise e

     def handle_request_error(self, e):
+        # Any error case not handled in handle_gemini_error will be attempted again, do_return = False.
         return False

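Note: with this restructuring, `handle_gemini_error` only logs diagnostic details (block reason, safety ratings) and then re-raises, while `handle_request_error` returning `False` signals the caller to retry, per the "do_return" convention in the comments. A rough sketch of how a retry wrapper presumably consumes that flag; `retry_wrapper`, `num_retries`, and the return shape are assumptions for illustration, not the repo's actual API:

    # Hypothetical sketch of the do_return convention; not the eureka_ml_insights implementation.
    def retry_wrapper(model, request, num_retries=3):
        for _ in range(num_retries):
            try:
                return model.get_response(request)
            except Exception as e:
                do_return = model.handle_request_error(e)  # False -> retry, True -> give up
                if do_return:
                    return None
        return None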
@@ -1326,19 +1329,25 @@ def get_response(self, request):
     def handle_request_error(self, e):
         return False

+
 @dataclass
 class ClaudeReasoningModel(ClaudeModel):
     """This class is used to interact with Claude reasoning models through the python api."""

     model_name: str = None
-    temperature: float = 1.
+    temperature: float = 1.0
     max_tokens: int = 20000
     timeout: int = 600
     thinking_enabled: bool = True
     thinking_budget: int = 16000
     top_p: float = None

     def get_response(self, request):
+        model_output = None
+        response_time = None
+        thinking_output = None
+        redacted_thinking_output = None
+        response_dict = {}
         if self.top_p is not None:
             logging.warning("top_p is not supported for claude reasoning models as of 03/08/2025. It will be ignored.")
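Note: these defaults line up with Anthropic's extended-thinking constraints as documented: temperature must be 1 when thinking is enabled, top_p is not supported, and the thinking budget must stay below max_tokens (16000 < 20000 here). A hedged sketch of the kind of call these fields typically feed into; the exact create_request wiring is not shown in this diff, and the model name is only an example:

    # Sketch: how thinking_enabled / thinking_budget commonly map onto the Anthropic SDK.
    import anthropic

    client = anthropic.Anthropic()
    completion = client.messages.create(
        model="claude-3-7-sonnet-20250219",  # example model name, not taken from this PR
        max_tokens=20000,
        temperature=1.0,
        thinking={"type": "enabled", "budget_tokens": 16000},
        messages=[{"role": "user", "content": "text_prompt"}],
    )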
@@ -1355,16 +1364,24 @@ def get_response(self, request):

         # Loop through completion.content to find the text output
         for content in completion.content:
-            if content.type == 'text':
-                self.model_output = content.text
-            elif content.type == 'thinking':
-                self.thinking_output = content.thinking
-            elif content.type == 'redacted_thinking':
-                self.redacted_thinking_output = content.data
+            if content.type == "text":
+                model_output = content.text
+            elif content.type == "thinking":
+                thinking_output = content.thinking
+            elif content.type == "redacted_thinking":
+                redacted_thinking_output = content.data

-        self.response_time = end_time - start_time
+        response_time = end_time - start_time
+        response_dict = {
+            "model_output": model_output,
+            "response_time": response_time,
+            "thinking_output": thinking_output,
+            "redacted_thinking_output": redacted_thinking_output,
+        }
         if hasattr(completion, "usage"):
-            return {"usage": completion.usage.to_dict()}
+            response_dict.update({"usage": completion.usage.to_dict()})
+        return response_dict
+

 @dataclass
 class TestModel(Model):
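Note: the rewrite stops stashing `model_output`, `thinking_output`, and `response_time` on `self` (where they could leak between calls) and instead collects everything into a local `response_dict`; the usage branch now updates that dict rather than returning only `{"usage": ...}`. The returned value therefore looks roughly like the sketch below (field values are made up for illustration):

    {
        "model_output": "final answer text",
        "thinking_output": "extended reasoning, if provided",
        "redacted_thinking_output": None,
        "response_time": 12.3,
        "usage": {"input_tokens": 50, "output_tokens": 900},
    }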