From ac4ebc439f9fcfd7e66035109d9453a5ab035e85 Mon Sep 17 00:00:00 2001 From: DavidePaglieri Date: Thu, 3 Apr 2025 14:09:32 +0000 Subject: [PATCH 1/2] fix: execute with retries for gemini --- balrog/client.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/balrog/client.py b/balrog/client.py index 9d942e46..95412534 100644 --- a/balrog/client.py +++ b/balrog/client.py @@ -351,18 +351,18 @@ def generate(self, messages): converted_messages = self.convert_messages(messages) def api_call(): - return self.model.generate_content( + response = self.model.generate_content( converted_messages, generation_config=self.generation_config, ) + # Attempt to extract completion immediately after API call + completion = self.extract_completion(response) + # Return both response and completion if successful + return response, completion - response = self.execute_with_retries(api_call) + # Execute the API call and extraction together with retries + response, completion = self.execute_with_retries(api_call) - def extract_completion_call(): - return self.extract_completion(response) - - completion = self.execute_with_retries(extract_completion_call) - return LLMResponse( model_id=self.model_id, completion=completion, From d5bfca6ff397e6303f11668f2d8703fe0190e077 Mon Sep 17 00:00:00 2001 From: DavidePaglieri Date: Thu, 10 Apr 2025 15:31:29 +0000 Subject: [PATCH 2/2] fix: gemini retry --- balrog/client.py | 74 +++++++++++++++++++++++++++++++++--------------- 1 file changed, 51 insertions(+), 23 deletions(-) diff --git a/balrog/client.py b/balrog/client.py index 95412534..ccabe97d 100644 --- a/balrog/client.py +++ b/balrog/client.py @@ -2,6 +2,9 @@ import datetime import logging import time +import json +import csv +import os from collections import namedtuple from io import BytesIO @@ -360,29 +363,54 @@ def api_call(): # Return both response and completion if successful return response, completion - # Execute the API call and extraction together with retries - response, completion = self.execute_with_retries(api_call) - - return LLMResponse( - model_id=self.model_id, - completion=completion, - stop_reason=( - getattr(response.candidates[0], "finish_reason", "unknown") - if response and getattr(response, "candidates", []) - else "unknown" - ), - input_tokens=( - getattr(response.usage_metadata, "prompt_token_count", 0) - if response and getattr(response, "usage_metadata", None) - else 0 - ), - output_tokens=( - getattr(response.usage_metadata, "candidates_token_count", 0) - if response and getattr(response, "usage_metadata", None) - else 0 - ), - reasoning=None, - ) + try: + # Execute the API call and extraction together with retries + response, completion = self.execute_with_retries(api_call) + + # Check if the successful response contains an empty completion + if not completion or completion.strip() == "": + logger.warning(f"Gemini returned an empty completion for model {self.model_id}. Returning default empty response.") + return LLMResponse( + model_id=self.model_id, + completion="", + stop_reason="empty_response", + input_tokens=getattr(response.usage_metadata, "prompt_token_count", 0) if response and getattr(response, "usage_metadata", None) else 0, + output_tokens=getattr(response.usage_metadata, "candidates_token_count", 0) if response and getattr(response, "usage_metadata", None) else 0, + reasoning=None, + ) + else: + # If completion is not empty, return the normal response + return LLMResponse( + model_id=self.model_id, + completion=completion, + stop_reason=( + getattr(response.candidates[0], "finish_reason", "unknown") + if response and getattr(response, "candidates", []) + else "unknown" + ), + input_tokens=( + getattr(response.usage_metadata, "prompt_token_count", 0) + if response and getattr(response, "usage_metadata", None) + else 0 + ), + output_tokens=( + getattr(response.usage_metadata, "candidates_token_count", 0) + if response and getattr(response, "usage_metadata", None) + else 0 + ), + reasoning=None, + ) + except Exception as e: + logger.error(f"API call failed after {self.max_retries} retries: {e}. Returning empty completion.") + # Return a default response indicating failure + return LLMResponse( + model_id=self.model_id, + completion="", + stop_reason="error_max_retries", + input_tokens=0, # Assuming 0 tokens consumed if call failed + output_tokens=0, + reasoning=None, + ) class ClaudeWrapper(LLMClientWrapper):