Skip to content

Commit 8bebccf

Browse files
nushib (Besmira Nushi)
and co-author authored
gemini models bug fix - error handling (#79)
- adds more detail in the warning message when the Gemini model response has no output in `parts` but may still have candidate answers generated
- fixes the logic around retrying calls for EndpointModels

---------

Co-authored-by: Besmira Nushi <[email protected]>
1 parent 3a0835e commit 8bebccf

File tree

1 file changed

+11
-4
lines changed

1 file changed

+11
-4
lines changed

eureka_ml_insights/models/models.py

Lines changed: 11 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -123,8 +123,8 @@ def generate(self, query_text, *args, **kwargs):
123123
logging.warning(f"Attempt {attempts+1}/{self.num_retries} failed: {e}")
124124
do_return = self.handle_request_error(e)
125125
if do_return:
126-
self.model_output = self.response
127-
self.is_valid = self.is_valid
126+
self.model_output = None
127+
self.is_valid = False
128128
break
129129
attempts += 1
130130
else:
@@ -650,7 +650,7 @@ def handle_request_error(self, e):
650650
e (_type_): Exception occurred during getting a response.
651651
652652
returns:
653-
_type_: response, is_valid, do_return (False if the call should not be attempted again).
653+
_type_: do_return (True if the call should not be attempted again).
654654
"""
655655
# Handling cases where the model explicitly blocks prompts and provides a reason for it.
656656
# In these cases, there is no need to make a new attempt as the model will continue to explicitly block the request, do_return = True.
@@ -659,10 +659,17 @@ def handle_request_error(self, e):
659659
f"Attempt failed due to explicitly blocked input prompt: {e} Block Reason {self.gemini_response.prompt_feedback.block_reason}"
660660
)
661661
return True
662-
# Handling cases where the model implicitly blocks prompts and does not provide a reason for it but rather an empty content.
662+
# Handling cases where the model implicitly blocks prompts and does not provide an explicit block reason for it but rather an empty content.
663663
# In these cases, there is no need to make a new attempt as the model will continue to implicitly block the request, do_return = True.
664+
# Note that, in some cases, the model may still provide a finish reason as shown here https://ai.google.dev/api/generate-content?authuser=2#FinishReason
664665
elif e.__class__.__name__ == "IndexError" and len(self.gemini_response.parts) == 0:
665666
logging.warning(f"Attempt failed due to implicitly blocked input prompt and empty model output: {e}")
667+
# For cases where there are some response candidates do_return is still True because in most cases these candidates are incomplete.
668+
# Trying again may not necessarily help, unless in high temperature regimes.
669+
if len(self.gemini_response.candidates) > 0:
670+
logging.warning(f"The response is not empty and has : {len(self.gemini_response.candidates)} candidates")
671+
logging.warning(f"Finish Reason for the first answer candidate is: {self.gemini_response.candidates[0].finish_reason}")
672+
logging.warning(f"Safety Ratings for the first answer candidate are: {self.gemini_response.candidates[0].safety_ratings}")
666673
return True
667674
# Any other case will be re attempted again, do_return = False.
668675
return False

0 commit comments

Comments (0)