
Commit f422f39

feat: default to api temperature
Parent: 342484a

2 files changed: 33 additions, 14 deletions


balrog/client.py

Lines changed: 32 additions & 13 deletions
@@ -181,12 +181,19 @@ def generate(self, messages):
         converted_messages = self.convert_messages(messages)

         def api_call():
-            return self.client.chat.completions.create(
-                messages=converted_messages,
-                model=self.model_id,
-                temperature=self.client_kwargs.get("temperature", 0.5),
-                max_tokens=self.client_kwargs.get("max_tokens", 1024),
-            )
+            # Create kwargs for the API call
+            api_kwargs = {
+                "messages": converted_messages,
+                "model": self.model_id,
+                "max_tokens": self.client_kwargs.get("max_tokens", 1024),
+            }
+
+            # Only include temperature if it's not None
+            temperature = self.client_kwargs.get("temperature")
+            if temperature is not None:
+                api_kwargs["temperature"] = temperature
+
+            return self.client.chat.completions.create(**api_kwargs)

         response = self.execute_with_retries(api_call)

@@ -217,11 +224,16 @@ def _initialize_client(self):
         if not self._initialized:
             self.model = genai.GenerativeModel(self.model_id)

+            # Create kwargs dictionary for GenerationConfig
             client_kwargs = {
-                "temperature": self.client_kwargs.get("temperature", 0.5),
                 "max_output_tokens": self.client_kwargs.get("max_tokens", 1024),
             }

+            # Only include temperature if it's not None
+            temperature = self.client_kwargs.get("temperature")
+            if temperature is not None:
+                client_kwargs["temperature"] = temperature
+
             self.generation_config = genai.types.GenerationConfig(**client_kwargs)
             self._initialized = True

@@ -411,12 +423,19 @@ def generate(self, messages):
         converted_messages = self.convert_messages(messages)

         def api_call():
-            return self.client.messages.create(
-                messages=converted_messages,
-                model=self.model_id,
-                temperature=self.client_kwargs.get("temperature", 0.5),
-                max_tokens=self.client_kwargs.get("max_tokens", 1024),
-            )
+            # Create kwargs for the API call
+            api_kwargs = {
+                "messages": converted_messages,
+                "model": self.model_id,
+                "max_tokens": self.client_kwargs.get("max_tokens", 1024),
+            }
+
+            # Only include temperature if it's not None
+            temperature = self.client_kwargs.get("temperature")
+            if temperature is not None:
+                api_kwargs["temperature"] = temperature
+
+            return self.client.messages.create(**api_kwargs)

         response = self.execute_with_retries(api_call)

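Taken together, the client.py changes build the request kwargs incrementally and only attach temperature when one is configured, so the provider's own default sampling temperature applies otherwise. A minimal self-contained sketch of that pattern (build_api_kwargs is a hypothetical helper written for illustration, not a function in the repository):

def build_api_kwargs(client_kwargs, messages, model_id):
    # Illustrative only: mirrors the conditional-kwargs pattern from the diff.
    api_kwargs = {
        "messages": messages,
        "model": model_id,
        "max_tokens": client_kwargs.get("max_tokens", 1024),
    }
    temperature = client_kwargs.get("temperature")
    if temperature is not None:  # omit the key entirely when unset
        api_kwargs["temperature"] = temperature
    return api_kwargs

# With temperature unset, the key is omitted and the API default applies:
assert "temperature" not in build_api_kwargs({"temperature": None}, [], "gpt-4o")
# With an explicit value, behaviour is unchanged:
assert build_api_kwargs({"temperature": 0.7}, [], "gpt-4o")["temperature"] == 0.7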
balrog/config/config.yaml

Lines changed: 1 addition & 1 deletion
@@ -30,7 +30,7 @@ client:
   model_id: gpt-4o # Model identifier (e.g., 'gpt-4', 'gpt-3.5-turbo')
   base_url: http://localhost:8080/v1 # Base URL for the API (if using a local server)
   generate_kwargs:
-    temperature: 0.0 # Sampling temperature; 0.0 makes the output deterministic
+    temperature: null # Sampling temperature. If null the API default temperature is used instead
     max_tokens: 4096 # Max tokens to generate in the response
   timeout: 60 # Timeout for API requests in seconds
   max_retries: 5 # Max number of retries for failed API calls
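The config change relies on the YAML null arriving in client_kwargs as Python None, which standard YAML loaders produce by default; a quick check using PyYAML purely for illustration (the project's actual config loader may differ):

import yaml  # PyYAML, shown only to illustrate the null -> None mapping

cfg = yaml.safe_load("temperature: null\nmax_tokens: 4096\n")
assert cfg["temperature"] is None   # so client_kwargs.get("temperature") returns None
assert cfg["max_tokens"] == 4096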
