diff --git a/balrog/client.py b/balrog/client.py
index e4ea50ba..be03166f 100644
--- a/balrog/client.py
+++ b/balrog/client.py
@@ -184,6 +184,7 @@ def api_call():
             return self.client.chat.completions.create(
                 messages=converted_messages,
                 model=self.model_id,
+                temperature=self.client_kwargs.get("temperature", 0.5),
                 max_tokens=self.client_kwargs.get("max_tokens", 1024),
             )
 
diff --git a/balrog/config/config.yaml b/balrog/config/config.yaml
index 2b53786f..7124af07 100644
--- a/balrog/config/config.yaml
+++ b/balrog/config/config.yaml
@@ -30,7 +30,7 @@ client:
   base_url: http://localhost:8080/v1   # Base URL for the API (if using a local server)
   generate_kwargs:
     temperature: 0.0            # Sampling temperature; 0.0 makes the output deterministic
-    max_tokens: 1024            # Max tokens to generate in the response
+    max_tokens: 4096            # Max tokens to generate in the response
   timeout: 60                   # Timeout for API requests in seconds
   max_retries: 5                # Max number of retries for failed API calls
   delay: 2                      # Exponential backoff factor between retries in seconds