Skip to content

Commit 23e1a87

Browse files
authored
Add support for claude thinking models (#103)
- New class added for claude reasoning model - Bump anthropic pip requirement to latest to support thinking requests Note: conda environment files remain unchanged. Warning: The current claude API returns several content blocks (thinking, text, ...) instead of just text. This will likely cause compatibility issues with multi-turn workflows.
1 parent ede9ee1 commit 23e1a87

File tree

5 files changed

+61
-2
lines changed

5 files changed

+61
-2
lines changed

eureka_ml_insights/configs/model_configs.py

+14
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
from eureka_ml_insights.models import (
66
AzureOpenAIOModel,
77
ClaudeModel,
8+
ClaudeReasoningModel,
89
DirectOpenAIModel,
910
DirectOpenAIOModel,
1011
GeminiModel,
@@ -201,6 +202,19 @@
201202
},
202203
)
203204

205+
# Configuration for Claude 3.7 Sonnet with extended thinking enabled.
CLAUDE_3_7_SONNET_THINKING_CONFIG = ModelConfig(
    ClaudeReasoningModel,
    {
        "secret_key_params": CLAUDE_SECRET_KEY_PARAMS,
        "model_name": "claude-3-7-sonnet-20250219",
        "thinking_enabled": True,
        "thinking_budget": 16000,
        # max_tokens must always be strictly greater than thinking_budget,
        # so the model has room to emit a final text answer after thinking.
        "max_tokens": 20000,
        # As of 03/08/2025, the thinking API only accepts temperature 1.0.
        "temperature": 1.0,
        # Allow up to 10 minutes for a thinking request to complete.
        "timeout": 600,
    },
)
217+
204218
CLAUDE_3_5_SONNET_20241022_CONFIG = ModelConfig(
205219
ClaudeModel,
206220
{

eureka_ml_insights/data_utils/transform.py

+5-1
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414

1515
from eureka_ml_insights.models import (
1616
ClaudeModel,
17+
ClaudeReasoningModel,
1718
GeminiModel,
1819
LlamaServerlessAzureRestEndpointModel,
1920
MistralServerlessAzureRestEndpointModel,
@@ -452,7 +453,8 @@ def transform(self, df: pd.DataFrame) -> pd.DataFrame:
452453
usage_completion_read_col = None
453454
if (self.model_config.class_name is GeminiModel):
454455
usage_completion_read_col = "candidates_token_count"
455-
elif (self.model_config.class_name is ClaudeModel):
456+
elif (self.model_config.class_name is ClaudeModel
457+
or self.model_config.class_name is ClaudeReasoningModel):
456458
usage_completion_read_col = "output_tokens"
457459
elif (self.model_config.class_name is AzureOpenAIOModel
458460
or self.model_config.class_name is AzureOpenAIModel
@@ -463,6 +465,8 @@ def transform(self, df: pd.DataFrame) -> pd.DataFrame:
463465
or self.model_config.class_name is DirectOpenAIOModel
464466
or self.model_config.class_name is TogetherModel):
465467
usage_completion_read_col = "completion_tokens"
468+
else:
469+
logging.warn(f"Model {self.model_config.class_name} is not recognized for extracting completion token usage.")
466470
# if the model is one for which the usage of completion tokens is known, use that corresponding column for the model
467471
# otherwise, use the default "n_output_tokens" which is computed with a universal tokenizer as shown in TokenCounterTransform()
468472
if usage_completion_read_col:

eureka_ml_insights/models/__init__.py

+2
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
AzureOpenAIModel,
33
AzureOpenAIOModel,
44
ClaudeModel,
5+
ClaudeReasoningModel,
56
DirectOpenAIModel,
67
DirectOpenAIOModel,
78
GeminiModel,
@@ -32,6 +33,7 @@
3233
AzureOpenAIModel,
3334
GeminiModel,
3435
ClaudeModel,
36+
ClaudeReasoningModel,
3537
MistralServerlessAzureRestEndpointModel,
3638
LlamaServerlessAzureRestEndpointModel,
3739
DeepseekR1ServerlessAzureRestEndpointModel,

eureka_ml_insights/models/models.py

+39
Original file line numberDiff line numberDiff line change
@@ -1269,6 +1269,45 @@ def get_response(self, request):
12691269
def handle_request_error(self, e):
12701270
return False
12711271

1272+
@dataclass
class ClaudeReasoningModel(ClaudeModel):
    """Interacts with Claude extended-thinking (reasoning) models through the python API.

    Attributes:
        model_name: Name of the Claude model to call (e.g. "claude-3-7-sonnet-20250219").
        temperature: Sampling temperature. As of 03/08/2025, thinking requests
            only work with temperature 1.0.
        max_tokens: Maximum tokens in the response. Must be strictly greater than
            thinking_budget so the model can emit text after thinking.
        timeout: Request timeout in seconds.
        thinking_enabled: Whether to request extended thinking from the API.
        thinking_budget: Token budget allotted to the thinking phase.
        top_p: Not supported by thinking models; kept for interface
            compatibility and ignored with a warning if set.
    """

    model_name: str = None
    temperature: float = 1.0
    max_tokens: int = 20000
    timeout: int = 600
    thinking_enabled: bool = True
    thinking_budget: int = 16000
    top_p: float = None

    def get_response(self, request):
        """Send the request to the model and collect its content blocks.

        Side effects: sets self.model_output (text block), self.thinking_output
        (thinking block), self.redacted_thinking_output (redacted_thinking block)
        for whichever block types appear in the response, and self.response_time.

        Args:
            request: Keyword arguments forwarded to client.messages.create
                (e.g. the messages payload).

        Returns:
            {"usage": <usage dict>} when the API response carries usage
            information; implicitly None otherwise.
        """
        if self.top_p is not None:
            logging.warning("top_p is not supported for claude reasoning models as of 03/08/2025. It will be ignored.")

        # Only pass `thinking` when it is enabled: the anthropic SDK expects the
        # parameter to be omitted entirely (NOT_GIVEN) rather than set to None,
        # so passing thinking=None when disabled would fail request validation.
        optional_kwargs = {}
        if self.thinking_enabled:
            optional_kwargs["thinking"] = {"type": "enabled", "budget_tokens": self.thinking_budget}

        start_time = time.time()
        completion = self.client.messages.create(
            model=self.model_name,
            **request,
            temperature=self.temperature,
            max_tokens=self.max_tokens,
            **optional_kwargs,
        )
        end_time = time.time()

        # The API returns a list of content blocks (text, thinking,
        # redacted_thinking, ...); pick each known kind out of the list.
        for content in completion.content:
            if content.type == 'text':
                self.model_output = content.text
            elif content.type == 'thinking':
                self.thinking_output = content.thinking
            elif content.type == 'redacted_thinking':
                self.redacted_thinking_output = content.data

        self.response_time = end_time - start_time
        if hasattr(completion, "usage"):
            return {"usage": completion.usage.to_dict()}
12721311

12731312
@dataclass
12741313
class TestModel(Model):

setup.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
packages=find_packages(),
1414
include_package_data=True,
1515
install_requires=[
16-
'anthropic>=0.30.0',
16+
'anthropic>=0.49.0',
1717
'azure-ai-textanalytics>=5.3.0',
1818
'azure-core>=1.29.5',
1919
'azure-keyvault-secrets>=4.8.0',

0 commit comments

Comments
 (0)