7 changes: 6 additions & 1 deletion crates/goose-test-support/src/session.rs
@@ -1,7 +1,12 @@
 use std::sync::{Arc, Mutex};
 
 pub const TEST_SESSION_ID: &str = "test-session-id";
-pub const TEST_MODEL: &str = "gpt-5-nano";
+// Use a Chat Completions model so the canned SSE fixtures (which return
+// Chat Completions format) are parsed correctly. gpt-5-nano now routes to
+// the Responses API, which needs a different mock format.
+// TODO: add a Responses API mock to OpenAiFixture so tests can cover
+// responses-routed models like gpt-5-nano end-to-end.
+pub const TEST_MODEL: &str = "gpt-4.1";
 
 const NOT_YET_SET: &str = "session-id-not-yet-set";
 pub(crate) const SESSION_ID_HEADER: &str = "agent-session-id";
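For context, the two streaming shapes the new comment distinguishes look roughly like this (illustrative payloads only; the actual OpenAiFixture contents are not shown in this PR):

// Hedged illustration, not the real fixture bytes.
// Chat Completions SSE: deltas arrive under choices[].delta.
const CHAT_COMPLETIONS_SSE: &str =
    r#"data: {"object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":"Hi"}}]}"#;
// The Responses API streams typed events instead, e.g. response.output_text.delta.
const RESPONSES_SSE: &str = r#"data: {"type":"response.output_text.delta","delta":"Hi"}"#;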
49 changes: 38 additions & 11 deletions crates/goose/src/model.rs
@@ -138,9 +138,22 @@ impl ModelConfig {
             }
         }
 
-        if let Some(canonical) =
+        // Try canonical lookup with the full model name first, then fall back
+        // to the name with reasoning-effort suffixes stripped (e.g.
+        // "databricks-gpt-5.4-high" → "databricks-gpt-5.4").
+        let canonical =
             crate::providers::canonical::maybe_get_canonical_model(provider_name, &self.model_name)
-        {
+                .or_else(|| {
+                    let (base, _effort) =
+                        crate::providers::utils::extract_reasoning_effort(&self.model_name);
+                    if base != self.model_name {
+                        crate::providers::canonical::maybe_get_canonical_model(provider_name, &base)
+                    } else {
+                        None
+                    }
+                });
+
+        if let Some(canonical) = canonical {
             if self.context_limit.is_none() {
                 self.context_limit = Some(canonical.limit.context);
             }
@@ -299,15 +312,7 @@
     }
 
     pub fn is_openai_reasoning_model(&self) -> bool {
-        const DATABRICKS_MODEL_NAME_PREFIXES: &[&str] = &["goose-", "databricks-"];
-        const REASONING_PREFIXES: &[&str] = &["o1", "o3", "o4", "gpt-5"];
-
-        let base = DATABRICKS_MODEL_NAME_PREFIXES
-            .iter()
-            .find_map(|p| self.model_name.strip_prefix(p))
-            .unwrap_or(&self.model_name);
-
-        REASONING_PREFIXES.iter().any(|p| base.starts_with(p))
+        crate::providers::utils::is_openai_responses_model(&self.model_name)
     }
 
     pub fn max_output_tokens(&self) -> i32 {
@@ -499,6 +504,28 @@ mod tests {
             assert_eq!(config.max_tokens, None);
             assert_eq!(config.reasoning, None);
         }
+
+        #[test]
+        fn resolves_after_stripping_reasoning_effort_suffix() {
+            let _guard = env_lock::lock_env([
+                ("GOOSE_MAX_TOKENS", None::<&str>),
+                ("GOOSE_CONTEXT_LIMIT", None::<&str>),
+            ]);
+
+            // "databricks-gpt-5.4-high" should resolve via "databricks-gpt-5.4"
+            let config = ModelConfig::new_or_fail("databricks-gpt-5.4-high")
+                .with_canonical_limits("databricks");
+            assert_eq!(config.context_limit, Some(1_050_000));
+
+            // "gpt-5.4-xhigh" should resolve via "gpt-5.4"
+            let config = ModelConfig::new_or_fail("gpt-5.4-xhigh").with_canonical_limits("openai");
+            assert_eq!(config.context_limit, Some(1_050_000));
+
+            // "gpt-5.4-nano-low" should resolve via "gpt-5.4-nano"
+            let config =
+                ModelConfig::new_or_fail("gpt-5.4-nano-low").with_canonical_limits("openai");
+            assert_eq!(config.context_limit, Some(400_000));
+        }
     }
 
     mod is_openai_reasoning_model {
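Note: the shared helper is_openai_responses_model that this diff delegates to is not shown in the PR. Judging from the inline logic it replaces above and the Databricks routing tests below, a plausible sketch (an assumption, not the actual providers/utils.rs source) would be:

// Assumed sketch of crate::providers::utils::is_openai_responses_model.
// Merges the two checks this PR removes: the prefix-stripping reasoning-model
// test from model.rs and the "codex" check from databricks.rs.
fn is_openai_responses_model(model_name: &str) -> bool {
    const PROVIDER_PREFIXES: &[&str] = &["goose-", "databricks-"];
    const REASONING_PREFIXES: &[&str] = &["o1", "o3", "o4", "gpt-5"];

    let normalized = model_name.to_ascii_lowercase();
    let base = PROVIDER_PREFIXES
        .iter()
        .find_map(|p| normalized.strip_prefix(p))
        .unwrap_or(&normalized);

    REASONING_PREFIXES.iter().any(|p| base.starts_with(p)) || base.contains("codex")
}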
57 changes: 52 additions & 5 deletions crates/goose/src/providers/databricks.rs
@@ -269,17 +269,19 @@ impl DatabricksProvider {
     }
 
     fn is_responses_model(model_name: &str) -> bool {
-        let normalized = model_name.to_ascii_lowercase();
-        normalized.contains("codex")
+        super::utils::is_openai_responses_model(model_name)
    }
 
     fn get_endpoint_path(&self, model_name: &str, is_embedding: bool) -> String {
         if is_embedding {
             "serving-endpoints/text-embedding-3-small/invocations".to_string()
-        } else if Self::is_responses_model(model_name) {
-            "serving-endpoints/responses".to_string()
         } else {
-            format!("serving-endpoints/{}/invocations", model_name)
+            let (clean_name, _) = super::utils::extract_reasoning_effort(model_name);
+            if Self::is_responses_model(&clean_name) {
+                "serving-endpoints/responses".to_string()
+            } else {
+                format!("serving-endpoints/{}/invocations", clean_name)
+            }
         }
     }
 
@@ -594,3 +596,48 @@ impl EmbeddingCapable for DatabricksProvider {
         Ok(embeddings)
     }
 }
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    fn test_provider() -> DatabricksProvider {
+        DatabricksProvider {
+            api_client: super::super::api_client::ApiClient::new(
+                "https://example.com".to_string(),
+                super::super::api_client::AuthMethod::NoAuth,
+            )
+            .unwrap(),
+            auth: DatabricksAuth::Token("fake".into()),
+            model: ModelConfig::new_or_fail("databricks-gpt-5.4"),
+            image_format: ImageFormat::OpenAi,
+            retry_config: RetryConfig::default(),
+            fast_retry_config: RetryConfig::new(0, 0, 1.0, 0),
+            name: "databricks".into(),
+            token_cache: std::sync::Arc::new(std::sync::Mutex::new(None)),
+            instance_id: None,
+        }
+    }
+
+    #[test]
+    fn responses_models_route_to_responses_endpoint() {
+        let provider = test_provider();
+
+        for (model_name, expected_path) in [
+            ("gpt-5.4", "serving-endpoints/responses"),
+            ("databricks-gpt-5.4-high", "serving-endpoints/responses"),
+            ("databricks-gpt-5-4-xhigh", "serving-endpoints/responses"),
+            ("o3-mini", "serving-endpoints/responses"),
+            (
+                "databricks-claude-sonnet-4",
+                "serving-endpoints/databricks-claude-sonnet-4/invocations",
+            ),
+        ] {
+            assert_eq!(
+                provider.get_endpoint_path(model_name, false),
+                expected_path,
+                "unexpected endpoint for {model_name}"
+            );
+        }
+    }
+}
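Similarly, extract_reasoning_effort is only referenced in this PR. Based on the behavior its tests pin down ("o3-xhigh" → ("o3", "xhigh"), "o3-none" → ("o3", "none"), "databricks-gpt-5.4-high" → ("databricks-gpt-5.4", "high"), and no effort for plain names like "o1"), a minimal sketch of its shape, assuming a simple trailing-suffix split:

// Assumed sketch of crate::providers::utils::extract_reasoning_effort;
// the real implementation lives in providers/utils.rs and is not in this diff.
fn extract_reasoning_effort(model_name: &str) -> (String, Option<String>) {
    // Effort levels observed in this PR's tests; the real list may differ.
    const EFFORTS: &[&str] = &["none", "low", "medium", "high", "xhigh"];

    if let Some((base, last)) = model_name.rsplit_once('-') {
        if EFFORTS.contains(&last) {
            return (base.to_string(), Some(last.to_string()));
        }
    }
    (model_name.to_string(), None)
}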
82 changes: 62 additions & 20 deletions crates/goose/src/providers/formats/databricks.rs
Expand Up @@ -2,8 +2,9 @@ use crate::conversation::message::{Message, MessageContent};
use crate::model::ModelConfig;
use crate::providers::formats::anthropic::{thinking_effort, thinking_type, ThinkingType};
use crate::providers::utils::{
convert_image, detect_image_path, is_valid_function_name, load_image_file, safely_parse_json,
sanitize_function_name, ImageFormat,
convert_image, detect_image_path, extract_reasoning_effort, is_openai_responses_model,
is_valid_function_name, load_image_file, safely_parse_json, sanitize_function_name,
ImageFormat,
};
use anyhow::{anyhow, Error};
use rmcp::model::{
@@ -581,24 +582,8 @@ pub fn create_request(
         ));
     }
 
-    let is_openai_reasoning_model = model_config.is_openai_reasoning_model();
-    let (model_name, reasoning_effort) = if is_openai_reasoning_model {
-        let parts: Vec<&str> = model_config.model_name.split('-').collect();
-        let last_part = parts.last().unwrap();
-
-        match *last_part {
-            "low" | "medium" | "high" => {
-                let base_name = parts[..parts.len() - 1].join("-");
-                (base_name, Some(last_part.to_string()))
-            }
-            _ => (
-                model_config.model_name.to_string(),
-                Some("medium".to_string()),
-            ),
-        }
-    } else {
-        (model_config.model_name.to_string(), None)
-    };
+    let (model_name, reasoning_effort) = extract_reasoning_effort(&model_config.model_name);
+    let is_openai_reasoning_model = is_openai_responses_model(&model_name);
 
     let system_message = DatabricksMessage {
         role: "system".to_string(),
@@ -1073,6 +1058,63 @@ mod tests {
         Ok(())
     }
 
+    #[test]
+    fn test_create_request_reasoning_effort_xhigh() -> anyhow::Result<()> {
+        let model_config = ModelConfig {
+            model_name: "o3-xhigh".to_string(),
+            context_limit: Some(4096),
+            temperature: None,
+            max_tokens: Some(1024),
+            toolshim: false,
+            toolshim_model: None,
+            fast_model_config: None,
+            request_params: None,
+            reasoning: None,
+        };
+        let request = create_request(&model_config, "system", &[], &[], &ImageFormat::OpenAi)?;
+        assert_eq!(request["model"], "o3");
+        assert_eq!(request["reasoning_effort"], "xhigh");
+        Ok(())
+    }
+
+    #[test]
+    fn test_create_request_reasoning_effort_none() -> anyhow::Result<()> {
+        let model_config = ModelConfig {
+            model_name: "o3-none".to_string(),
+            context_limit: Some(4096),
+            temperature: None,
+            max_tokens: Some(1024),
+            toolshim: false,
+            toolshim_model: None,
+            fast_model_config: None,
+            request_params: None,
+            reasoning: None,
+        };
+        let request = create_request(&model_config, "system", &[], &[], &ImageFormat::OpenAi)?;
+        assert_eq!(request["model"], "o3");
+        assert_eq!(request["reasoning_effort"], "none");
+        Ok(())
+    }
+
+    #[test]
+    fn test_create_request_reasoning_effort_for_prefixed_gpt5_model() -> anyhow::Result<()> {
+        let model_config = ModelConfig {
+            model_name: "databricks-gpt-5.4-high".to_string(),
+            context_limit: Some(4096),
+            temperature: None,
+            max_tokens: Some(1024),
+            toolshim: false,
+            toolshim_model: None,
+            fast_model_config: None,
+            request_params: None,
+            reasoning: None,
+        };
+        let request = create_request(&model_config, "system", &[], &[], &ImageFormat::OpenAi)?;
+        assert_eq!(request["model"], "databricks-gpt-5.4");
+        assert_eq!(request["reasoning_effort"], "high");
+        Ok(())
+    }
+
     #[test]
     fn test_create_request_adaptive_thinking_for_46_models() -> anyhow::Result<()> {
         let _guard = env_lock::lock_env([
15 changes: 10 additions & 5 deletions crates/goose/src/providers/formats/openai.rs
@@ -4,8 +4,9 @@ use crate::model::ModelConfig;
 use crate::providers::base::{ProviderUsage, Usage};
 use crate::providers::errors::ProviderError;
 use crate::providers::utils::{
-    convert_image, detect_image_path, extract_reasoning_effort, is_valid_function_name,
-    load_image_file, safely_parse_json, sanitize_function_name, ImageFormat,
+    convert_image, detect_image_path, extract_reasoning_effort, is_openai_responses_model,
+    is_valid_function_name, load_image_file, safely_parse_json, sanitize_function_name,
+    ImageFormat,
 };
 use anyhow::{anyhow, Error};
 use async_stream::try_stream;
@@ -984,7 +985,7 @@ pub fn create_request(
     }
 
     let (model_name, reasoning_effort) = extract_reasoning_effort(&model_config.model_name);
-    let is_reasoning_model = reasoning_effort.is_some();
+    let is_reasoning_model = is_openai_responses_model(&model_name);
 
     let system_message = json!({
         "role": if is_reasoning_model { "developer" } else { "system" },
@@ -1716,7 +1717,8 @@ mod tests {
 
     #[test]
     fn test_create_request_o1_default() -> anyhow::Result<()> {
-        // Test default medium reasoning effort for O1 model
+        // Without an explicit effort suffix the API picks its own default;
+        // we should omit reasoning_effort entirely but still use the "developer" role.
         let model_config = ModelConfig {
             model_name: "o1".to_string(),
             context_limit: Some(4096),
@@ -1745,13 +1747,16 @@
                     "content": "system"
                 }
             ],
-            "reasoning_effort": "medium",
             "max_completion_tokens": 1024
         });
 
         for (key, value) in expected.as_object().unwrap() {
             assert_eq!(obj.get(key).unwrap(), value);
         }
+        assert!(
+            obj.get("reasoning_effort").is_none(),
+            "reasoning_effort should be omitted when no explicit suffix is provided"
+        );
 
         Ok(())
     }
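For contrast with the default case above, a companion sketch for the suffixed path (a hypothetical test, assuming openai.rs's create_request takes the same arguments as in the databricks format tests; it is not part of this diff):

#[test]
fn test_create_request_o1_explicit_effort_sketch() -> anyhow::Result<()> {
    // Hypothetical: with an explicit "-high" suffix, the suffix should be
    // stripped from the model name and forwarded as reasoning_effort.
    let model_config = ModelConfig {
        model_name: "o1-high".to_string(),
        context_limit: Some(4096),
        temperature: None,
        max_tokens: Some(1024),
        toolshim: false,
        toolshim_model: None,
        fast_model_config: None,
        request_params: None,
        reasoning: None,
    };
    let request = create_request(&model_config, "system", &[], &[], &ImageFormat::OpenAi)?;
    assert_eq!(request["model"], "o1");
    assert_eq!(request["reasoning_effort"], "high");
    Ok(())
}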