7 changes: 6 additions & 1 deletion crates/goose-test-support/src/session.rs
@@ -1,7 +1,12 @@
 use std::sync::{Arc, Mutex};
 
 pub const TEST_SESSION_ID: &str = "test-session-id";
-pub const TEST_MODEL: &str = "gpt-5-nano";
+// Use a Chat Completions model so the canned SSE fixtures (which return
+// Chat Completions format) are parsed correctly. gpt-5-nano now routes to
+// the Responses API, which needs a different mock format.
+// TODO: add a Responses API mock to OpenAiFixture so tests can cover
+// responses-routed models like gpt-5-nano end-to-end.
+pub const TEST_MODEL: &str = "gpt-4.1";
 
 const NOT_YET_SET: &str = "session-id-not-yet-set";
 pub(crate) const SESSION_ID_HEADER: &str = "agent-session-id";
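For context, the two streaming shapes the new comment distinguishes look roughly like this (illustrative payloads only; the actual OpenAiFixture contents are not shown in this PR):

// Hedged illustration, not the real fixture bytes.
// Chat Completions SSE: deltas arrive under choices[].delta.
const CHAT_COMPLETIONS_SSE: &str =
    r#"data: {"object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":"Hi"}}]}"#;
// The Responses API streams typed events instead, e.g. response.output_text.delta.
const RESPONSES_SSE: &str = r#"data: {"type":"response.output_text.delta","delta":"Hi"}"#;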
49 changes: 38 additions & 11 deletions crates/goose/src/model.rs
@@ -138,9 +138,22 @@ impl ModelConfig {
             }
         }
 
-        if let Some(canonical) =
+        // Try canonical lookup with the full model name first, then fall back
+        // to the name with reasoning-effort suffixes stripped (e.g.
+        // "databricks-gpt-5.4-high" → "databricks-gpt-5.4").
+        let canonical =
             crate::providers::canonical::maybe_get_canonical_model(provider_name, &self.model_name)
-        {
+                .or_else(|| {
+                    let (base, _effort) =
+                        crate::providers::utils::extract_reasoning_effort(&self.model_name);
+                    if base != self.model_name {
+                        crate::providers::canonical::maybe_get_canonical_model(provider_name, &base)
+                    } else {
+                        None
+                    }
+                });
+
+        if let Some(canonical) = canonical {
             if self.context_limit.is_none() {
                 self.context_limit = Some(canonical.limit.context);
             }
@@ -299,15 +312,7 @@
     }
 
     pub fn is_openai_reasoning_model(&self) -> bool {
-        const DATABRICKS_MODEL_NAME_PREFIXES: &[&str] = &["goose-", "databricks-"];
-        const REASONING_PREFIXES: &[&str] = &["o1", "o3", "o4", "gpt-5"];
-
-        let base = DATABRICKS_MODEL_NAME_PREFIXES
-            .iter()
-            .find_map(|p| self.model_name.strip_prefix(p))
-            .unwrap_or(&self.model_name);
-
-        REASONING_PREFIXES.iter().any(|p| base.starts_with(p))
+        crate::providers::utils::is_openai_responses_model(&self.model_name)
     }
 
     pub fn max_output_tokens(&self) -> i32 {
@@ -499,6 +504,28 @@ mod tests {
             assert_eq!(config.max_tokens, None);
             assert_eq!(config.reasoning, None);
         }
+
+        #[test]
+        fn resolves_after_stripping_reasoning_effort_suffix() {
+            let _guard = env_lock::lock_env([
+                ("GOOSE_MAX_TOKENS", None::<&str>),
+                ("GOOSE_CONTEXT_LIMIT", None::<&str>),
+            ]);
+
+            // "databricks-gpt-5.4-high" should resolve via "databricks-gpt-5.4"
+            let config = ModelConfig::new_or_fail("databricks-gpt-5.4-high")
+                .with_canonical_limits("databricks");
+            assert_eq!(config.context_limit, Some(1_050_000));
+
+            // "gpt-5.4-xhigh" should resolve via "gpt-5.4"
+            let config = ModelConfig::new_or_fail("gpt-5.4-xhigh").with_canonical_limits("openai");
+            assert_eq!(config.context_limit, Some(1_050_000));
+
+            // "gpt-5.4-nano-low" should resolve via "gpt-5.4-nano"
+            let config =
+                ModelConfig::new_or_fail("gpt-5.4-nano-low").with_canonical_limits("openai");
+            assert_eq!(config.context_limit, Some(400_000));
+        }
     }
 
     mod is_openai_reasoning_model {
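Note: the shared helper is_openai_responses_model that this diff delegates to is not shown in the PR. Judging from the inline logic it replaces above and the Databricks routing tests below, a plausible sketch (an assumption, not the actual providers/utils.rs source) would be:

// Assumed sketch of crate::providers::utils::is_openai_responses_model.
// Merges the two checks this PR removes: the prefix-stripping reasoning-model
// test from model.rs and the "codex" check from databricks.rs.
fn is_openai_responses_model(model_name: &str) -> bool {
    const PROVIDER_PREFIXES: &[&str] = &["goose-", "databricks-"];
    const REASONING_PREFIXES: &[&str] = &["o1", "o3", "o4", "gpt-5"];

    let normalized = model_name.to_ascii_lowercase();
    let base = PROVIDER_PREFIXES
        .iter()
        .find_map(|p| normalized.strip_prefix(p))
        .unwrap_or(&normalized);

    REASONING_PREFIXES.iter().any(|p| base.starts_with(p)) || base.contains("codex")
}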
57 changes: 52 additions & 5 deletions crates/goose/src/providers/databricks.rs
@@ -269,17 +269,19 @@ impl DatabricksProvider {
     }
 
     fn is_responses_model(model_name: &str) -> bool {
-        let normalized = model_name.to_ascii_lowercase();
-        normalized.contains("codex")
+        super::utils::is_openai_responses_model(model_name)
    }
 
     fn get_endpoint_path(&self, model_name: &str, is_embedding: bool) -> String {
         if is_embedding {
             "serving-endpoints/text-embedding-3-small/invocations".to_string()
-        } else if Self::is_responses_model(model_name) {
-            "serving-endpoints/responses".to_string()
         } else {
-            format!("serving-endpoints/{}/invocations", model_name)
+            let (clean_name, _) = super::utils::extract_reasoning_effort(model_name);
+            if Self::is_responses_model(&clean_name) {
+                "serving-endpoints/responses".to_string()
+            } else {
+                format!("serving-endpoints/{}/invocations", clean_name)
+            }
         }
     }
 
@@ -594,3 +596,48 @@ impl EmbeddingCapable for DatabricksProvider {
         Ok(embeddings)
     }
 }
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    fn test_provider() -> DatabricksProvider {
+        DatabricksProvider {
+            api_client: super::super::api_client::ApiClient::new(
+                "https://example.com".to_string(),
+                super::super::api_client::AuthMethod::NoAuth,
+            )
+            .unwrap(),
+            auth: DatabricksAuth::Token("fake".into()),
+            model: ModelConfig::new_or_fail("databricks-gpt-5.4"),
+            image_format: ImageFormat::OpenAi,
+            retry_config: RetryConfig::default(),
+            fast_retry_config: RetryConfig::new(0, 0, 1.0, 0),
+            name: "databricks".into(),
+            token_cache: std::sync::Arc::new(std::sync::Mutex::new(None)),
+            instance_id: None,
+        }
+    }
+
+    #[test]
+    fn responses_models_route_to_responses_endpoint() {
+        let provider = test_provider();
+
+        for (model_name, expected_path) in [
+            ("gpt-5.4", "serving-endpoints/responses"),
+            ("databricks-gpt-5.4-high", "serving-endpoints/responses"),
+            ("databricks-gpt-5-4-xhigh", "serving-endpoints/responses"),
+            ("o3-mini", "serving-endpoints/responses"),
+            (
+                "databricks-claude-sonnet-4",
+                "serving-endpoints/databricks-claude-sonnet-4/invocations",
+            ),
+        ] {
+            assert_eq!(
+                provider.get_endpoint_path(model_name, false),
+                expected_path,
+                "unexpected endpoint for {model_name}"
+            );
+        }
+    }
+}
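Similarly, extract_reasoning_effort is only referenced in this PR. Based on the behavior its tests pin down ("o3-xhigh" → ("o3", "xhigh"), "o3-none" → ("o3", "none"), "databricks-gpt-5.4-high" → ("databricks-gpt-5.4", "high"), and no effort for plain names like "o1"), a minimal sketch of its shape, assuming a simple trailing-suffix split:

// Assumed sketch of crate::providers::utils::extract_reasoning_effort;
// the real implementation lives in providers/utils.rs and is not in this diff.
fn extract_reasoning_effort(model_name: &str) -> (String, Option<String>) {
    // Effort levels observed in this PR's tests; the real list may differ.
    const EFFORTS: &[&str] = &["none", "low", "medium", "high", "xhigh"];

    if let Some((base, last)) = model_name.rsplit_once('-') {
        if EFFORTS.contains(&last) {
            return (base.to_string(), Some(last.to_string()));
        }
    }
    (model_name.to_string(), None)
}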
82 changes: 62 additions & 20 deletions crates/goose/src/providers/formats/databricks.rs
Expand Up @@ -2,8 +2,9 @@ use crate::conversation::message::{Message, MessageContent};
use crate::model::ModelConfig;
use crate::providers::formats::anthropic::{thinking_effort, thinking_type, ThinkingType};
use crate::providers::utils::{
convert_image, detect_image_path, is_valid_function_name, load_image_file, safely_parse_json,
sanitize_function_name, ImageFormat,
convert_image, detect_image_path, extract_reasoning_effort, is_openai_responses_model,
is_valid_function_name, load_image_file, safely_parse_json, sanitize_function_name,
ImageFormat,
};
use anyhow::{anyhow, Error};
use rmcp::model::{
@@ -581,24 +582,8 @@ pub fn create_request(
         ));
     }
 
-    let is_openai_reasoning_model = model_config.is_openai_reasoning_model();
-    let (model_name, reasoning_effort) = if is_openai_reasoning_model {
-        let parts: Vec<&str> = model_config.model_name.split('-').collect();
-        let last_part = parts.last().unwrap();
-
-        match *last_part {
-            "low" | "medium" | "high" => {
-                let base_name = parts[..parts.len() - 1].join("-");
-                (base_name, Some(last_part.to_string()))
-            }
-            _ => (
-                model_config.model_name.to_string(),
-                Some("medium".to_string()),
-            ),
-        }
-    } else {
-        (model_config.model_name.to_string(), None)
-    };
+    let (model_name, reasoning_effort) = extract_reasoning_effort(&model_config.model_name);
+    let is_openai_reasoning_model = is_openai_responses_model(&model_name);
 
     let system_message = DatabricksMessage {
         role: "system".to_string(),
@@ -1073,6 +1058,63 @@ mod tests {
         Ok(())
     }
 
+    #[test]
+    fn test_create_request_reasoning_effort_xhigh() -> anyhow::Result<()> {
+        let model_config = ModelConfig {
+            model_name: "o3-xhigh".to_string(),
+            context_limit: Some(4096),
+            temperature: None,
+            max_tokens: Some(1024),
+            toolshim: false,
+            toolshim_model: None,
+            fast_model_config: None,
+            request_params: None,
+            reasoning: None,
+        };
+        let request = create_request(&model_config, "system", &[], &[], &ImageFormat::OpenAi)?;
+        assert_eq!(request["model"], "o3");
+        assert_eq!(request["reasoning_effort"], "xhigh");
+        Ok(())
+    }
+
+    #[test]
+    fn test_create_request_reasoning_effort_none() -> anyhow::Result<()> {
+        let model_config = ModelConfig {
+            model_name: "o3-none".to_string(),
+            context_limit: Some(4096),
+            temperature: None,
+            max_tokens: Some(1024),
+            toolshim: false,
+            toolshim_model: None,
+            fast_model_config: None,
+            request_params: None,
+            reasoning: None,
+        };
+        let request = create_request(&model_config, "system", &[], &[], &ImageFormat::OpenAi)?;
+        assert_eq!(request["model"], "o3");
+        assert_eq!(request["reasoning_effort"], "none");
+        Ok(())
+    }
+
+    #[test]
+    fn test_create_request_reasoning_effort_for_prefixed_gpt5_model() -> anyhow::Result<()> {
+        let model_config = ModelConfig {
+            model_name: "databricks-gpt-5.4-high".to_string(),
+            context_limit: Some(4096),
+            temperature: None,
+            max_tokens: Some(1024),
+            toolshim: false,
+            toolshim_model: None,
+            fast_model_config: None,
+            request_params: None,
+            reasoning: None,
+        };
+        let request = create_request(&model_config, "system", &[], &[], &ImageFormat::OpenAi)?;
+        assert_eq!(request["model"], "databricks-gpt-5.4");
+        assert_eq!(request["reasoning_effort"], "high");
+        Ok(())
+    }
+
     #[test]
     fn test_create_request_adaptive_thinking_for_46_models() -> anyhow::Result<()> {
         let _guard = env_lock::lock_env([
15 changes: 10 additions & 5 deletions crates/goose/src/providers/formats/openai.rs
@@ -4,8 +4,9 @@ use crate::model::ModelConfig;
 use crate::providers::base::{ProviderUsage, Usage};
 use crate::providers::errors::ProviderError;
 use crate::providers::utils::{
-    convert_image, detect_image_path, extract_reasoning_effort, is_valid_function_name,
-    load_image_file, safely_parse_json, sanitize_function_name, ImageFormat,
+    convert_image, detect_image_path, extract_reasoning_effort, is_openai_responses_model,
+    is_valid_function_name, load_image_file, safely_parse_json, sanitize_function_name,
+    ImageFormat,
 };
 use anyhow::{anyhow, Error};
 use async_stream::try_stream;
@@ -984,7 +985,7 @@ pub fn create_request(
     }
 
     let (model_name, reasoning_effort) = extract_reasoning_effort(&model_config.model_name);
-    let is_reasoning_model = reasoning_effort.is_some();
+    let is_reasoning_model = is_openai_responses_model(&model_name);
 
     let system_message = json!({
         "role": if is_reasoning_model { "developer" } else { "system" },
@@ -1716,7 +1717,8 @@ mod tests {
 
     #[test]
     fn test_create_request_o1_default() -> anyhow::Result<()> {
-        // Test default medium reasoning effort for O1 model
+        // Without an explicit effort suffix the API picks its own default;
+        // we should omit reasoning_effort entirely but still use the "developer" role.
         let model_config = ModelConfig {
             model_name: "o1".to_string(),
             context_limit: Some(4096),
@@ -1745,13 +1747,16 @@
                     "content": "system"
                 }
             ],
-            "reasoning_effort": "medium",
             "max_completion_tokens": 1024
         });
 
         for (key, value) in expected.as_object().unwrap() {
             assert_eq!(obj.get(key).unwrap(), value);
         }
+        assert!(
+            obj.get("reasoning_effort").is_none(),
+            "reasoning_effort should be omitted when no explicit suffix is provided"
+        );
 
         Ok(())
     }
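For contrast with the default case above, a companion sketch for the suffixed path (a hypothetical test, assuming openai.rs's create_request takes the same arguments as in the databricks format tests; it is not part of this diff):

#[test]
fn test_create_request_o1_explicit_effort_sketch() -> anyhow::Result<()> {
    // Hypothetical: with an explicit "-high" suffix, the suffix should be
    // stripped from the model name and forwarded as reasoning_effort.
    let model_config = ModelConfig {
        model_name: "o1-high".to_string(),
        context_limit: Some(4096),
        temperature: None,
        max_tokens: Some(1024),
        toolshim: false,
        toolshim_model: None,
        fast_model_config: None,
        request_params: None,
        reasoning: None,
    };
    let request = create_request(&model_config, "system", &[], &[], &ImageFormat::OpenAi)?;
    assert_eq!(request["model"], "o1");
    assert_eq!(request["reasoning_effort"], "high");
    Ok(())
}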