diff --git a/doc/configuration/adapters-http.md b/doc/configuration/adapters-http.md
index ba7816249..16cf37557 100644
--- a/doc/configuration/adapters-http.md
+++ b/doc/configuration/adapters-http.md
@@ -90,7 +90,7 @@ require("codecompanion").setup({
return require("codecompanion.adapters").extend("openai_responses", {
schema = {
top_p = {
- default = 0
+ default = 0,
},
},
})
@@ -115,7 +115,7 @@ require("codecompanion").setup({
return false
end
return true
- end
+ end,
},
},
})
@@ -389,6 +389,47 @@ require("codecompanion").setup({
```
+### Gemini
+> This is NOT the Gemini-CLI ACP adapter!
+
+The Gemini adapter uses Google's official [OpenAI-compatible](https://ai.google.dev/gemini-api/docs/openai) endpoint, with one difference: the `reasoning_effort` key is replaced by `thinking_budget`, which controls the number of tokens the model may spend on reasoning.
+
+```lua
+require("codecompanion").setup({
+ adapters = {
+ http = {
+ gemini = function()
+ return require("codecompanion.adapters").extend("gemini", {
+ schema = {
+ thinking_budget = {
+ default = -1,
+ },
+ },
+ })
+ end,
+ },
+ },
+ strategies = {
+ chat = {
+ adapter = "gemini",
+ },
+ inline = {
+ adapter = "gemini",
+ },
+ },
+})
+```
+
+The value of `thinking_budget` can be one of the following:
+
+- `-1`: dynamic thinking (default).
+- `0`: disable reasoning.
+- A valid positive integer, i.e. a fixed token budget (see [Google's documentation](https://ai.google.dev/gemini-api/docs/thinking#set-budget) for the budgets each model supports); an example follows below.
+
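+For example, a minimal sketch that pins reasoning to a fixed token budget (the `8192` below is purely illustrative; check the per-model limits linked above):
+
+```lua
+require("codecompanion.adapters").extend("gemini", {
+  schema = {
+    thinking_budget = {
+      default = 8192, -- fixed reasoning budget in tokens; 0 disables reasoning, -1 restores dynamic thinking
+    },
+  },
+})
+```
+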
+Note that, according to the [Gemini documentation](https://ai.google.dev/gemini-api/docs/thinking#summaries), the API endpoints only return thinking _summaries_, not the _raw thinking tokens_.
+Only the raw tokens are bounded by the `thinking_budget` parameter; the summaries you see are not bounded by the budget.
+
### OpenAI Responses API
CodeCompanion supports OpenAI's [Responses API](https://platform.openai.com/docs/api-reference/responses) out of the box, via a separate adapter:
diff --git a/lua/codecompanion/adapters/http/gemini.lua b/lua/codecompanion/adapters/http/gemini.lua
index b4d96bd68..6a1843f45 100644
--- a/lua/codecompanion/adapters/http/gemini.lua
+++ b/lua/codecompanion/adapters/http/gemini.lua
@@ -1,6 +1,17 @@
local adapter_utils = require("codecompanion.utils.adapters")
local openai = require("codecompanion.adapters.http.openai")
+local CONSTANTS = { thinking_start = "<thought>", thinking_end = "</thought>" } -- tags wrapping thought summaries in responses
+
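+---Strip a leading thinking tag (opening or closing) from a message chunk, if present.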
+---@param message string?
+---@return string?
+local function strip_thinking_tags(message)
+ if message then
+ local result = message:gsub("^" .. CONSTANTS.thinking_start, ""):gsub("^" .. CONSTANTS.thinking_end, "")
+ return result
+ end
+end
+
---@class CodeCompanion.HTTPAdapter.Gemini : CodeCompanion.HTTPAdapter
return {
name = "gemini",
@@ -52,7 +63,14 @@ return {
return openai.handlers.tokens(self, data)
end,
form_parameters = function(self, params, messages)
- return openai.handlers.form_parameters(self, params, messages)
+ local processed_params = openai.handlers.form_parameters(self, params, messages)
+ -- https://ai.google.dev/gemini-api/docs/openai#thinking
+ processed_params.extra_body =
+ vim.tbl_deep_extend("force", processed_params.extra_body or {}, { google = { thinking_config = {} } })
+ local thinking_config = processed_params.extra_body.google.thinking_config
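+      -- Ask the API to return thought summaries whenever the budget does not disable reasoning (i.e. is not 0)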
+ thinking_config.include_thoughts = thinking_config.thinking_budget ~= 0
+
+ return processed_params
end,
form_tools = function(self, tools)
return openai.handlers.form_tools(self, tools)
@@ -99,8 +117,49 @@ return {
return result
end,
chat_output = function(self, data, tools)
- return openai.handlers.chat_output(self, data, tools)
+ local _data = openai.handlers.chat_output(self, data, tools)
+ if _data then
+ if _data.output and _data.output.content and _data.output.content:find("^" .. CONSTANTS.thinking_end) then
+      -- The first non-thinking delta that follows the reasoning deltas in a streamed
+      -- response carries the closing thinking tag; strip it.
+ _data.output.content = strip_thinking_tags(_data.output.content)
+ end
+ end
+ return _data
+ end,
+
+ parse_message_meta = function(self, data)
+ -- https://ai.google.dev/gemini-api/docs/openai#thinking
+    local extra_content = data.extra and data.extra.extra_content
+ local has_thinking = extra_content and extra_content.google and extra_content.google.thought
+
+ if not has_thinking then
+      -- This delta is either the final answer after a reasoning sequence, or a response with reasoning disabled.
+      -- In the former case, the content may start with the closing thinking tag; strip it.
+ return {
+ status = data.status,
+ output = { content = strip_thinking_tags(data.output.content), role = data.output.role },
+ }
+ end
+
+ if self.opts.stream then
+      -- The `content` field contains the reasoning summary. Move it into the `reasoning`
+      -- field and clear `content` so it is not mistaken for the response.
+ local reasoning = strip_thinking_tags(data.output.content)
+ data.output.reasoning = { content = reasoning }
+ data.output.content = nil
+ else
+      -- When not streaming, the reasoning summary and the final answer arrive in one chunk,
+      -- with the reasoning wrapped in the thinking tags.
+ local reasoning =
+ data.output.content:match(string.format("^%s(.*)%s", CONSTANTS.thinking_start, CONSTANTS.thinking_end))
+ data.output.reasoning = { content = reasoning }
+ data.output.content = data.output.content:gsub(".*" .. CONSTANTS.thinking_end, "")
+ end
+
+ return data
end,
+
tools = {
format_tool_calls = function(self, tools)
return openai.handlers.tools.format_tool_calls(self, tools)
@@ -110,7 +169,11 @@ return {
end,
},
inline_output = function(self, data, context)
- return openai.handlers.inline_output(self, data, context)
+ local inline_output = openai.handlers.inline_output(self, data, context)
+ if inline_output then
+      -- Drop the reasoning block, i.e. everything up to and including the closing thinking tag
+      return { status = inline_output.status, output = inline_output.output:gsub("^.*" .. CONSTANTS.thinking_end, "") }
+ end
+ return nil
end,
on_exit = function(self, data)
return openai.handlers.on_exit(self, data)
@@ -130,11 +193,20 @@ return {
formatted_name = "Gemini 3 Flash",
opts = { can_reason = true, has_vision = true },
},
- ["gemini-2.5-pro"] = { formatted_name = "Gemini 2.5 Pro", opts = { can_reason = true, has_vision = true } },
- ["gemini-2.5-flash"] = { formatted_name = "Gemini 2.5 Flash", opts = { can_reason = true, has_vision = true } },
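+      -- Per-model thinking budget ranges: https://ai.google.dev/gemini-api/docs/thinking#set-budget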
+ ["gemini-2.5-pro"] = {
+ formatted_name = "Gemini 2.5 Pro",
+ opts = { can_reason = true, has_vision = true },
+ thinking_budget = { low = 128, high = 32768 },
+ },
+ ["gemini-2.5-flash"] = {
+ formatted_name = "Gemini 2.5 Flash",
+ opts = { can_reason = true, has_vision = true },
+ thinking_budget = { low = 0, high = 24576 },
+ },
["gemini-2.5-flash-preview-05-20"] = {
formatted_name = "Gemini 2.5 Flash Preview",
opts = { can_reason = true, has_vision = true },
+ thinking_budget = { low = 0, high = 24576 },
},
["gemini-2.0-flash"] = { formatted_name = "Gemini 2.0 Flash", opts = { has_vision = true } },
["gemini-2.0-flash-lite"] = { formatted_name = "Gemini 2.0 Flash Lite", opts = { has_vision = true } },
@@ -179,10 +251,11 @@ return {
end,
},
---@type CodeCompanion.Schema
- reasoning_effort = {
+ thinking_budget = {
+ -- https://ai.google.dev/gemini-api/docs/thinking#set-budget
order = 5,
- mapping = "parameters",
- type = "string",
+ mapping = "parameters.extra_body.google.thinking_config",
+ type = "integer",
optional = true,
---@type fun(self: CodeCompanion.HTTPAdapter): boolean
enabled = function(self)
@@ -195,14 +268,10 @@ return {
end
return false
end,
- default = "medium",
- desc = "Constrains effort on reasoning for reasoning models. Reducing reasoning effort can result in faster responses and fewer tokens used on reasoning in a response.",
- choices = {
- "high",
- "medium",
- "low",
- "none",
- },
+      -- For models that support reasoning, leaving this unset means dynamic thinking.
+      default = nil,
+      -- TODO: validation requires having `self` in the params.
+ desc = "The thinkingBudget parameter guides the model on the number of thinking tokens to use when generating a response.",
},
},
}
diff --git a/lua/codecompanion/interactions/chat/init.lua b/lua/codecompanion/interactions/chat/init.lua
index f93c34011..bcceabbdd 100644
--- a/lua/codecompanion/interactions/chat/init.lua
+++ b/lua/codecompanion/interactions/chat/init.lua
@@ -1219,12 +1219,23 @@ function Chat:done(output, reasoning, tools, meta, opts)
local reasoning_content = nil
if reasoning and not vim.tbl_isempty(reasoning) then
- if vim.iter(reasoning):any(function(item)
- return item and type(item) ~= "string"
- end) then
- reasoning_content = adapters.call_handler(self.adapter, "build_reasoning", reasoning)
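+    -- Prefer the adapter's own reasoning builder when it provides one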
+ local build_reasoning_handler = adapters.get_handler(self.adapter, "build_reasoning")
+ if type(build_reasoning_handler) == "function" then
+ reasoning_content = build_reasoning_handler(self.adapter, reasoning)
else
- reasoning_content = table.concat(reasoning, "")
+      -- Assume a trivial `reasoning` structure: each item is a string or a `{ content = string }` table
+ reasoning_content = {
+ content = vim
+ .iter(reasoning)
+ :map(function(item)
+ if type(item) == "string" then
+ return item
+ elseif type(item) == "table" and item.content then
+ return item.content
+ end
+ end)
+ :join(""),
+ }
end
end
diff --git a/tests/adapters/http/stubs/gemini_reasoning_no_streaming.txt b/tests/adapters/http/stubs/gemini_reasoning_no_streaming.txt
new file mode 100644
index 000000000..1d9448612
--- /dev/null
+++ b/tests/adapters/http/stubs/gemini_reasoning_no_streaming.txt
@@ -0,0 +1,25 @@
+{
+ "choices": [
+ {
+ "finish_reason": "stop",
+ "index": 0,
+ "message": {
+      "content": "<thought>This is a dummy thinking summary</thought>Elegant, dynamic.\\n\\nNext question?\\n",
+ "role": "assistant",
+ "extra_content": {
+ "google": {
+ "thought": true
+ }
+ }
+ }
+ }
+ ],
+ "created": 1743460357,
+ "model": "gemini-2.5-flash",
+ "object": "chat.completion",
+ "usage": {
+ "completion_tokens": 9,
+ "prompt_tokens": 419,
+ "total_tokens": 428
+ }
+}
diff --git a/tests/adapters/http/stubs/gemini_reasoning_streaming.txt b/tests/adapters/http/stubs/gemini_reasoning_streaming.txt
new file mode 100644
index 000000000..92c7374c0
--- /dev/null
+++ b/tests/adapters/http/stubs/gemini_reasoning_streaming.txt
@@ -0,0 +1,6 @@
+data: {"choices":[{"delta":{"content":"<thought>This is a dummy reasoning segment","role":"assistant", "extra_content":{"google":{"thought":true}}},"index":0}],"created":1743460108,"model":"gemini-2.5-flash","object":"chat.completion.chunk","usage":{"completion_tokens":0,"prompt_tokens":425,"total_tokens":425}}
+data: {"choices":[{"delta":{"content":"</thought>Elegant","role":"assistant"},"index":0}],"created":1743460109,"model":"gemini-2.5-flash","object":"chat.completion.chunk","usage":{"completion_tokens":0,"prompt_tokens":425,"total_tokens":425}}
+Tokens: 425
+data: {"choices":[{"delta":{"content":", dynamic.\n\nNext question?\n","role":"assistant"},"finish_reason":"stop","index":0}],"created":1743460109,"model":"gemini-2.5-flash","object":"chat.completion.chunk","usage":{"completion_tokens":9,"prompt_tokens":419,"total_tokens":428}}
+Tokens: 428
+data: [DONE]
diff --git a/tests/adapters/http/test_gemini.lua b/tests/adapters/http/test_gemini.lua
index 5f166907c..a705875f9 100644
--- a/tests/adapters/http/test_gemini.lua
+++ b/tests/adapters/http/test_gemini.lua
@@ -71,6 +71,29 @@ T["Gemini adapter"]["Streaming"]["can output streamed data into the chat buffer"
h.expect_starts_with("Elegant, dynamic", output)
end
+T["Gemini adapter"]["Streaming"]["can output streamed data with reasoning into the chat buffer"] = function()
+ local output = ""
+ local reasoning = ""
+ local lines = vim.fn.readfile("tests/adapters/http/stubs/gemini_reasoning_streaming.txt")
+ for _, line in ipairs(lines) do
+ local chat_output = adapter.handlers.chat_output(adapter, line)
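+    -- Deltas carrying extra (Google-specific) metadata are routed through parse_message_meta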
+ if chat_output and chat_output.extra then
+ chat_output = adapter.handlers.parse_message_meta(adapter, chat_output)
+ end
+ if chat_output then
+ if chat_output.output.content then
+ output = output .. chat_output.output.content
+ end
+ if chat_output.output.reasoning then
+ reasoning = reasoning .. chat_output.output.reasoning.content
+ end
+ end
+ end
+
+ h.expect_starts_with("Elegant, dynamic", output)
+ h.expect_starts_with("This is a dummy reasoning segment", reasoning)
+end
+
T["Gemini adapter"]["Streaming"]["can process tools"] = function()
local tools = {}
local lines = vim.fn.readfile("tests/adapters/http/stubs/gemini_tools_streaming.txt")
@@ -194,6 +217,22 @@ T["Gemini adapter"]["No Streaming"]["can output for the chat buffer"] = function
h.expect_starts_with("Elegant, dynamic.", adapter.handlers.chat_output(adapter, json).output.content)
end
+T["Gemini adapter"]["No Streaming"]["can output for the chat buffer with reasoning"] = function()
+ adapter.opts.stream = false
+ local data = vim.fn.readfile("tests/adapters/http/stubs/gemini_reasoning_no_streaming.txt")
+ data = table.concat(data, "\n")
+
+ -- Match the format of the actual request
+ local json = { body = data }
+
+ local chat_output = adapter.handlers.chat_output(adapter, json)
+ if chat_output and chat_output.extra then
+ chat_output = adapter.handlers.parse_message_meta(adapter, chat_output)
+ end
+ h.expect_starts_with("Elegant, dynamic.", chat_output.output.content)
+ h.expect_starts_with("This is a dummy thinking summary", chat_output.output.reasoning.content)
+end
+
T["Gemini adapter"]["No Streaming"]["can process tools"] = function()
local data = vim.fn.readfile("tests/adapters/http/stubs/gemini_tools_no_streaming.txt")
data = table.concat(data, "\n")