45 changes: 43 additions & 2 deletions doc/configuration/adapters-http.md
@@ -90,7 +90,7 @@ require("codecompanion").setup({
return require("codecompanion.adapters").extend("openai_responses", {
schema = {
top_p = {
default = 0
default = 0,
},
},
})
@@ -115,7 +115,7 @@ require("codecompanion").setup({
return false
end
return true
end
end,
},
},
})
@@ -389,6 +389,47 @@ require("codecompanion").setup({
```


### Gemini
> This is NOT the Gemini-CLI ACP adapter!

The Gemini adapter uses Google's official [OpenAI-compatible](https://ai.google.dev/gemini-api/docs/openai) endpoint, with one difference: the `reasoning_effort` schema key is replaced by `thinking_budget`, which controls the number of tokens the reasoning process may use.

```lua
require("codecompanion").setup({
adapters = {
http = {
gemini = function()
return require("codecompanion.adapters").extend("gemini", {
schema = {
thinking_budget = {
default = -1,
},
},
})
end,
},
},
strategies = {
chat = {
adapter = "gemini",
},
inline = {
adapter = "gemini",
},
},
}),
```

The value of `thinking_budget` can be one of the following:

- `-1`: dynamic thinking (the default).
- `0`: disable reasoning.
- A positive integer (see [Google's documentation](https://ai.google.dev/gemini-api/docs/thinking#set-budget) for the supported thinking budgets for each model).
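
For example, to turn reasoning off entirely (a minimal sketch, mirroring the `extend` call above):

```lua
require("codecompanion.adapters").extend("gemini", {
  schema = {
    thinking_budget = {
      default = 0, -- 0 disables reasoning; -1 restores dynamic thinking
    },
  },
})
```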

Note that, according to the [Gemini documentation](https://ai.google.dev/gemini-api/docs/thinking#summaries), the API only returns thinking _summaries_, not _raw thinking tokens_.
Only the raw tokens are bounded by the `thinking_budget` parameter; the summaries you see are not.

### OpenAI Responses API

CodeCompanion supports OpenAI's [Responses API](https://platform.openai.com/docs/api-reference/responses) out of the box, via a separate adapter:
101 changes: 85 additions & 16 deletions lua/codecompanion/adapters/http/gemini.lua
@@ -1,6 +1,17 @@
local adapter_utils = require("codecompanion.utils.adapters")
local openai = require("codecompanion.adapters.http.openai")

local CONSTANTS = { thinking_start = "<thought>", thinking_end = "</thought>" }

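-- Strip a single leading thinking tag (opening or closing) from a message delta,
-- e.g. "<thought>summary" -> "summary", "</thought>Answer" -> "Answer".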
---@param message string?
---@return string?
local function strip_thinking_tags(message)
if message then
local result = message:gsub("^" .. CONSTANTS.thinking_start, ""):gsub("^" .. CONSTANTS.thinking_end, "")
return result
end
end

---@class CodeCompanion.HTTPAdapter.Gemini : CodeCompanion.HTTPAdapter
return {
name = "gemini",
@@ -52,7 +63,14 @@ return {
return openai.handlers.tokens(self, data)
end,
form_parameters = function(self, params, messages)
return openai.handlers.form_parameters(self, params, messages)
local processed_params = openai.handlers.form_parameters(self, params, messages)
-- https://ai.google.dev/gemini-api/docs/openai#thinking
processed_params.extra_body =
vim.tbl_deep_extend("force", processed_params.extra_body or {}, { google = { thinking_config = {} } })
local thinking_config = processed_params.extra_body.google.thinking_config
thinking_config.include_thoughts = thinking_config.thinking_budget ~= 0
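-- Illustrative shape of the resulting request fragment (an assumption based on the
-- schema mapping below, `parameters.extra_body.google.thinking_config`):
--   extra_body = { google = { thinking_config = { thinking_budget = <n>, include_thoughts = <bool> } } }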

return processed_params
end,
form_tools = function(self, tools)
return openai.handlers.form_tools(self, tools)
@@ -99,8 +117,49 @@ return {
return result
end,
chat_output = function(self, data, tools)
return openai.handlers.chat_output(self, data, tools)
local _data = openai.handlers.chat_output(self, data, tools)
if _data then
if _data.output and _data.output.content and _data.output.content:find("^" .. CONSTANTS.thinking_end) then
-- The first non-thinking delta in a streamed response, following the reasoning deltas,
-- carries the closing thinking tag; strip it.
_data.output.content = strip_thinking_tags(_data.output.content)
end
end
return _data
end,

parse_message_meta = function(self, data)
-- https://ai.google.dev/gemini-api/docs/openai#thinking
local extra_content = data.extra.extra_content
local has_thinking = extra_content and extra_content.google and extra_content.google.thought
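-- A reasoning delta (cf. the streaming test stub) looks roughly like:
--   { extra = { extra_content = { google = { thought = true } } }, output = { content = "<thought>..." } }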

if not has_thinking then
-- This delta is either the actual answer after a reasoning sequence, or a response with reasoning off.
-- In the former case, the content might start with a `</thought>` tag; strip it.
return {
status = data.status,
output = { content = strip_thinking_tags(data.output.content), role = data.output.role },
}
end

if self.opts.stream then
-- The `content` field contains the reasoning summary.
-- Move it into the `reasoning` field and erase `content` so that it's not mistaken for the response.
local reasoning = strip_thinking_tags(data.output.content)
data.output.reasoning = { content = reasoning }
data.output.content = nil
else
-- When not streaming, the reasoning summary and the final answer arrive in one chunk,
-- with the reasoning wrapped in `<thought></thought>` tags.
local reasoning =
data.output.content:match(string.format("^%s(.*)%s", CONSTANTS.thinking_start, CONSTANTS.thinking_end))
data.output.reasoning = { content = reasoning }
data.output.content = data.output.content:gsub(".*" .. CONSTANTS.thinking_end, "")
end

return data
end,

tools = {
format_tool_calls = function(self, tools)
return openai.handlers.tools.format_tool_calls(self, tools)
@@ -110,7 +169,11 @@ return {
end,
},
inline_output = function(self, data, context)
return openai.handlers.inline_output(self, data, context)
local inline_output = openai.handlers.inline_output(self, data, context)
if inline_output then
return { status = inline_output.status, output = inline_output.output:gsub("^<thought>.*</thought>", "") }
end
return nil
end,
on_exit = function(self, data)
return openai.handlers.on_exit(self, data)
@@ -130,11 +193,20 @@ return {
formatted_name = "Gemini 3 Flash",
opts = { can_reason = true, has_vision = true },
},
["gemini-2.5-pro"] = { formatted_name = "Gemini 2.5 Pro", opts = { can_reason = true, has_vision = true } },
["gemini-2.5-flash"] = { formatted_name = "Gemini 2.5 Flash", opts = { can_reason = true, has_vision = true } },
["gemini-2.5-pro"] = {
formatted_name = "Gemini 2.5 Pro",
opts = { can_reason = true, has_vision = true },
thinking_budget = { low = 128, high = 32768 },
},
["gemini-2.5-flash"] = {
formatted_name = "Gemini 2.5 Flash",
opts = { can_reason = true, has_vision = true },
thinking_budget = { low = 0, high = 24576 },
},
["gemini-2.5-flash-preview-05-20"] = {
formatted_name = "Gemini 2.5 Flash Preview",
opts = { can_reason = true, has_vision = true },
thinking_budget = { low = 0, high = 24576 },
},
["gemini-2.0-flash"] = { formatted_name = "Gemini 2.0 Flash", opts = { has_vision = true } },
["gemini-2.0-flash-lite"] = { formatted_name = "Gemini 2.0 Flash Lite", opts = { has_vision = true } },
@@ -179,10 +251,11 @@ return {
end,
},
---@type CodeCompanion.Schema
reasoning_effort = {
thinking_budget = {
-- https://ai.google.dev/gemini-api/docs/thinking#set-budget
order = 5,
mapping = "parameters",
type = "string",
mapping = "parameters.extra_body.google.thinking_config",
type = "integer",
optional = true,
---@type fun(self: CodeCompanion.HTTPAdapter): boolean
enabled = function(self)
@@ -195,14 +268,10 @@
end
return false
end,
default = "medium",
desc = "Constrains effort on reasoning for reasoning models. Reducing reasoning effort can result in faster responses and fewer tokens used on reasoning in a response.",
choices = {
"high",
"medium",
"low",
"none",
},
-- For models that support reasoning, leaving this unset means 'dynamic thinking'.
default = nil,
-- TODO: validate requires having `self` in the params.
> **Review comment (Contributor Author):** Since different models have different thinking budgets, we might need to change `validate` so that it also accepts `self` in the parameters (something like `validate(self, n)` instead of the current `validate(n)`). @olimorris, would that be alright?

desc = "The thinkingBudget parameter guides the model on the number of thinking tokens to use when generating a response.",
},
},
}
21 changes: 16 additions & 5 deletions lua/codecompanion/interactions/chat/init.lua
@@ -1219,12 +1219,23 @@ function Chat:done(output, reasoning, tools, meta, opts)

local reasoning_content = nil
if reasoning and not vim.tbl_isempty(reasoning) then
if vim.iter(reasoning):any(function(item)
return item and type(item) ~= "string"
end) then
reasoning_content = adapters.call_handler(self.adapter, "build_reasoning", reasoning)
local build_reasoning_handler = adapters.get_handler(self.adapter, "build_reasoning")
if type(build_reasoning_handler) == "function" then
reasoning_content = build_reasoning_handler(self.adapter, reasoning)
else
reasoning_content = table.concat(reasoning, "")
-- Assume a trivial `reasoning` item structure: a string or `{ content: string }`.
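-- e.g. { "step 1 ", { content = "step 2" } } -> { content = "step 1 step 2" }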
reasoning_content = {
content = vim
.iter(reasoning)
:map(function(item)
if type(item) == "string" then
return item
elseif type(item) == "table" and item.content then
return item.content
end
end)
:join(""),
}
end
end

25 changes: 25 additions & 0 deletions tests/adapters/http/stubs/gemini_reasoning_no_streaming.txt
@@ -0,0 +1,25 @@
{
"choices": [
{
"finish_reason": "stop",
"index": 0,
"message": {
"content": "<thought>This is a dummy thinking summary</thought>Elegant, dynamic.\\n\\nNext question?\\n",
"role": "assistant",
"extra_content": {
"google": {
"thought": true
}
}
}
}
],
"created": 1743460357,
"model": "gemini-2.5-flash",
"object": "chat.completion",
"usage": {
"completion_tokens": 9,
"prompt_tokens": 419,
"total_tokens": 428
}
}
6 changes: 6 additions & 0 deletions tests/adapters/http/stubs/gemini_reasoning_streaming.txt
@@ -0,0 +1,6 @@
data: {"choices":[{"delta":{"content":"<thought>This is a dummy reasoning segment","role":"assistant", "extra_content":{"google":{"thought":true}}},"index":0}],"created":1743460108,"model":"gemini-2.5-flash","object":"chat.completion.chunk","usage":{"completion_tokens":0,"prompt_tokens":425,"total_tokens":425}}
data: {"choices":[{"delta":{"content":"</thought>Elegant","role":"assistant"},"index":0}],"created":1743460109,"model":"gemini-2.5-flash","object":"chat.completion.chunk","usage":{"completion_tokens":0,"prompt_tokens":425,"total_tokens":425}}
Tokens: 425
data: {"choices":[{"delta":{"content":", dynamic.\n\nNext question?\n","role":"assistant"},"finish_reason":"stop","index":0}],"created":1743460109,"model":"gemini-2.5-flash","object":"chat.completion.chunk","usage":{"completion_tokens":9,"prompt_tokens":419,"total_tokens":428}}
Tokens: 428
data: [DONE]
39 changes: 39 additions & 0 deletions tests/adapters/http/test_gemini.lua
@@ -71,6 +71,29 @@ T["Gemini adapter"]["Streaming"]["can output streamed data into the chat buffer"] = function()
h.expect_starts_with("Elegant, dynamic", output)
end

T["Gemini adapter"]["Streaming"]["can output streamed data with reasoning into the chat buffer"] = function()
local output = ""
local reasoning = ""
local lines = vim.fn.readfile("tests/adapters/http/stubs/gemini_reasoning_streaming.txt")
for _, line in ipairs(lines) do
local chat_output = adapter.handlers.chat_output(adapter, line)
if chat_output and chat_output.extra then
chat_output = adapter.handlers.parse_message_meta(adapter, chat_output)
end
if chat_output then
if chat_output.output.content then
output = output .. chat_output.output.content
end
if chat_output.output.reasoning then
reasoning = reasoning .. chat_output.output.reasoning.content
end
end
end

h.expect_starts_with("Elegant, dynamic", output)
h.expect_starts_with("This is a dummy reasoning segment", reasoning)
end

T["Gemini adapter"]["Streaming"]["can process tools"] = function()
local tools = {}
local lines = vim.fn.readfile("tests/adapters/http/stubs/gemini_tools_streaming.txt")
@@ -194,6 +217,22 @@ T["Gemini adapter"]["No Streaming"]["can output for the chat buffer"] = function
h.expect_starts_with("Elegant, dynamic.", adapter.handlers.chat_output(adapter, json).output.content)
end

T["Gemini adapter"]["No Streaming"]["can output for the chat buffer with reasoning"] = function()
adapter.opts.stream = false
local data = vim.fn.readfile("tests/adapters/http/stubs/gemini_reasoning_no_streaming.txt")
data = table.concat(data, "\n")

-- Match the format of the actual request
local json = { body = data }

local chat_output = adapter.handlers.chat_output(adapter, json)
if chat_output and chat_output.extra then
chat_output = adapter.handlers.parse_message_meta(adapter, chat_output)
end
h.expect_starts_with("Elegant, dynamic.", chat_output.output.content)
h.expect_starts_with("This is a dummy thinking summary", chat_output.output.reasoning.content)
end

T["Gemini adapter"]["No Streaming"]["can process tools"] = function()
local data = vim.fn.readfile("tests/adapters/http/stubs/gemini_tools_no_streaming.txt")
data = table.concat(data, "\n")