diff --git a/doc/configuration/adapters-http.md b/doc/configuration/adapters-http.md
index ba7816249..16cf37557 100644
--- a/doc/configuration/adapters-http.md
+++ b/doc/configuration/adapters-http.md
@@ -90,7 +90,7 @@ require("codecompanion").setup({
       return require("codecompanion.adapters").extend("openai_responses", {
         schema = {
           top_p = {
-            default = 0
+            default = 0,
           },
         },
       })
@@ -115,7 +115,7 @@ require("codecompanion").setup({
             return false
           end
           return true
-        end
+        end,
       },
     },
 })
@@ -389,6 +389,47 @@ require("codecompanion").setup({
 ```
 
+### Gemini
+> This is NOT the Gemini-CLI ACP adapter!
+
+The Gemini adapter uses Google's official [OpenAI-compatible](https://ai.google.dev/gemini-api/docs/openai) endpoint, with one difference: the `reasoning_effort` key is replaced by `thinking_budget`, which controls the number of tokens the model may spend on reasoning.
+
+```lua
+require("codecompanion").setup({
+  adapters = {
+    http = {
+      gemini = function()
+        return require("codecompanion.adapters").extend("gemini", {
+          schema = {
+            thinking_budget = {
+              default = -1,
+            },
+          },
+        })
+      end,
+    },
+  },
+  strategies = {
+    chat = {
+      adapter = "gemini",
+    },
+    inline = {
+      adapter = "gemini",
+    },
+  },
+})
+```
+
+The value of `thinking_budget` can be one of the following:
+
+- `-1`: dynamic thinking (the default).
+- `0`: disables reasoning.
+- A valid positive integer (see [Google's documentation](https://ai.google.dev/gemini-api/docs/thinking#set-budget) for the supported thinking budgets of each model).
+
+Note that, according to the [Gemini documentation](https://ai.google.dev/gemini-api/docs/thinking#summaries), the API only returns thinking _summaries_, not the _raw thinking tokens_.
+Only the raw tokens are bounded by the `thinking_budget` parameter;
+the summaries you see are not bounded by the budget.
+
 ### OpenAI Responses API
 
 CodeCompanion supports OpenAI's [Responses API](https://platform.openai.com/docs/api-reference/responses) out of the box, via a separate adapter:
diff --git a/lua/codecompanion/adapters/http/gemini.lua b/lua/codecompanion/adapters/http/gemini.lua
index b4d96bd68..6a1843f45 100644
--- a/lua/codecompanion/adapters/http/gemini.lua
+++ b/lua/codecompanion/adapters/http/gemini.lua
@@ -1,6 +1,17 @@
 local adapter_utils = require("codecompanion.utils.adapters")
 local openai = require("codecompanion.adapters.http.openai")
 
+local CONSTANTS = { thinking_start = "<thought>", thinking_end = "</thought>" }
+
+---@param message string?
+---@return string?
+local function strip_thinking_tags(message)
+  if message then
+    local result = message:gsub("^" .. CONSTANTS.thinking_start, ""):gsub("^" .. CONSTANTS.thinking_end, "")
+    return result
+  end
+end
+
 ---@class CodeCompanion.HTTPAdapter.Gemini : CodeCompanion.HTTPAdapter
 return {
   name = "gemini",
@@ -52,7 +63,14 @@ return {
       return openai.handlers.tokens(self, data)
     end,
     form_parameters = function(self, params, messages)
-      return openai.handlers.form_parameters(self, params, messages)
+      local processed_params = openai.handlers.form_parameters(self, params, messages)
+      -- https://ai.google.dev/gemini-api/docs/openai#thinking
+      processed_params.extra_body =
+        vim.tbl_deep_extend("force", processed_params.extra_body or {}, { google = { thinking_config = {} } })
+      local thinking_config = processed_params.extra_body.google.thinking_config
+      thinking_config.include_thoughts = thinking_config.thinking_budget ~= 0
+
+      return processed_params
     end,
     form_tools = function(self, tools)
       return openai.handlers.form_tools(self, tools)
@@ -99,8 +117,49 @@ return {
       return result
     end,
     chat_output = function(self, data, tools)
-      return openai.handlers.chat_output(self, data, tools)
+      local _data = openai.handlers.chat_output(self, data, tools)
+      if _data then
+        if _data.output and _data.output.content and _data.output.content:find("^" .. CONSTANTS.thinking_end) then
+          -- The first non-thinking delta in a streamed response following the reasoning deltas
+          -- starts with the closing thinking tag; strip it.
+          _data.output.content = strip_thinking_tags(_data.output.content)
+        end
+      end
+      return _data
+    end,
+
+    parse_message_meta = function(self, data)
+      -- https://ai.google.dev/gemini-api/docs/openai#thinking
+      local extra_content = data.extra.extra_content
+      local has_thinking = extra_content and extra_content.google and extra_content.google.thought
+
+      if not has_thinking then
+        -- This delta is either the actual answer following a reasoning sequence, or reasoning is off.
+        -- In the former case, the content might start with a closing `</thought>` tag; strip it.
+        return {
+          status = data.status,
+          output = { content = strip_thinking_tags(data.output.content), role = data.output.role },
+        }
+      end
+
+      if self.opts.stream then
+        -- The `content` field contains the reasoning summary. Move it to the `reasoning`
+        -- field and erase `content` so that it's not mistaken for the response.
+        local reasoning = strip_thinking_tags(data.output.content)
+        data.output.reasoning = { content = reasoning }
+        data.output.content = nil
+      else
+        -- When not streaming, the reasoning summary and final answer are sent in one big chunk,
+        -- with the reasoning wrapped in `<thought></thought>` tags.
+        local reasoning =
+          data.output.content:match(string.format("^%s(.*)%s", CONSTANTS.thinking_start, CONSTANTS.thinking_end))
+        data.output.reasoning = { content = reasoning }
+        data.output.content = data.output.content:gsub(".*" .. CONSTANTS.thinking_end, "")
+      end
+
+      return data
     end,
+
     tools = {
       format_tool_calls = function(self, tools)
         return openai.handlers.tools.format_tool_calls(self, tools)
@@ -110,7 +169,11 @@ return {
       end,
     },
     inline_output = function(self, data, context)
-      return openai.handlers.inline_output(self, data, context)
+      local inline_output = openai.handlers.inline_output(self, data, context)
+      if inline_output then
+        return { status = inline_output.status, output = inline_output.output:gsub("^.*</thought>", "") }
+      end
+      return nil
     end,
     on_exit = function(self, data)
       return openai.handlers.on_exit(self, data)
@@ -130,11 +193,20 @@ return {
         formatted_name = "Gemini 3 Flash",
         opts = { can_reason = true, has_vision = true },
       },
-      ["gemini-2.5-pro"] = { formatted_name = "Gemini 2.5 Pro", opts = { can_reason = true, has_vision = true } },
-      ["gemini-2.5-flash"] = { formatted_name = "Gemini 2.5 Flash", opts = { can_reason = true, has_vision = true } },
+      ["gemini-2.5-pro"] = {
+        formatted_name = "Gemini 2.5 Pro",
+        opts = { can_reason = true, has_vision = true },
+        thinking_budget = { low = 128, high = 32768 },
+      },
+      ["gemini-2.5-flash"] = {
+        formatted_name = "Gemini 2.5 Flash",
+        opts = { can_reason = true, has_vision = true },
+        thinking_budget = { low = 0, high = 24576 },
+      },
       ["gemini-2.5-flash-preview-05-20"] = {
         formatted_name = "Gemini 2.5 Flash Preview",
         opts = { can_reason = true, has_vision = true },
+        thinking_budget = { low = 0, high = 24576 },
       },
       ["gemini-2.0-flash"] = { formatted_name = "Gemini 2.0 Flash", opts = { has_vision = true } },
       ["gemini-2.0-flash-lite"] = { formatted_name = "Gemini 2.0 Flash Lite", opts = { has_vision = true } },
@@ -179,10 +251,11 @@ return {
       end,
     },
     ---@type CodeCompanion.Schema
-    reasoning_effort = {
+    thinking_budget = {
+      -- https://ai.google.dev/gemini-api/docs/thinking#set-budget
       order = 5,
-      mapping = "parameters",
-      type = "string",
+      mapping = "parameters.extra_body.google.thinking_config",
+      type = "integer",
       optional = true,
       ---@type fun(self: CodeCompanion.HTTPAdapter): boolean
       enabled = function(self)
@@ -195,14 +268,10 @@ return {
         end
         return false
       end,
-      default = "medium",
-      desc = "Constrains effort on reasoning for reasoning models. Reducing reasoning effort can result in faster responses and fewer tokens used on reasoning in a response.",
-      choices = {
-        "high",
-        "medium",
-        "low",
-        "none",
-      },
+      -- For models that support reasoning, leaving this unset means 'dynamic thinking'
+      default = nil,
+      -- TODO: validate requires having `self` in the params.
+ desc = "The thinkingBudget parameter guides the model on the number of thinking tokens to use when generating a response.", }, }, } diff --git a/lua/codecompanion/interactions/chat/init.lua b/lua/codecompanion/interactions/chat/init.lua index f93c34011..bcceabbdd 100644 --- a/lua/codecompanion/interactions/chat/init.lua +++ b/lua/codecompanion/interactions/chat/init.lua @@ -1219,12 +1219,23 @@ function Chat:done(output, reasoning, tools, meta, opts) local reasoning_content = nil if reasoning and not vim.tbl_isempty(reasoning) then - if vim.iter(reasoning):any(function(item) - return item and type(item) ~= "string" - end) then - reasoning_content = adapters.call_handler(self.adapter, "build_reasoning", reasoning) + local build_reasoning_handler = adapters.get_handler(self.adapter, "build_reasoning") + if type(build_reasoning_handler) == "function" then + reasoning_content = build_reasoning_handler(self.adapter, reasoning) else - reasoning_content = table.concat(reasoning, "") + -- Assume trivial `reasoning` structure: string or `{content: string}` + reasoning_content = { + content = vim + .iter(reasoning) + :map(function(item) + if type(item) == "string" then + return item + elseif type(item) == "table" and item.content then + return item.content + end + end) + :join(""), + } end end diff --git a/tests/adapters/http/stubs/gemini_reasoning_no_streaming.txt b/tests/adapters/http/stubs/gemini_reasoning_no_streaming.txt new file mode 100644 index 000000000..1d9448612 --- /dev/null +++ b/tests/adapters/http/stubs/gemini_reasoning_no_streaming.txt @@ -0,0 +1,25 @@ +{ + "choices": [ + { + "finish_reason": "stop", + "index": 0, + "message": { + "content": "This is a dummy thinking summaryElegant, dynamic.\\n\\nNext question?\\n", + "role": "assistant", + "extra_content": { + "google": { + "thought": true + } + } + } + } + ], + "created": 1743460357, + "model": "gemini-2.5-flash", + "object": "chat.completion", + "usage": { + "completion_tokens": 9, + "prompt_tokens": 419, + "total_tokens": 428 + } +} diff --git a/tests/adapters/http/stubs/gemini_reasoning_streaming.txt b/tests/adapters/http/stubs/gemini_reasoning_streaming.txt new file mode 100644 index 000000000..92c7374c0 --- /dev/null +++ b/tests/adapters/http/stubs/gemini_reasoning_streaming.txt @@ -0,0 +1,6 @@ +data: {"choices":[{"delta":{"content":"This is a dummy reasoning segment","role":"assistant", "extra_content":{"google":{"thought":true}}},"index":0}],"created":1743460108,"model":"gemini-2.5-flash","object":"chat.completion.chunk","usage":{"completion_tokens":0,"prompt_tokens":425,"total_tokens":425}} +data: {"choices":[{"delta":{"content":"Elegant","role":"assistant"},"index":0}],"created":1743460109,"model":"gemini-2.5-flash","object":"chat.completion.chunk","usage":{"completion_tokens":0,"prompt_tokens":425,"total_tokens":425}} +Tokens: 425 +data: {"choices":[{"delta":{"content":", dynamic.\n\nNext question?\n","role":"assistant"},"finish_reason":"stop","index":0}],"created":1743460109,"model":"gemini-2.5-flash","object":"chat.completion.chunk","usage":{"completion_tokens":9,"prompt_tokens":419,"total_tokens":428}} +Tokens: 428 +data: [DONE] diff --git a/tests/adapters/http/test_gemini.lua b/tests/adapters/http/test_gemini.lua index 5f166907c..a705875f9 100644 --- a/tests/adapters/http/test_gemini.lua +++ b/tests/adapters/http/test_gemini.lua @@ -71,6 +71,29 @@ T["Gemini adapter"]["Streaming"]["can output streamed data into the chat buffer" h.expect_starts_with("Elegant, dynamic", output) end +T["Gemini adapter"]["Streaming"]["can 
output streamed data with reasoning into the chat buffer"] = function() + local output = "" + local reasoning = "" + local lines = vim.fn.readfile("tests/adapters/http/stubs/gemini_reasoning_streaming.txt") + for _, line in ipairs(lines) do + local chat_output = adapter.handlers.chat_output(adapter, line) + if chat_output and chat_output.extra then + chat_output = adapter.handlers.parse_message_meta(adapter, chat_output) + end + if chat_output then + if chat_output.output.content then + output = output .. chat_output.output.content + end + if chat_output.output.reasoning then + reasoning = reasoning .. chat_output.output.reasoning.content + end + end + end + + h.expect_starts_with("Elegant, dynamic", output) + h.expect_starts_with("This is a dummy reasoning segment", reasoning) +end + T["Gemini adapter"]["Streaming"]["can process tools"] = function() local tools = {} local lines = vim.fn.readfile("tests/adapters/http/stubs/gemini_tools_streaming.txt") @@ -194,6 +217,22 @@ T["Gemini adapter"]["No Streaming"]["can output for the chat buffer"] = function h.expect_starts_with("Elegant, dynamic.", adapter.handlers.chat_output(adapter, json).output.content) end +T["Gemini adapter"]["No Streaming"]["can output for the chat buffer with reasoning"] = function() + adapter.opts.stream = false + local data = vim.fn.readfile("tests/adapters/http/stubs/gemini_reasoning_no_streaming.txt") + data = table.concat(data, "\n") + + -- Match the format of the actual request + local json = { body = data } + + local chat_output = adapter.handlers.chat_output(adapter, json) + if chat_output and chat_output.extra then + chat_output = adapter.handlers.parse_message_meta(adapter, chat_output) + end + h.expect_starts_with("Elegant, dynamic.", chat_output.output.content) + h.expect_starts_with("This is a dummy thinking summary", chat_output.output.reasoning.content) +end + T["Gemini adapter"]["No Streaming"]["can process tools"] = function() local data = vim.fn.readfile("tests/adapters/http/stubs/gemini_tools_no_streaming.txt") data = table.concat(data, "\n")
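
As a quick reference for reviewers, here is a minimal sketch (not part of the patch) of the request parameters that the new schema mapping and the `form_parameters` handler should produce together. The budget value below is hypothetical; the structure follows the `parameters.extra_body.google.thinking_config` mapping introduced in this diff:

```lua
-- Illustrative only: the expected parameter shape after this patch,
-- assuming a user sets `thinking_budget = 8192` (a hypothetical value).
local expected_params = {
  extra_body = {
    google = {
      thinking_config = {
        -- Written via the schema mapping "parameters.extra_body.google.thinking_config"
        thinking_budget = 8192,
        -- Set by `form_parameters`: thoughts are requested whenever the budget is not 0
        include_thoughts = true,
      },
    },
  },
}
```

With `thinking_budget = 0`, `form_parameters` flips `include_thoughts` to `false`; with the key left unset (the schema default), `thinkingBudget` is omitted from the request entirely and Google falls back to dynamic thinking.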