45 changes: 43 additions & 2 deletions doc/configuration/adapters-http.md
@@ -90,7 +90,7 @@ require("codecompanion").setup({
return require("codecompanion.adapters").extend("openai_responses", {
schema = {
top_p = {
default = 0
default = 0,
},
},
})
@@ -115,7 +115,7 @@ require("codecompanion").setup({
return false
end
return true
end
end,
},
},
})
@@ -389,6 +389,47 @@ require("codecompanion").setup({
```


### Gemini
> This is NOT the Gemini-CLI ACP adapter!

The Gemini adapter uses Google's official [OpenAI-compatible](https://ai.google.dev/gemini-api/docs/openai) endpoint, with one difference: the `reasoning_effort` schema key is replaced by `thinking_budget`, which controls the number of tokens the reasoning process may use.

```lua
require("codecompanion").setup({
adapters = {
http = {
gemini = function()
return require("codecompanion.adapters").extend("gemini", {
schema = {
thinking_budget = {
default = -1,
},
},
})
end,
},
},
strategies = {
chat = {
adapter = "gemini",
},
inline = {
adapter = "gemini",
},
},
}),
```

The value of `thinking_budget` can be one of the following:

- `-1`: dynamic thinking (the default).
- `0`: disable reasoning.
- A positive integer (see [Google's documentation](https://ai.google.dev/gemini-api/docs/thinking#set-budget) for the supported thinking budgets for each model).
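
For example, to turn reasoning off entirely (a minimal sketch, mirroring the `extend` call above):

```lua
require("codecompanion.adapters").extend("gemini", {
  schema = {
    thinking_budget = {
      default = 0, -- 0 disables reasoning; -1 restores dynamic thinking
    },
  },
})
```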

Note that, according to the [Gemini documentation](https://ai.google.dev/gemini-api/docs/thinking#summaries), the API only returns thinking _summaries_, not _raw thinking tokens_.
Only the raw tokens are bounded by the `thinking_budget` parameter; the summaries you see are not.

### OpenAI Responses API

CodeCompanion supports OpenAI's [Responses API](https://platform.openai.com/docs/api-reference/responses) out of the box, via a separate adapter:
101 changes: 85 additions & 16 deletions lua/codecompanion/adapters/http/gemini.lua
@@ -1,6 +1,17 @@
local adapter_utils = require("codecompanion.utils.adapters")
local openai = require("codecompanion.adapters.http.openai")

local CONSTANTS = { thinking_start = "<thought>", thinking_end = "</thought>" }

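-- Strip a single leading thinking tag (opening or closing) from a message delta,
-- e.g. "<thought>summary" -> "summary", "</thought>Answer" -> "Answer".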
---@param message string?
---@return string?
local function strip_thinking_tags(message)
if message then
local result = message:gsub("^" .. CONSTANTS.thinking_start, ""):gsub("^" .. CONSTANTS.thinking_end, "")
return result
end
end

---@class CodeCompanion.HTTPAdapter.Gemini : CodeCompanion.HTTPAdapter
return {
name = "gemini",
@@ -52,7 +63,14 @@ return {
return openai.handlers.tokens(self, data)
end,
form_parameters = function(self, params, messages)
return openai.handlers.form_parameters(self, params, messages)
local processed_params = openai.handlers.form_parameters(self, params, messages)
-- https://ai.google.dev/gemini-api/docs/openai#thinking
processed_params.extra_body =
vim.tbl_deep_extend("force", processed_params.extra_body or {}, { google = { thinking_config = {} } })
local thinking_config = processed_params.extra_body.google.thinking_config
thinking_config.include_thoughts = thinking_config.thinking_budget ~= 0
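-- Illustrative shape of the resulting request fragment (an assumption based on the
-- schema mapping below, `parameters.extra_body.google.thinking_config`):
--   extra_body = { google = { thinking_config = { thinking_budget = <n>, include_thoughts = <bool> } } }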

return processed_params
end,
form_tools = function(self, tools)
return openai.handlers.form_tools(self, tools)
@@ -99,8 +117,49 @@ return {
return result
end,
chat_output = function(self, data, tools)
return openai.handlers.chat_output(self, data, tools)
local _data = openai.handlers.chat_output(self, data, tools)
if _data then
if _data.output and _data.output.content and _data.output.content:find("^" .. CONSTANTS.thinking_end) then
-- The first non-thinking delta in a streamed response, following the reasoning deltas,
-- carries the closing thinking tag; strip it.
_data.output.content = strip_thinking_tags(_data.output.content)
end
end
return _data
end,

parse_message_meta = function(self, data)
-- https://ai.google.dev/gemini-api/docs/openai#thinking
local extra_content = data.extra.extra_content
local has_thinking = extra_content and extra_content.google and extra_content.google.thought
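-- A reasoning delta (cf. the streaming test stub) looks roughly like:
--   { extra = { extra_content = { google = { thought = true } } }, output = { content = "<thought>..." } }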

if not has_thinking then
-- This delta is either the actual answer after a reasoning sequence, or a response with reasoning off.
-- In the former case, the content might start with a `</thought>` tag; strip it.
return {
status = data.status,
output = { content = strip_thinking_tags(data.output.content), role = data.output.role },
}
end

if self.opts.stream then
-- The `content` field contains the reasoning summary.
-- Move it into the `reasoning` field and erase `content` so that it's not mistaken for the response.
local reasoning = strip_thinking_tags(data.output.content)
data.output.reasoning = { content = reasoning }
data.output.content = nil
else
-- When not streaming, the reasoning summary and the final answer arrive in one chunk,
-- with the reasoning wrapped in `<thought></thought>` tags.
local reasoning =
data.output.content:match(string.format("^%s(.*)%s", CONSTANTS.thinking_start, CONSTANTS.thinking_end))
data.output.reasoning = { content = reasoning }
data.output.content = data.output.content:gsub(".*" .. CONSTANTS.thinking_end, "")
end

return data
end,

tools = {
format_tool_calls = function(self, tools)
return openai.handlers.tools.format_tool_calls(self, tools)
@@ -110,7 +169,11 @@ return {
end,
},
inline_output = function(self, data, context)
return openai.handlers.inline_output(self, data, context)
local inline_output = openai.handlers.inline_output(self, data, context)
if inline_output then
return { status = inline_output.status, output = inline_output.output:gsub("^<thought>.*</thought>", "") }
end
return nil
end,
on_exit = function(self, data)
return openai.handlers.on_exit(self, data)
@@ -130,11 +193,20 @@ return {
formatted_name = "Gemini 3 Flash",
opts = { can_reason = true, has_vision = true },
},
["gemini-2.5-pro"] = { formatted_name = "Gemini 2.5 Pro", opts = { can_reason = true, has_vision = true } },
["gemini-2.5-flash"] = { formatted_name = "Gemini 2.5 Flash", opts = { can_reason = true, has_vision = true } },
["gemini-2.5-pro"] = {
formatted_name = "Gemini 2.5 Pro",
opts = { can_reason = true, has_vision = true },
thinking_budget = { low = 128, high = 32768 },
},
["gemini-2.5-flash"] = {
formatted_name = "Gemini 2.5 Flash",
opts = { can_reason = true, has_vision = true },
thinking_budget = { low = 0, high = 24576 },
},
["gemini-2.5-flash-preview-05-20"] = {
formatted_name = "Gemini 2.5 Flash Preview",
opts = { can_reason = true, has_vision = true },
thinking_budget = { low = 0, high = 24576 },
},
["gemini-2.0-flash"] = { formatted_name = "Gemini 2.0 Flash", opts = { has_vision = true } },
["gemini-2.0-flash-lite"] = { formatted_name = "Gemini 2.0 Flash Lite", opts = { has_vision = true } },
@@ -179,10 +251,11 @@ return {
end,
},
---@type CodeCompanion.Schema
reasoning_effort = {
thinking_budget = {
-- https://ai.google.dev/gemini-api/docs/thinking#set-budget
order = 5,
mapping = "parameters",
type = "string",
mapping = "parameters.extra_body.google.thinking_config",
type = "integer",
optional = true,
---@type fun(self: CodeCompanion.HTTPAdapter): boolean
enabled = function(self)
@@ -195,14 +268,10 @@
end
return false
end,
default = "medium",
desc = "Constrains effort on reasoning for reasoning models. Reducing reasoning effort can result in faster responses and fewer tokens used on reasoning in a response.",
choices = {
"high",
"medium",
"low",
"none",
},
-- For models that support reasoning, leaving this unset means 'dynamic thinking'.
default = nil,
-- TODO: validate requires having `self` in the params.
> **Review comment (Contributor Author):** Since different models have different thinking budgets, we might need to change `validate` so that it also accepts `self` in the parameters (something like `validate(self, n)` instead of the current `validate(n)`). @olimorris, would that be alright?

desc = "The thinkingBudget parameter guides the model on the number of thinking tokens to use when generating a response.",
},
},
}
21 changes: 16 additions & 5 deletions lua/codecompanion/interactions/chat/init.lua
@@ -1219,12 +1219,23 @@ function Chat:done(output, reasoning, tools, meta, opts)

local reasoning_content = nil
if reasoning and not vim.tbl_isempty(reasoning) then
if vim.iter(reasoning):any(function(item)
return item and type(item) ~= "string"
end) then
reasoning_content = adapters.call_handler(self.adapter, "build_reasoning", reasoning)
local build_reasoning_handler = adapters.get_handler(self.adapter, "build_reasoning")
if type(build_reasoning_handler) == "function" then
reasoning_content = build_reasoning_handler(self.adapter, reasoning)
else
reasoning_content = table.concat(reasoning, "")
-- Assume a trivial `reasoning` item structure: a string or `{ content: string }`.
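-- e.g. { "step 1 ", { content = "step 2" } } -> { content = "step 1 step 2" }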
reasoning_content = {
content = vim
.iter(reasoning)
:map(function(item)
if type(item) == "string" then
return item
elseif type(item) == "table" and item.content then
return item.content
end
end)
:join(""),
}
end
end

25 changes: 25 additions & 0 deletions tests/adapters/http/stubs/gemini_reasoning_no_streaming.txt
@@ -0,0 +1,25 @@
{
"choices": [
{
"finish_reason": "stop",
"index": 0,
"message": {
"content": "<thought>This is a dummy thinking summary</thought>Elegant, dynamic.\\n\\nNext question?\\n",
"role": "assistant",
"extra_content": {
"google": {
"thought": true
}
}
}
}
],
"created": 1743460357,
"model": "gemini-2.5-flash",
"object": "chat.completion",
"usage": {
"completion_tokens": 9,
"prompt_tokens": 419,
"total_tokens": 428
}
}
6 changes: 6 additions & 0 deletions tests/adapters/http/stubs/gemini_reasoning_streaming.txt
@@ -0,0 +1,6 @@
data: {"choices":[{"delta":{"content":"<thought>This is a dummy reasoning segment","role":"assistant", "extra_content":{"google":{"thought":true}}},"index":0}],"created":1743460108,"model":"gemini-2.5-flash","object":"chat.completion.chunk","usage":{"completion_tokens":0,"prompt_tokens":425,"total_tokens":425}}
data: {"choices":[{"delta":{"content":"</thought>Elegant","role":"assistant"},"index":0}],"created":1743460109,"model":"gemini-2.5-flash","object":"chat.completion.chunk","usage":{"completion_tokens":0,"prompt_tokens":425,"total_tokens":425}}
Tokens: 425
data: {"choices":[{"delta":{"content":", dynamic.\n\nNext question?\n","role":"assistant"},"finish_reason":"stop","index":0}],"created":1743460109,"model":"gemini-2.5-flash","object":"chat.completion.chunk","usage":{"completion_tokens":9,"prompt_tokens":419,"total_tokens":428}}
Tokens: 428
data: [DONE]
39 changes: 39 additions & 0 deletions tests/adapters/http/test_gemini.lua
@@ -71,6 +71,29 @@ T["Gemini adapter"]["Streaming"]["can output streamed data into the chat buffer"] = function()
h.expect_starts_with("Elegant, dynamic", output)
end

T["Gemini adapter"]["Streaming"]["can output streamed data with reasoning into the chat buffer"] = function()
local output = ""
local reasoning = ""
local lines = vim.fn.readfile("tests/adapters/http/stubs/gemini_reasoning_streaming.txt")
for _, line in ipairs(lines) do
local chat_output = adapter.handlers.chat_output(adapter, line)
if chat_output and chat_output.extra then
chat_output = adapter.handlers.parse_message_meta(adapter, chat_output)
end
if chat_output then
if chat_output.output.content then
output = output .. chat_output.output.content
end
if chat_output.output.reasoning then
reasoning = reasoning .. chat_output.output.reasoning.content
end
end
end

h.expect_starts_with("Elegant, dynamic", output)
h.expect_starts_with("This is a dummy reasoning segment", reasoning)
end

T["Gemini adapter"]["Streaming"]["can process tools"] = function()
local tools = {}
local lines = vim.fn.readfile("tests/adapters/http/stubs/gemini_tools_streaming.txt")
@@ -194,6 +217,22 @@ T["Gemini adapter"]["No Streaming"]["can output for the chat buffer"] = function
h.expect_starts_with("Elegant, dynamic.", adapter.handlers.chat_output(adapter, json).output.content)
end

T["Gemini adapter"]["No Streaming"]["can output for the chat buffer with reasoning"] = function()
adapter.opts.stream = false
local data = vim.fn.readfile("tests/adapters/http/stubs/gemini_reasoning_no_streaming.txt")
data = table.concat(data, "\n")

-- Match the format of the actual request
local json = { body = data }

local chat_output = adapter.handlers.chat_output(adapter, json)
if chat_output and chat_output.extra then
chat_output = adapter.handlers.parse_message_meta(adapter, chat_output)
end
h.expect_starts_with("Elegant, dynamic.", chat_output.output.content)
h.expect_starts_with("This is a dummy thinking summary", chat_output.output.reasoning.content)
end

T["Gemini adapter"]["No Streaming"]["can process tools"] = function()
local data = vim.fn.readfile("tests/adapters/http/stubs/gemini_tools_no_streaming.txt")
data = table.concat(data, "\n")