Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 24 additions & 0 deletions doc/usage/chat-buffer/tools.md
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,16 @@ This tools enables an LLM to fetch the content from a specific webpage. It will
Use @{fetch_webpage} to tell me what the latest version on neovim.io is
```

This tool supports 3 modes when fetching a website:

- `text` (default): Returns `document.body.innerText`.
- `screenshot`: Returns the image URL of a screenshot of the first screen.
- `pageshot`: Returns the image URL of the full-page screenshot.

The LLM chooses which mode to use when it calls the tool, and you can ask it to use a specific mode in the chat.
Keep in mind that the `screenshot` and `pageshot` modes only make sense if you're using a multi-modal LLM, in which case you should also give it the `@{fetch_images}` tool so that it can fetch the screenshot/pageshot from the returned URL.


**Options:**
- `adapter` The adapter used to fetch, process and format the webpage's content (Default: `jina`)

Expand Down Expand Up @@ -238,6 +248,20 @@ Use @{web_search} to search neovim.io and explain how I can configure a new lang


Currently, the tool uses [tavily](https://www.tavily.com) and you'll need to ensure that an API key has been set accordingly, as per the [adapter](https://github.com/olimorris/codecompanion.nvim/blob/main/lua/codecompanion/adapters/tavily.lua).
This tool also supports image results in the search that can be consumed by multi-modal LLMs.
To achieve that, you'd also need to give the `@{fetch_images}` tool to the LLM so that it can fetch the images from the URL.

### `fetch_images`

This tool allows the LLM to fetch images from URLs.
Any URL that directly points to an image would work with this tool.
While you could certainly copy-paste URLs into the chat buffer, it's probably more convenient to use this with the `@{web_search}` tool:

```md
Using the @{web_search} and @{fetch_images} tools, tell me what the logo of codecompanion.nvim looks like.
```

**You should only use this tool with a multi-modal LLM.**

## Tool Groups

Expand Down
8 changes: 6 additions & 2 deletions lua/codecompanion/adapters/http/jina.lua
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ return {

self.headers = vim.tbl_deep_extend("force", self.headers, {
["Content-Type"] = "application/json",
["X-Return-Format"] = "text",
["X-Return-Format"] = data.content_format or "text",
["Accept"] = "application/json",
})

Expand Down Expand Up @@ -103,7 +103,11 @@ return {
end
return {
success = "success",
content = data.data.text,
content = {
text = data.data.text,
screenshot = data.data.screenshotUrl,
pageshot = data.data.pageshotUrl,
},
}
end,
},
Expand Down
25 changes: 21 additions & 4 deletions lua/codecompanion/adapters/http/tavily.lua
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,8 @@ return {
include_answer = opts.include_answer or false,
include_raw_content = opts.include_raw_content or false,
include_domains = data.domains,
include_images = data.include_images,
include_image_descriptions = data.include_images, -- always include descriptions when searching for images.
}

if opts.topic == "news" then
Expand Down Expand Up @@ -75,15 +77,15 @@ return {
end

-- Process results (move existing output logic here)
if body.results == nil or #body.results == 0 then
if (body.results == nil or #body.results == 0) and (body.images == nil or #body.images == 0) then
return {
status = "error",
content = "No results found",
}
end

local output = vim
.iter(body.results)
local text_output = vim
.iter(body.results or {})
:map(function(result)
return {
content = result.content or "",
Expand All @@ -93,9 +95,24 @@ return {
end)
:totable()

local images = vim
.iter(body.images or {})
:map(function(result)
-- https://docs.tavily.com/documentation/api-reference/endpoint/search#response-images
if type(result) == "string" then
return { url = result }
elseif type(result) == "table" then
return { url = result.url, description = result.description }
end
end)
:totable()

return {
status = "success",
content = output,
content = {
text = text_output,
images = images,
},
}
end,
},
Expand Down
4 changes: 4 additions & 0 deletions lua/codecompanion/config.lua
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,10 @@ local defaults = {
adapter = "jina",
},
},
["fetch_images"] = {
callback = "interactions.chat.tools.builtin.fetch_images",
description = "Fetches images from the given URL(s).",
},
["file_search"] = {
callback = "interactions.chat.tools.builtin.file_search",
description = "Search for files in the current working directory by glob pattern",
Expand Down
17 changes: 10 additions & 7 deletions lua/codecompanion/interactions/chat/init.lua
Original file line number Diff line number Diff line change
Expand Up @@ -953,13 +953,14 @@ end

---Add an image to the chat buffer
---@param image CodeCompanion.Image The image object containing the path and other metadata
---@param opts? {role?: "user"|string, source?: string, bufnr?: integer} Options for adding the image
---@param opts? {role?: "user"|string, source?: string, bufnr?: integer, add_context?: boolean} Options for adding the image
---@return nil
function Chat:add_image_message(image, opts)
opts = vim.tbl_deep_extend("force", {
role = config.constants.USER_ROLE,
source = "codecompanion.interactions.chat.slash_commands.image",
bufnr = image.bufnr,
add_context = true,
}, opts or {})

local id = "<image>" .. (image.id or image.path) .. "</image>"
Expand All @@ -973,12 +974,14 @@ function Chat:add_image_message(image, opts)
visible = false,
})

self.context:add({
bufnr = opts.bufnr,
id = id,
path = image.path,
source = opts.source,
})
if opts.add_context then
self.context:add({
bufnr = opts.bufnr,
id = id,
path = image.path,
source = opts.source,
})
end
end

---Apply any tools or variables that a user has tagged in their message
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,7 @@ local choice = {
return
end

image_utils.from_url(url, { chat_bufnr = SlashCommand.Chat.bufnr }, function(_res)
image_utils.from_url(url, { chat_bufnr = SlashCommand.Chat.bufnr, from = "slash_command" }, function(_res)
if type(_res) == "string" then
return log:error(_res)
end
Expand Down
106 changes: 106 additions & 0 deletions lua/codecompanion/interactions/chat/tools/builtin/fetch_images.lua
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
local im_utils = require("codecompanion.utils.images")

---@class CodeCompanion.Tool.FetchImages: CodeCompanion.Tools.Tool
return {
  name = "fetch_images",
  cmds = {
    ---Execute the fetch_images tool
    ---
    ---Each URL is handed to `im_utils.from_url`. `cb` fires once, after the
    ---last fetch has reported back, with a map of URL -> fetch result.
    ---@param tools CodeCompanion.Tools
    ---@param args {urls: string[]} The arguments from the LLM's tool call
    ---@param cb function Async callback for completion
    function(tools, args, _, cb)
      local urls = args.urls
      if type(urls) ~= "table" or #urls == 0 then
        -- Nothing to fetch. Complete through `cb` with an empty result map:
        -- the per-URL callback below never runs for an empty list, so without
        -- this guard the tool call would never be completed.
        return cb({ status = "success", data = {} })
      end

      local total = #urls
      ---@type table<string, (CodeCompanion.Image|string)>
      local images = {}
      local has_image = false
      local processed_count = 0

      vim.iter(urls):each(
        ---@param url string
        function(url)
          im_utils.from_url(url, { chat_bufnr = tools.chat.bufnr, from = "tool" }, function(result)
            processed_count = processed_count + 1
            -- Results are keyed by URL; a table result counts as a fetched
            -- image, any other value is counted as a failure by the handlers.
            images[url] = result
            if type(result) == "table" then
              has_image = true
            end
            if processed_count == total then
              -- Report an error only if every URL failed to load.
              cb({ status = has_image and "success" or "error", data = images })
            end
          end)
        end
      )
    end,
  },
  schema = {
    type = "function",
    ["function"] = {
      name = "fetch_images",
      description = "Fetches images from the given URL(s).",
      parameters = {
        type = "object",
        properties = {
          urls = {
            type = "array",
            items = { type = "string" },
            description = "The URL of the images to fetch from. The URLs must come from the context or previous tool calls.",
          },
        },
        required = { "urls" },
      },
    },
  },
  output = {
    ---Add every fetched image to the chat and report any URLs that failed.
    ---@param self CodeCompanion.Tool.FetchImages
    ---@param tools CodeCompanion.Tools
    ---@param cmd table The command that was executed
    ---@param stdout table The output from the command
    success = function(self, tools, cmd, stdout)
      local chat = tools.chat

      -- The last stdout entry is the URL -> result map built by the command.
      -- Fall back to an empty map so a call without URLs does not raise.
      ---@type table<string, (CodeCompanion.Image|string)>
      local results = type(stdout) == "table" and stdout[#stdout] or nil
      if type(results) ~= "table" then
        results = {}
      end

      -- Count what was actually fetched rather than what was requested, so
      -- duplicate URLs (which share one map entry) are not over-reported.
      local fetched_count = 0
      local failed_urls = {}

      for url, item in pairs(results) do
        if type(item) == "table" then
          fetched_count = fetched_count + 1
          chat:add_image_message(
            item,
            { source = "codecompanion.strategies.chat.tools.fetch_images", add_context = false }
          )
        else
          failed_urls[#failed_urls + 1] = url
        end
      end

      if #failed_urls > 0 then
        chat:add_tool_output(
          self,
          "Failed to fetch images from the following URLs: " .. table.concat(failed_urls, ", "),
          string.format(
            "Successfully fetched %d images. Failed to fetch from %d URLs",
            fetched_count,
            #failed_urls
          )
        )
      else
        chat:add_tool_output(self, string.format("Successfully fetched %d image(s).", fetched_count))
      end
    end,

    ---Report that none of the requested images could be fetched.
    ---@param self CodeCompanion.Tool.FetchImages
    ---@param tools CodeCompanion.Tools
    ---@param cmd table The command that was executed
    ---@param stderr table The output from the command
    error = function(self, tools, cmd, stderr)
      tools.chat:add_tool_output(self, "Failed to fetch all images.")
    end,
  },
}
76 changes: 41 additions & 35 deletions lua/codecompanion/interactions/chat/tools/builtin/fetch_webpage.lua
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ return {
function(self, args, _, cb)
local opts = self.tool.opts
local url = args.url
args.content_format = args.content_format or "text"

if not opts or not opts.adapter then
log:error("[Fetch Webpage Tool] No adapter set for `fetch_webpage`")
Expand All @@ -35,6 +36,13 @@ return {
local adapter = vim.deepcopy(adapters.resolve(tool_adapter))
adapter.methods.tools.fetch_webpage.setup(adapter, args)

if args.content_format ~= "text" then
if type(self.chat.adapter.opts) == "table" and not self.chat.adapter.opts.vision then
log:warn("[Fetcg Webpage Tool] Setting `content_format` to text because the chat adapter disabled vision.")
args.content_format = "text"
end
end

if not url:match("^https?://") then
log:error("[Fetch Webpage Tool] Invalid URL: `%s`", url)
return cb({ status = "error", data = fmt("Invalid URL: `%s`", url) })
Expand All @@ -58,7 +66,14 @@ return {
return cb({ status = "error", data = fmt("Error processing `%s`\n%s", url, output.content) })
end

return cb({ status = "success", data = output.content })
return cb({
status = "success",
data = {
text = (args.content_format == "text") and output.content.text or nil,
screenshot = (args.content_format == "screenshot") and output.content.screenshot or nil,
pageshot = (args.content_format == "pageshot") and output.content.pageshot or nil,
},
})
end
end,
})
Expand All @@ -76,8 +91,20 @@ return {
type = "string",
description = "The URL of the webpage to fetch content from",
},
content_format = {
type = "string",
enum = { "text", "screenshot", "pageshot" },
description = [[How the result should be presented.
- `text`: Returns `document.body.innerText`.
- `screenshot`: Returns the image URL of a screenshot of the first screen.
- `pageshot`: Returns the image URL of the full-page screenshot.
Choose `screenshot` or `pageshot` if you need to know the layout, design or image information of the website AND you have vision capability.
Otherwise, stick to `text`.
When you receive a URL to the screenshot or pageshot, you should call the `fetch_images` tool to see the image.
]],
},
},
required = { "url" },
required = { "url", "content_format" },
},
},
},
Expand All @@ -90,41 +117,20 @@ return {
local args = self.args
local chat = tools.chat

local content
if type(stdout) == "table" then
if #stdout == 1 and type(stdout[1]) == "string" then
content = stdout[1]
elseif #stdout == 1 and type(stdout[1]) == "table" then
-- If stdout[1] is a table, try to extract content
local first_item = stdout[1]
if type(first_item) == "table" and first_item.content then
content = first_item.content
else
-- Fallback: convert to string representation
content = vim.inspect(first_item)
end
else
-- Multiple items or other structure
content = vim
.iter(stdout)
:map(function(item)
if type(item) == "string" then
return item
elseif type(item) == "table" and item.content then
return item.content
else
return vim.inspect(item)
end
end)
:join("\n")
end
else
content = tostring(stdout)
local llm_output
local user_output
local output = stdout[#stdout]
if type(output.text) == "string" then
llm_output = fmt([[<attachment url="%s">%s</attachment>]], args.url, output.text)
user_output = fmt("Fetched content from `%s`", args.url)
elseif type(output.screenshot) == "string" then
llm_output = fmt([[<attachment image_url="%s">Screenshot of %s</attachment>]], output.screenshot, args.url)
user_output = fmt("Fetched screenshot of `%s`", args.url)
elseif type(output.pageshot) == "string" then
llm_output = fmt([[<attachment image_url="%s">Pageshot of %s</attachment>]], output.pageshot, args.url)
user_output = fmt("Fetched pageshot of `%s`", args.url)
end

local llm_output = fmt([[<attachment url="%s">%s</attachment>]], args.url, content)
local user_output = fmt("Fetched content from `%s`", args.url)

chat:add_tool_output(self, llm_output, user_output)
end,

Expand Down
Loading