Skip to content

Commit 3971b2d

Browse files
authored
Merge pull request #52 from BalanceBalls/images-support
Images support
2 parents ded59c8 + 6ea08e4 commit 3971b2d

28 files changed

+1172
-301
lines changed

README.md

Lines changed: 21 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,8 @@ A tool for those who appreciate keyboard driven apps and terminal workflows
1414
* 📦 **Single binary** - lightweight, zero dependencies, use anywhere without any requirements
1515
* 🤖 **Support for OpenAI compatible APIs** (ChatGPT, Mistral, Ollama, LMStudio, llama-cpp and more)
1616
* 🌟 **Support for Gemini API**
17+
* 🔀 **Support for OpenRouter API**
18+
* 🖼️ **Images support**
1719
* 💬 **Chat sessions** management and quick chats
1820
* ⚙️ **Settings presets** (configure different personas with unique settings)
1921
* ✂️ **Convenient text selection** tool (vim-like line selection)
@@ -70,7 +72,7 @@ bash -c "$(curl -fsSL https://raw.githubusercontent.com/BalanceBalls/nekot/main/
7072

7173
## Setting API keys
7274

73-
To use the app, you will need to set `OPENAI_API_KEY` or/and `GEMINI_API_KEY` env variables depending on your needs
75+
To use the app, you will need to set the `OPENAI_API_KEY`, `GEMINI_API_KEY`, and/or `OPENROUTER_API_KEY` env variables depending on your needs
7476

7577
<details>
7678

@@ -95,6 +97,14 @@ Set up your api key - [how to get an api key](https://aistudio.google.com/apikey
9597
```bash
9698
export GEMINI_API_KEY="some-key" # you would want to export this in your .zshrc or .bashrc
9799
```
100+
101+
### OpenRouter API
102+
103+
Set up your api key - [how to get an api key](https://openrouter.ai/docs/api-reference/authentication)
104+
105+
```bash
106+
export OPENROUTER_API_KEY="some-key" # you would want to export this in your .zshrc or .bashrc
107+
```
98108
</details>
99109

100110
## Config
@@ -110,14 +120,18 @@ We provide a `config.json` file within your directory for easy access to essenti
110120
"systemMessage": "",
111121
"defaultModel": "",
112122
"colorScheme": "groove", // pink, blue, groove
113-
"provider": "openai" // openai, gemini
123+
"provider": "openai", // openai, gemini, openrouter
124+
"maxAttachmentSizeMb": 3,
125+
"includeReasoningTokensInContext": true
114126
}
115127
```
116128

117129
- `providerBaseUrl`: The url can be anything that follows OpenAI API standard ( [ollama](http://localhost:11434), [lmstudio](http://127.0.0.1:1234), etc)
118130
- `chatGPTApiUrl` [obsolete]: same as `providerBaseUrl`
119131
- `systemMessage` field is available for customizing system prompt messages. **Better to set it from the app**
120132
- `defaultModel` field sets the default model. **Better to set it from the app**
133+
- `maxAttachmentSizeMb` field sets the maximum allowed image attachment size in megabytes
134+
- `includeReasoningTokensInContext` field sets whether reasoning tokens from previous responses are included in the context of the next request.
121135

122136
### Providers
123137

@@ -126,9 +140,10 @@ You can change API provider using the `provider` field.
126140
Available providers:
127141
* `openai` **default**
128142
* `gemini`
143+
* `openrouter`
129144

130145
To use **GeminiAPI**, just set `"provider": "gemini"` (make sure to set GEMINI_API_KEY env variable).
131-
When using the `gemini` provider, `providerBaseUrl` param is not used.
146+
When using the `gemini` or `openrouter` providers, `providerBaseUrl` param is not used.
132147

133148
### Themes
134149
You can change colorscheme using the `colorScheme` field.
@@ -159,6 +174,7 @@ To switch between openai and gemini APIs you can use `-p` flag:
159174
```bash
160175
nekot -p openai
161176
nekot -p gemini
177+
nekot -p openrouter
162178
```
163179

164180
### Provider url
@@ -201,6 +217,7 @@ nekot -t blue
201217
\```
202218
- `esc`: Exit insert mode for the prompt
203219
* When in 'Prompt editor' mode, pressing `esc` second time will close editor
220+
- `ctrl+a`: open file picker for attaching images. You can also attach images by typing: `[img=/path/to/image]`
204221

205222
## Chat Messages Pane
206223

@@ -284,6 +301,7 @@ rc-nekot
284301
nice terminal layouts!
285302
- [bubbles](https://github.com/charmbracelet/bubbles): Some general use
286303
components for Bubble Tea apps!
304+
- [go-openrouter](https://github.com/reVrost/go-openrouter): OpenRouter API SDK
287305

288306
## Contributors
289307

clients/gemini.go

Lines changed: 29 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,11 @@ package clients
22

33
import (
44
"context"
5+
"encoding/base64"
56
"errors"
67
"fmt"
78
"os"
9+
"path/filepath"
810
"strings"
911

1012
"github.com/BalanceBalls/nekot/config"
@@ -36,7 +38,7 @@ func NewGeminiClient(systemMessage string) *GeminiClient {
3638

3739
func (c GeminiClient) RequestCompletion(
3840
ctx context.Context,
39-
chatMsgs []util.MessageToSend,
41+
chatMsgs []util.LocalStoreMessage,
4042
modelSettings util.Settings,
4143
resultChan chan util.ProcessApiCompletionResponse,
4244
) tea.Cmd {
@@ -50,7 +52,8 @@ func (c GeminiClient) RequestCompletion(
5052

5153
client, err := genai.NewClient(ctx, option.WithAPIKey(os.Getenv("GEMINI_API_KEY")))
5254
if err != nil {
53-
return util.MakeErrorMsg(err.Error())
55+
resultChan <- util.ProcessApiCompletionResponse{ID: util.ChunkIndexStart, Err: err, Final: true}
56+
return nil
5457
}
5558
defer client.Close()
5659

@@ -61,7 +64,10 @@ func (c GeminiClient) RequestCompletion(
6164

6265
currentPrompt := chatMsgs[len(chatMsgs)-1].Content
6366
cs := model.StartChat()
64-
cs.History = buildChatHistory(chatMsgs)
67+
cs.History, err = buildChatHistory(chatMsgs, *config.IncludeReasoningTokensInContext)
68+
if err != nil {
69+
return util.MakeErrorMsg(err.Error())
70+
}
6571

6672
iter := cs.SendMessageStream(ctx, genai.Text(currentPrompt))
6773

@@ -133,7 +139,7 @@ func (c GeminiClient) RequestModelsList(ctx context.Context) util.ProcessModelsR
133139
modelsIter := client.ListModels(ctx)
134140

135141
if ctx.Err() == context.DeadlineExceeded {
136-
return util.ProcessModelsResponse{Err: errors.New("Timedout during fetching models")}
142+
return util.ProcessModelsResponse{Err: errors.New("timed out during fetching models")}
137143
}
138144

139145
var modelsList []util.ModelDescription
@@ -319,7 +325,7 @@ func handleFinishReason(reason genai.FinishReason) (string, error) {
319325
return "", nil
320326
}
321327

322-
func buildChatHistory(msgs []util.MessageToSend) []*genai.Content {
328+
func buildChatHistory(msgs []util.LocalStoreMessage, includeReasoning bool) ([]*genai.Content, error) {
323329
chat := []*genai.Content{}
324330

325331
for _, singleMessage := range msgs {
@@ -329,7 +335,7 @@ func buildChatHistory(msgs []util.MessageToSend) []*genai.Content {
329335
}
330336

331337
messageContent := ""
332-
if singleMessage.Resoning != "" {
338+
if singleMessage.Resoning != "" && includeReasoning {
333339
messageContent += singleMessage.Resoning
334340
}
335341
if singleMessage.Content != "" {
@@ -343,11 +349,27 @@ func buildChatHistory(msgs []util.MessageToSend) []*genai.Content {
343349
},
344350
Role: role,
345351
}
352+
353+
if len(singleMessage.Attachments) != 0 {
354+
for _, item := range singleMessage.Attachments {
355+
decodedBytes, err := base64.StdEncoding.DecodeString(item.Content)
356+
357+
if err != nil {
358+
util.Slog.Error("failed to decode file bytes", "item", item.Path, "error", err.Error())
359+
return nil, errors.New("could not prepare attachments for request")
360+
}
361+
362+
extension := filepath.Ext(item.Path)
363+
extension = strings.TrimPrefix(extension, ".")
364+
part := genai.ImageData(extension, decodedBytes)
365+
message.Parts = append(message.Parts, part)
366+
}
367+
}
346368
chat = append(chat, &message)
347369
}
348370

349371
util.Slog.Debug("constructed turn", "data", messageContent)
350372
}
351373

352-
return chat
374+
return chat, nil
353375
}

clients/openai.go

Lines changed: 42 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ import (
1010
"net/http"
1111
"net/url"
1212
"os"
13+
"path/filepath"
1314
"strings"
1415

1516
"github.com/BalanceBalls/nekot/config"
@@ -36,7 +37,7 @@ func NewOpenAiClient(apiUrl, systemMessage string) *OpenAiClient {
3637

3738
func (c OpenAiClient) RequestCompletion(
3839
ctx context.Context,
39-
chatMsgs []util.MessageToSend,
40+
chatMsgs []util.LocalStoreMessage,
4041
modelSettings util.Settings,
4142
resultChan chan util.ProcessApiCompletionResponse,
4243
) tea.Cmd {
@@ -80,22 +81,54 @@ func (c OpenAiClient) RequestModelsList(ctx context.Context) util.ProcessModelsR
8081
return processModelsListResponse(resp)
8182
}
8283

83-
func ConstructUserMessage(content string) util.OpenAIConversationTurn {
84+
func constructUserMessage(msg util.LocalStoreMessage) util.OpenAIConversationTurn {
85+
content := []util.OpenAiContent{
86+
{
87+
Type: "text",
88+
Text: msg.Content,
89+
},
90+
}
91+
92+
if len(msg.Attachments) != 0 {
93+
for _, attachment := range msg.Attachments {
94+
data := getImageURLString(attachment)
95+
image := util.OpenAiContent{
96+
Type: "image_url",
97+
ImageURL: util.OpenAiImage{
98+
URL: data,
99+
},
100+
}
101+
content = append(content, image)
102+
}
103+
}
104+
84105
return util.OpenAIConversationTurn{
85-
Role: "user",
106+
Role: msg.Role,
86107
Content: content,
87108
}
88109
}
89110

111+
func getImageURLString(attachment util.Attachment) string {
112+
extension := filepath.Ext(attachment.Path)
113+
extension = strings.TrimPrefix(extension, ".")
114+
content := "data:image/" + extension + ";base64," + attachment.Content
115+
return content
116+
}
117+
90118
func constructSystemMessage(content string) util.OpenAIConversationTurn {
91119
return util.OpenAIConversationTurn{
92-
Role: "system",
93-
Content: content,
120+
Role: "system",
121+
Content: []util.OpenAiContent{
122+
{
123+
Type: "text",
124+
Text: content,
125+
},
126+
},
94127
}
95128
}
96129

97130
func (c OpenAiClient) constructCompletionRequestPayload(
98-
chatMsgs []util.MessageToSend,
131+
chatMsgs []util.LocalStoreMessage,
99132
cfg config.Config,
100133
settings util.Settings,
101134
) ([]byte, error) {
@@ -114,7 +147,7 @@ func (c OpenAiClient) constructCompletionRequestPayload(
114147

115148
for _, singleMessage := range chatMsgs {
116149
messageContent := ""
117-
if singleMessage.Resoning != "" {
150+
if singleMessage.Resoning != "" && *cfg.IncludeReasoningTokensInContext {
118151
messageContent += singleMessage.Resoning
119152
}
120153

@@ -123,13 +156,8 @@ func (c OpenAiClient) constructCompletionRequestPayload(
123156
}
124157

125158
if messageContent != "" {
126-
conversationTurn := util.OpenAIConversationTurn{
127-
Model: singleMessage.Model,
128-
Role: singleMessage.Role,
129-
Content: messageContent,
130-
}
131-
132-
util.Slog.Debug("constructed turn", "data", conversationTurn)
159+
singleMessage.Content = messageContent
160+
conversationTurn := constructUserMessage(singleMessage)
133161
messages = append(messages, conversationTurn)
134162
}
135163
}

0 commit comments

Comments
 (0)