Skip to content

Commit 1672374

Browse files
authored
Merge pull request #1064 from iceljc/test/google-realtime
add gemini file content
2 parents f5c5f8e + c79e35b commit 1672374

File tree

6 files changed

+90
-25
lines changed

6 files changed

+90
-25
lines changed

src/Infrastructure/BotSharp.Core/Files/Services/Instruct/FileInstructService.Pdf.cs

Lines changed: 14 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
using BotSharp.Abstraction.Files.Converters;
22
using BotSharp.Abstraction.Instructs.Models;
33
using BotSharp.Abstraction.Instructs;
4-
using BotSharp.Abstraction.Infrastructures;
54

65
namespace BotSharp.Core.Files.Services;
76

@@ -22,14 +21,24 @@ public async Task<string> ReadPdf(string text, List<InstructFileModel> files, In
2221

2322
try
2423
{
24+
var provider = options?.Provider ?? "openai";
2525
var pdfFiles = await DownloadFiles(sessionDir, files);
26-
var images = await ConvertPdfToImages(pdfFiles);
27-
if (images.IsNullOrEmpty()) return content;
26+
27+
var targetFiles = pdfFiles;
28+
if (provider != "google-ai")
29+
{
30+
targetFiles = await ConvertPdfToImages(pdfFiles);
31+
}
32+
33+
if (targetFiles.IsNullOrEmpty())
34+
{
35+
return content;
36+
}
2837

2938
var innerAgentId = options?.AgentId ?? Guid.Empty.ToString();
3039
var instruction = await GetAgentTemplate(innerAgentId, options?.TemplateName);
3140

32-
var completion = CompletionProvider.GetChatCompletion(_services, provider: options?.Provider ?? "openai",
41+
var completion = CompletionProvider.GetChatCompletion(_services, provider: provider,
3342
model: options?.Model ?? "gpt-4o", multiModal: true);
3443
var message = await completion.GetChatCompletions(new Agent()
3544
{
@@ -39,7 +48,7 @@ public async Task<string> ReadPdf(string text, List<InstructFileModel> files, In
3948
{
4049
new RoleDialogModel(AgentRole.User, text)
4150
{
42-
Files = images.Select(x => new BotSharpFile { FileStorageUrl = x }).ToList()
51+
Files = targetFiles.Select(x => new BotSharpFile { FileStorageUrl = x }).ToList()
4352
}
4453
});
4554

src/Plugins/BotSharp.Plugin.FileHandler/Functions/ReadImageFn.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ public async Task<bool> Execute(RoleDialogModel message)
3535
{
3636
Id = BuiltInAgentId.UtilityAssistant,
3737
Name = "Utility Agent",
38-
Instruction = fromAgent?.Instruction ?? args.UserRequest ?? "Please describe the image(s).",
38+
Instruction = fromAgent?.Instruction ?? args?.UserRequest ?? "Please describe the image(s).",
3939
TemplateDict = new Dictionary<string, object>()
4040
};
4141

src/Plugins/BotSharp.Plugin.FileHandler/Functions/ReadPdfFn.cs

Lines changed: 21 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
using BotSharp.Abstraction.Routing;
2+
13
namespace BotSharp.Plugin.FileHandler.Functions;
24

35
public class ReadPdfFn : IFunctionCallback
@@ -25,20 +27,31 @@ public async Task<bool> Execute(RoleDialogModel message)
2527
{
2628
var args = JsonSerializer.Deserialize<LlmContextIn>(message.FunctionArgs);
2729
var conv = _services.GetRequiredService<IConversationService>();
30+
var routingCtx = _services.GetRequiredService<IRoutingContext>();
2831
var agentService = _services.GetRequiredService<IAgentService>();
2932

30-
var wholeDialogs = conv.GetDialogHistory();
31-
var dialogs = await AssembleFiles(conv.ConversationId, wholeDialogs);
32-
var agent = await agentService.LoadAgent(BuiltInAgentId.UtilityAssistant);
33-
var fileAgent = new Agent
33+
Agent? fromAgent = null;
34+
if (!string.IsNullOrEmpty(message.CurrentAgentId))
35+
{
36+
fromAgent = await agentService.LoadAgent(message.CurrentAgentId);
37+
}
38+
39+
var agent = new Agent
3440
{
35-
Id = agent?.Id ?? Guid.Empty.ToString(),
36-
Name = agent?.Name ?? "Unkown",
37-
Instruction = !string.IsNullOrWhiteSpace(args?.UserRequest) ? args.UserRequest : "Please describe the pdf file(s).",
41+
Id = BuiltInAgentId.UtilityAssistant,
42+
Name = "Utility Agent",
43+
Instruction = fromAgent?.Instruction ?? args?.UserRequest ?? "Please describe the pdf file(s).",
3844
TemplateDict = new Dictionary<string, object>()
3945
};
4046

41-
var response = await GetChatCompletion(fileAgent, dialogs);
47+
var wholeDialogs = routingCtx.GetDialogs();
48+
if (wholeDialogs.IsNullOrEmpty())
49+
{
50+
wholeDialogs = conv.GetDialogHistory();
51+
}
52+
53+
var dialogs = await AssembleFiles(conv.ConversationId, wholeDialogs);
54+
var response = await GetChatCompletion(agent, dialogs);
4255
message.Content = response;
4356
return true;
4457
}

src/Plugins/BotSharp.Plugin.GoogleAI/Providers/Chat/GeminiChatCompletionProvider.cs

Lines changed: 53 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,6 @@
1-
using System.Text.Json.Nodes;
2-
using BotSharp.Abstraction.Agents;
3-
using BotSharp.Abstraction.Agents.Enums;
4-
using BotSharp.Abstraction.Conversations;
1+
using BotSharp.Abstraction.Files;
2+
using BotSharp.Abstraction.Files.Utilities;
53
using BotSharp.Abstraction.Hooks;
6-
using BotSharp.Abstraction.Loggers;
74
using GenerativeAI;
85
using GenerativeAI.Core;
96
using GenerativeAI.Types;
@@ -43,7 +40,7 @@ public async Task<RoleDialogModel> GetChatCompletions(Agent agent, List<RoleDial
4340
}
4441

4542
var client = ProviderHelper.GetGeminiClient(Provider, _model, _services);
46-
var aiModel = client.CreateGenerativeModel(_model);
43+
var aiModel = client.CreateGenerativeModel(_model.ToModelId());
4744
var (prompt, request) = PrepareOptions(aiModel, agent, conversations);
4845

4946
var response = await aiModel.GenerateContentAsync(request);
@@ -101,7 +98,7 @@ public async Task<bool> GetChatCompletionsAsync(Agent agent, List<RoleDialogMode
10198
}
10299

103100
var client = ProviderHelper.GetGeminiClient(Provider, _model, _services);
104-
var chatClient = client.CreateGenerativeModel(_model);
101+
var chatClient = client.CreateGenerativeModel(_model.ToModelId());
105102
var (prompt, messages) = PrepareOptions(chatClient, agent, conversations);
106103

107104
var response = await chatClient.GenerateContentAsync(messages);
@@ -165,7 +162,7 @@ public async Task<bool> GetChatCompletionsAsync(Agent agent, List<RoleDialogMode
165162
public async Task<bool> GetChatCompletionsStreamingAsync(Agent agent, List<RoleDialogModel> conversations, Func<RoleDialogModel, Task> onMessageReceived)
166163
{
167164
var client = ProviderHelper.GetGeminiClient(Provider, _model, _services);
168-
var chatClient = client.CreateGenerativeModel(_model);
165+
var chatClient = client.CreateGenerativeModel(_model.ToModelId());
169166
var (prompt, messages) = PrepareOptions(chatClient,agent, conversations);
170167

171168
var asyncEnumerable = chatClient.StreamContentAsync(messages);
@@ -207,6 +204,10 @@ public void SetModelName(string model)
207204
{
208205
var agentService = _services.GetRequiredService<IAgentService>();
209206
var googleSettings = _services.GetRequiredService<GoogleAiSettings>();
207+
var fileStorage = _services.GetRequiredService<IFileStorageService>();
208+
var settingsService = _services.GetRequiredService<ILlmProviderService>();
209+
var settings = settingsService.GetSetting(Provider, _model);
210+
var allowMultiModal = settings != null && settings.MultiModal;
210211
renderedInstructions = [];
211212

212213
// Add settings
@@ -298,7 +299,50 @@ public void SetModelName(string model)
298299
else if (message.Role == AgentRole.User)
299300
{
300301
var text = !string.IsNullOrWhiteSpace(message.Payload) ? message.Payload : message.Content;
301-
contents.Add(new Content(text, AgentRole.User));
302+
var contentParts = new List<Part> { new() { Text = text } };
303+
304+
if (allowMultiModal && !message.Files.IsNullOrEmpty())
305+
{
306+
foreach (var file in message.Files)
307+
{
308+
if (!string.IsNullOrEmpty(file.FileData))
309+
{
310+
var (contentType, bytes) = FileUtility.GetFileInfoFromData(file.FileData);
311+
contentParts.Add(new Part()
312+
{
313+
InlineData = new()
314+
{
315+
MimeType = contentType,
316+
Data = Convert.ToBase64String(bytes)
317+
}
318+
});
319+
}
320+
else if (!string.IsNullOrEmpty(file.FileStorageUrl))
321+
{
322+
var contentType = FileUtility.GetFileContentType(file.FileStorageUrl);
323+
var bytes = fileStorage.GetFileBytes(file.FileStorageUrl);
324+
contentParts.Add(new Part()
325+
{
326+
InlineData = new()
327+
{
328+
MimeType = contentType,
329+
Data = Convert.ToBase64String(bytes)
330+
}
331+
});
332+
}
333+
else if (!string.IsNullOrEmpty(file.FileUrl))
334+
{
335+
contentParts.Add(new Part()
336+
{
337+
FileData = new()
338+
{
339+
FileUri = file.FileUrl
340+
}
341+
});
342+
}
343+
}
344+
}
345+
contents.Add(new Content(contentParts, AgentRole.User));
302346
convPrompts.Add($"{AgentRole.User}: {text}");
303347
}
304348
else if (message.Role == AgentRole.Assistant)

src/Plugins/BotSharp.Plugin.GoogleAI/Providers/ProviderHelper.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ public static class ProviderHelper
77
public static GenerativeAI.GoogleAi GetGeminiClient(string provider, string model, IServiceProvider services)
88
{
99
var aiSettings = services.GetRequiredService<GoogleAiSettings>();
10-
if (aiSettings == null || aiSettings.Gemini ==null || string.IsNullOrEmpty(aiSettings.Gemini.ApiKey))
10+
if (string.IsNullOrEmpty(aiSettings?.Gemini?.ApiKey))
1111
{
1212
var settingsService = services.GetRequiredService<ILlmProviderService>();
1313
var settings = settingsService.GetSetting(provider, model);

src/Plugins/BotSharp.Plugin.OpenAI/Providers/Chat/ChatCompletionProvider.cs

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -218,7 +218,6 @@ await onMessageReceived(new RoleDialogModel(choice.Role?.ToString() ?? ChatMessa
218218
protected (string, IEnumerable<ChatMessage>, ChatCompletionOptions) PrepareOptions(Agent agent, List<RoleDialogModel> conversations)
219219
{
220220
var agentService = _services.GetRequiredService<IAgentService>();
221-
var state = _services.GetRequiredService<IConversationStateService>();
222221
var fileStorage = _services.GetRequiredService<IFileStorageService>();
223222
var settingsService = _services.GetRequiredService<ILlmProviderService>();
224223
var settings = settingsService.GetSetting(Provider, _model);

0 commit comments

Comments
 (0)