Skip to content

Commit a890246

Browse files
authored
Merge pull request #664 from joakimriedel/joakim/support-gpt4o
Use the new tokenizer from Microsoft to support the newer GPT-4o model.
2 parents 4a1251c + c631ef0 commit a890246

5 files changed

+28
-28
lines changed

src/Directory.Packages.props

+1-1
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
<PackageVersion Include="ILMerge.Fody" Version="1.24.0" />
1313
<PackageVersion Include="JetBrains.Annotations" Version="2024.2.0" />
1414
<PackageVersion Include="Microsoft.CSharp" Version="4.7.0" />
15-
<PackageVersion Include="Microsoft.DeepDev.TokenizerLib" Version="[1.3.3]" />
15+
<PackageVersion Include="Microsoft.ML.Tokenizers" Version="0.22.0-preview.24378.1" />
1616
<PackageVersion Include="Microsoft.NET.Test.Sdk" Version="17.11.1" />
1717
<PackageVersion Include="Microsoft.SourceLink.GitHub" Version="8.0.0" />
1818
<PackageVersion Include="Microsoft.VisualStudio.SDK" Version="[15.0.1]" />

src/ResXManager.Translators/AzureOpenAITranslator.cs

+17-17
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
namespace ResXManager.Translators;
22

3-
using global::Microsoft.DeepDev;
3+
using global::Microsoft.ML.Tokenizers;
44
using Newtonsoft.Json;
55
using ResXManager.Infrastructure;
66
using System;
@@ -138,11 +138,11 @@ private sealed class ChatCompletionsResponse
138138

139139
private async Task TranslateUsingChatModel(ITranslationSession translationSession, HttpClient client)
140140
{
141-
const string ApiVersion = "2023-05-15";
141+
const string ApiVersion = "2024-06-01";
142142
var endpointUri = new Uri($"/openai/deployments/{ModelDeploymentName}/chat/completions?api-version={ApiVersion}", UriKind.Relative);
143-
var tokenizer = await TokenizerBuilder.CreateByModelNameAsync(
143+
var tokenizer = TiktokenTokenizer.CreateForModel(
144144
ModelName ?? throw new InvalidOperationException("No model name provided in configuration!")
145-
).ConfigureAwait(false);
145+
);
146146

147147
var retries = 0;
148148

@@ -200,7 +200,7 @@ private async Task TranslateUsingChatModel(ITranslationSession translationSessio
200200
}
201201

202202
private IEnumerable<(ChatMessage message, ICollection<ITranslationItem> items)> PackChatModelMessagesIntoBatches(
203-
ITranslationSession translationSession, IEnumerable<ITranslationItem> items, CultureInfo targetCulture, ITokenizer tokenizer
203+
ITranslationSession translationSession, IEnumerable<ITranslationItem> items, CultureInfo targetCulture, TiktokenTokenizer tokenizer
204204
)
205205
{
206206
var batchItems = new List<ITranslationItem>();
@@ -209,7 +209,7 @@ private async Task TranslateUsingChatModel(ITranslationSession translationSessio
209209

210210
foreach (var item in items)
211211
{
212-
var currentBatch = batchItems.Concat(new[] { item }).ToList();
212+
var currentBatch = batchItems.Concat([item]).ToList();
213213

214214
var currentMessage = GenerateChatModelMessageForTranslations(translationSession, currentBatch, targetCulture);
215215
if (currentMessage?.Content is null)
@@ -218,7 +218,7 @@ private async Task TranslateUsingChatModel(ITranslationSession translationSessio
218218
continue;
219219
}
220220

221-
var tokens = tokenizer.Encode(currentMessage.Content, new List<string>()).Count;
221+
var tokens = tokenizer.CountTokens(currentMessage.Content);
222222
if (tokens > PromptTokens)
223223
{
224224
translationSession.AddMessage($"Prompt for resource would exceed {PromptTokens} tokens: {item.Source.Substring(0, 20)}...");
@@ -235,7 +235,7 @@ private async Task TranslateUsingChatModel(ITranslationSession translationSessio
235235
{
236236
yield return (batchMessage, batchItems);
237237

238-
batchItems = new List<ITranslationItem>();
238+
batchItems = [];
239239
batchTokens = 0;
240240
}
241241

@@ -414,11 +414,11 @@ private sealed class CompletionsResponse
414414

415415
private async Task TranslateUsingCompletionsModel(ITranslationSession translationSession, HttpClient client)
416416
{
417-
const string ApiVersion = "2023-05-15";
417+
const string ApiVersion = "2024-06-01";
418418
var endpointUri = new Uri($"/openai/deployments/{ModelDeploymentName}/completions?api-version={ApiVersion}", UriKind.Relative);
419-
var tokenizer = await TokenizerBuilder.CreateByModelNameAsync(
419+
var tokenizer = TiktokenTokenizer.CreateForModel(
420420
ModelName ?? throw new InvalidOperationException("No model name provided in configuration!")
421-
).ConfigureAwait(false);
421+
);
422422

423423
var retries = 0;
424424

@@ -467,7 +467,7 @@ private async Task TranslateUsingCompletionsModel(ITranslationSession translatio
467467
}
468468
}
469469

470-
private IEnumerable<PromptList> PackCompletionModelPromptsIntoBatches(ITranslationSession translationSession, ITokenizer tokenizer)
470+
private IEnumerable<PromptList> PackCompletionModelPromptsIntoBatches(ITranslationSession translationSession, TiktokenTokenizer tokenizer)
471471
{
472472
var batchItems = new PromptList();
473473
var batchTokens = 0;
@@ -481,7 +481,7 @@ private IEnumerable<PromptList> PackCompletionModelPromptsIntoBatches(ITranslati
481481
continue;
482482
}
483483

484-
var tokens = tokenizer.Encode(prompt, new List<string>()).Count;
484+
var tokens = tokenizer.CountTokens(prompt);
485485

486486
if (tokens > PromptTokens)
487487
{
@@ -499,7 +499,7 @@ private IEnumerable<PromptList> PackCompletionModelPromptsIntoBatches(ITranslati
499499
{
500500
yield return batchItems;
501501

502-
batchItems = new PromptList();
502+
batchItems = [];
503503
batchTokens = 0;
504504
}
505505

@@ -634,12 +634,12 @@ public string? ModelName
634634

635635
private static IList<ICredentialItem> GetCredentials()
636636
{
637-
return new ICredentialItem[]
638-
{
637+
return
638+
[
639639
new CredentialItem("AuthenticationKey", "Key"),
640640
new CredentialItem("Url", "Endpoint Url", false),
641641
new CredentialItem("ModelDeploymentName", "Model Deployment Name", false),
642642
new CredentialItem("ModelName", "Model Name", false),
643-
};
643+
];
644644
}
645645
}

src/ResXManager.Translators/FodyWeavers.xml

+1-1
Original file line numberDiff line numberDiff line change
@@ -2,5 +2,5 @@
22
<Weavers xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:noNamespaceSchemaLocation="FodyWeavers.xsd">
33
<Throttle />
44
<PropertyChanged />
5-
<ILMerge IncludeAssemblies="Microsoft.DeepDev.TokenizerLib" />
5+
<ILMerge IncludeAssemblies="Microsoft.ML.Tokenizers" />
66
</Weavers>

src/ResXManager.Translators/OpenAITranslator.cs

+8-8
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
namespace ResXManager.Translators;
22

3-
using global::Microsoft.DeepDev;
3+
using global::Microsoft.ML.Tokenizers;
44
using Newtonsoft.Json;
55
using ResXManager.Infrastructure;
66
using System;
@@ -145,9 +145,9 @@ private sealed class CompletionsResponse
145145
private async Task TranslateUsingCompletionsModel(ITranslationSession translationSession, HttpClient client)
146146
{
147147
var endpointUri = new Uri($"/v1/chat/completions", UriKind.Relative);
148-
var tokenizer = await TokenizerBuilder.CreateByModelNameAsync(
148+
var tokenizer = TiktokenTokenizer.CreateForModel(
149149
ModelName ?? throw new InvalidOperationException("No model name provided in configuration!")
150-
).ConfigureAwait(false);
150+
);
151151

152152
var retries = 0;
153153

@@ -200,7 +200,7 @@ private async Task TranslateUsingCompletionsModel(ITranslationSession translatio
200200
}
201201
}
202202

203-
private IEnumerable<(ITranslationItem item, string prompt)> PackCompletionModelPrompts(ITranslationSession translationSession, ITokenizer tokenizer)
203+
private IEnumerable<(ITranslationItem item, string prompt)> PackCompletionModelPrompts(ITranslationSession translationSession, TiktokenTokenizer tokenizer)
204204
{
205205
foreach (var item in translationSession.Items)
206206
{
@@ -211,7 +211,7 @@ private async Task TranslateUsingCompletionsModel(ITranslationSession translatio
211211
continue;
212212
}
213213

214-
var tokens = tokenizer.Encode(prompt, new List<string>()).Count;
214+
var tokens = tokenizer.CountTokens(prompt);
215215

216216
if (tokens > PromptTokens)
217217
{
@@ -328,10 +328,10 @@ public string? ModelName
328328

329329
private static IList<ICredentialItem> GetCredentials()
330330
{
331-
return new ICredentialItem[]
332-
{
331+
return
332+
[
333333
new CredentialItem("AuthenticationKey", "Key"),
334334
new CredentialItem("ModelName", "Model Name", false),
335-
};
335+
];
336336
}
337337
}

src/ResXManager.Translators/ResXManager.Translators.csproj

+1-1
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,7 @@
5252
<ItemGroup>
5353
<PackageReference Include="Fody" PrivateAssets="all" />
5454
<PackageReference Include="ILMerge.Fody" PrivateAssets="all" />
55-
<PackageReference Include="Microsoft.DeepDev.TokenizerLib" />
55+
<PackageReference Include="Microsoft.ML.Tokenizers" />
5656
<PackageReference Include="PropertyChanged.Fody" PrivateAssets="all" />
5757
<PackageReference Include="System.ComponentModel.Annotations" />
5858
<PackageReference Include="System.Net.Http" />

0 commit comments

Comments (0)