Skip to content

Commit 13cc6f2

Browse files
committed
one prompt per completion
Signed-off-by: Sage Ahrac <sagiahrak@gmail.com>
1 parent 7a9d785 commit 13cc6f2

File tree

8 files changed

+81
-85
lines changed

8 files changed

+81
-85
lines changed

api/tokenizerpb/tokenizer.pb.go

Lines changed: 49 additions & 40 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

api/tokenizerpb/tokenizer.proto

Lines changed: 8 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -185,23 +185,24 @@ message RenderChatCompletionResponse {
185185
// RenderCompletionRequest contains the fields from an OpenAI completion request
186186
// that are relevant to rendering (validation + tokenization).
187187
message RenderCompletionRequest {
188-
string model_name = 1; // Model name to use for renderer selection
189-
repeated string prompts = 2; // Text prompts to render (one item per response)
188+
string model_name = 1; // Model name to use for renderer selection
189+
string prompt = 2; // Text prompt to render
190190
}
191191

192-
// RenderCompletionResponse contains the rendered output for each prompt in the completion request
192+
// RenderCompletionResponse contains the rendered output for a completion request.
193193
message RenderCompletionResponse {
194-
repeated RenderChatCompletionResponse items = 1; // One item per prompt in the request
195-
bool success = 2; // Whether the request was successful
196-
string error_message = 3; // Error message if the request failed
194+
string request_id = 1; // Request ID from the render response
195+
repeated uint32 token_ids = 2; // Token IDs for the rendered prompt
196+
bool success = 3; // Whether the request was successful
197+
string error_message = 4; // Error message if the request failed
197198
}
198199

199200
// TokenizationService defines the gRPC service for tokenization
200201
service TokenizationService {
201202
// Tokenize converts a text input to token IDs
202203
rpc Tokenize(TokenizeRequest) returns (TokenizeResponse);
203204

204-
// RenderChatTemplate renders a chat template with the given messages
205+
// Deprecated: use RenderChatCompletion instead.
205206
rpc RenderChatTemplate(ChatTemplateRequest) returns (ChatTemplateResponse);
206207

207208
// InitializeTokenizer initializes the tokenizer for a specific model

api/tokenizerpb/tokenizer_grpc.pb.go

Lines changed: 2 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

pkg/tokenization/uds_tokenizer.go

Lines changed: 2 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -193,7 +193,7 @@ func (u *UdsTokenizer) Render(prompt string) ([]uint32, []types.Offset, error) {
193193

194194
resp, err := u.client.RenderCompletion(ctx, &tokenizerpb.RenderCompletionRequest{
195195
ModelName: u.model,
196-
Prompts: []string{prompt},
196+
Prompt: prompt,
197197
})
198198
if err != nil {
199199
return nil, nil, fmt.Errorf("gRPC RenderCompletion request failed: %w", err)
@@ -203,16 +203,7 @@ func (u *UdsTokenizer) Render(prompt string) ([]uint32, []types.Offset, error) {
203203
return nil, nil, fmt.Errorf("render completion failed: %s", resp.ErrorMessage)
204204
}
205205

206-
if len(resp.Items) == 0 {
207-
return nil, nil, fmt.Errorf("render completion returned no items")
208-
}
209-
210-
item := resp.Items[0]
211-
if !item.Success {
212-
return nil, nil, fmt.Errorf("render completion item failed: %s", item.ErrorMessage)
213-
}
214-
215-
return item.TokenIds, nil, nil
206+
return resp.TokenIds, nil, nil
216207
}
217208

218209
// Encode tokenizes the input string and returns the token IDs and offsets.

pkg/tokenization/uds_tokenizer_test.go

Lines changed: 6 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -141,22 +141,15 @@ func (m *mockTokenizationServer) RenderCompletion(
141141
}, nil
142142
}
143143

144-
items := make([]*tokenizerpb.RenderChatCompletionResponse, 0, len(req.Prompts))
145-
for _, prompt := range req.Prompts {
146-
tokens := make([]uint32, 0, len(prompt))
147-
for _, r := range prompt {
148-
tokens = append(tokens, uint32(r))
149-
}
150-
items = append(items, &tokenizerpb.RenderChatCompletionResponse{
151-
RequestId: "mock-request-id",
152-
TokenIds: tokens,
153-
Success: true,
154-
})
144+
tokens := make([]uint32, 0, len(req.Prompt))
145+
for _, r := range req.Prompt {
146+
tokens = append(tokens, uint32(r))
155147
}
156148

157149
return &tokenizerpb.RenderCompletionResponse{
158-
Items: items,
159-
Success: true,
150+
RequestId: "mock-request-id",
151+
TokenIds: tokens,
152+
Success: true,
160153
}, nil
161154
}
162155

services/uds_tokenizer/tokenizer_grpc_service.py

Lines changed: 7 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -235,18 +235,20 @@ def RenderCompletion(
235235
try:
236236
completion_request = CompletionRequest(
237237
model=request.model_name,
238-
prompt=list(request.prompts),
238+
prompt=request.prompt,
239239
)
240240
results = asyncio.run_coroutine_threadsafe(
241241
self.renderer_service.render_completion(
242242
completion_request, request.model_name
243243
),
244244
self._loop,
245245
).result()
246-
items: list[tokenizer_pb2.RenderChatCompletionResponse] = [
247-
self._generate_request_to_proto(r) for r in results
248-
]
249-
return tokenizer_pb2.RenderCompletionResponse(items=items, success=True)
246+
result = results[0]
247+
return tokenizer_pb2.RenderCompletionResponse(
248+
request_id=result.request_id,
249+
token_ids=list(result.token_ids),
250+
success=True,
251+
)
250252
except Exception as e:
251253
logging.error(f"RenderCompletion failed: {e}", exc_info=True)
252254
context.abort(grpc.StatusCode.INTERNAL, str(e))

0 commit comments

Comments (0)