forked from vllm-project/aibrix
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathtypes.go
More file actions
109 lines (93 loc) · 4.14 KB
/
types.go
File metadata and controls
109 lines (93 loc) · 4.14 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
/*
Copyright 2025 The Aibrix Team.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package tokenizer
import (
"encoding/json"
"time"
)
// TokenizeInputType represents the type of tokenization input.
// It selects which fields of TokenizeInput are consumed.
type TokenizeInputType string
const (
	// CompletionInput represents simple text completion tokenization:
	// the raw Text field of TokenizeInput is tokenized as-is.
	CompletionInput TokenizeInputType = "completion"
	// ChatInput represents chat message tokenization with templates:
	// the Messages field is tokenized via the model's chat template.
	ChatInput TokenizeInputType = "chat"
)
// TokenizeInput represents a unified input structure for tokenization.
// Type selects the mode; Text is used for CompletionInput and Messages
// for ChatInput (the other field is ignored for the unselected mode).
type TokenizeInput struct {
	Type                TokenizeInputType
	Text                string        // For completion input
	Messages            []ChatMessage // For chat input
	AddSpecialTokens    bool          // Whether to add model special tokens (e.g. BOS/EOS) during tokenization
	ReturnTokenStrings  bool          // Whether to also return the string form of each token
	AddGenerationPrompt bool          // For chat input only
}
// TokenizeResult represents the result of tokenization.
type TokenizeResult struct {
	Count        int      `json:"count"`                // Number of tokens produced
	MaxModelLen  int      `json:"max_model_len"`        // Maximum sequence length reported by the serving engine
	Tokens       []int    `json:"tokens"`               // Token IDs
	TokenStrings []string `json:"token_strs,omitempty"` // String form of each token; only populated when requested
}
// ChatMessage represents a single message in a chat conversation.
// Content can be either a string (simple text) or a structured array
// (multimodal); json.RawMessage defers decoding so both shapes pass
// through unchanged to the tokenization backend.
type ChatMessage struct {
	Role    string          `json:"role"`
	Content json.RawMessage `json:"content"` // Can be string or array for multimodal
}
// RemoteTokenizerConfig represents configuration for a remote tokenizer,
// i.e. one backed by an HTTP tokenization service rather than an
// in-process tokenizer.
type RemoteTokenizerConfig struct {
	Engine             string        // "vllm", "sglang", "triton", etc.
	Endpoint           string        // Base URL of the service
	Model              string        // Model identifier (optional)
	Timeout            time.Duration // Request timeout
	MaxRetries         int           // Max retry attempts
	AddSpecialTokens   bool          // Default: true
	ReturnTokenStrings bool          // Default: false
}
// vllmTokenizeCompletionRequest represents a request to tokenize completion text.
// Field names mirror the vLLM tokenize API request schema.
// NOTE(review): schema assumed to match vLLM's /tokenize endpoint — verify
// against the deployed vLLM version.
type vllmTokenizeCompletionRequest struct {
	Model  string `json:"model,omitempty"`
	Prompt string `json:"prompt"`
	// *bool so an unset option is omitted from the JSON payload (letting
	// the server apply its default) instead of being sent as false.
	AddSpecialTokens *bool `json:"add_special_tokens,omitempty"`
	ReturnTokenStrs  *bool `json:"return_token_strs,omitempty"`
}
// vllmTokenizeChatRequest represents a request to tokenize chat messages.
// Field names mirror the vLLM tokenize API request schema for chat input.
// Pointer option fields (*bool, *string) are omitted from the JSON payload
// when nil so the server applies its own defaults.
type vllmTokenizeChatRequest struct {
	Model                string                 `json:"model,omitempty"`
	Messages             []ChatMessage          `json:"messages"`
	AddSpecialTokens     *bool                  `json:"add_special_tokens,omitempty"`
	AddGenerationPrompt  *bool                  `json:"add_generation_prompt,omitempty"`
	ContinueFinalMessage *bool                  `json:"continue_final_message,omitempty"`
	ReturnTokenStrs      *bool                  `json:"return_token_strs,omitempty"`
	ChatTemplate         *string                `json:"chat_template,omitempty"`
	ChatTemplateKwargs   map[string]interface{} `json:"chat_template_kwargs,omitempty"`
	Tools                []interface{}          `json:"tools,omitempty"`
	MMProcessorKwargs    map[string]interface{} `json:"mm_processor_kwargs,omitempty"`
}
// vllmTokenizeResponse represents the response from tokenization endpoints.
// Its shape matches TokenizeResult field-for-field; keeping a separate
// wire type isolates the public API from the vLLM response schema.
type vllmTokenizeResponse struct {
	Count       int      `json:"count"`
	MaxModelLen int      `json:"max_model_len"`
	Tokens      []int    `json:"tokens"`
	TokenStrs   []string `json:"token_strs,omitempty"`
}
// vllmDetokenizeRequest represents a request to detokenize tokens
// (token IDs back to text).
// NOTE(review): assumed to target vLLM's /detokenize endpoint — confirm.
type vllmDetokenizeRequest struct {
	Model  string `json:"model,omitempty"`
	Tokens []int  `json:"tokens"`
}
// vllmDetokenizeResponse represents the response from the detokenization
// endpoint; Prompt holds the reconstructed text.
type vllmDetokenizeResponse struct {
	Prompt string `json:"prompt"`
}