@@ -128,19 +128,48 @@ class Tokenizer {
128128 this . config . do_lowercase_and_remove_accent ?? false ;
129129 }
130130
/**
 * Tokenizes a single text, optionally paired with a second text, and
 * returns the resulting encoding.
 *
 * Overload selected when `return_token_type_ids` is the literal `true`:
 * the declared return type then includes `token_type_ids`.
 *
 * @param text The text to encode.
 * @param options Optional settings:
 *   - `text_pair`: second text forwarded to the tokenization helper
 *     (defaults to `null`).
 *   - `add_special_tokens`: forwarded to the tokenization helper
 *     (defaults to `true`).
 *   - `return_token_type_ids`: when truthy, `token_type_ids` is copied
 *     into the result if the helper produced any (defaults to `null`).
 * @returns An object holding `ids`, `tokens`, an `attention_mask` of 1s
 *   with the same length as `ids`, and optionally `token_type_ids`.
 */
public encode(
  text: string,
  options: EncodeOptions & { return_token_type_ids: true },
): EncodingSingle & { token_type_ids: number[] };

/**
 * Overload used when `return_token_type_ids` is false/null or omitted.
 *
 * @param text The text to encode.
 * @param options Optional settings (`text_pair`, `add_special_tokens`,
 *   `return_token_type_ids`).
 * @returns An object holding `ids`, `tokens` and `attention_mask`.
 */
public encode(text: string, options?: EncodeOptions): EncodingSingle;

// Implementation shared by both overloads.
public encode(
  text: string,
  {
    text_pair = null,
    add_special_tokens = true,
    return_token_type_ids = null,
  }: EncodeOptions = {},
): EncodingSingle {
  // Tokenize first; the helper may also hand back token type ids.
  const tokenized = this.tokenize_helper(text, {
    text_pair,
    add_special_tokens,
  });
  const tokens = tokenized.tokens;
  const type_ids = tokenized.token_type_ids;

  // Map every token to its vocabulary id.
  const ids = this.model.convert_tokens_to_ids(tokens);

  // Every produced position is attended to, hence a mask of all 1s.
  const encoding: EncodingSingle = {
    ids,
    tokens,
    attention_mask: Array.from({ length: ids.length }, () => 1),
  };

  // Only attach token type ids when the caller asked for them AND the
  // helper actually produced them.
  if (!return_token_type_ids || !type_ids) {
    return encoding;
  }
  encoding.token_type_ids = type_ids;
  return encoding;
}
145174
146175 public decode (
@@ -198,40 +227,6 @@ class Tokenizer {
198227 return this . tokenize_helper ( text , { text_pair, add_special_tokens } ) . tokens ;
199228 }
200229
201- /**
202- * Encodes a single text or a pair of texts using the model's tokenizer.
203- *
204- * @param text The text to encode.
205- * @param options An optional object containing the following properties:
206- * @returns An object containing the encoded text.
207- * @private
208- */
209-
210- private encode_plus (
211- text : string ,
212- {
213- text_pair = null ,
214- add_special_tokens = true ,
215- return_token_type_ids = null ,
216- } : EncodeOptions ,
217- ) : EncodingSingle {
218- const { tokens, token_type_ids } = this . tokenize_helper ( text , {
219- text_pair,
220- add_special_tokens,
221- } ) ;
222-
223- const input_ids = this . model . convert_tokens_to_ids ( tokens ) ;
224- const result : EncodingSingle = {
225- input_ids,
226- attention_mask : new Array ( input_ids . length ) . fill ( 1 ) ,
227- } ;
228-
229- if ( return_token_type_ids && token_type_ids ) {
230- result . token_type_ids = token_type_ids ;
231- }
232- return result ;
233- }
234-
235230 private encode_text ( text : string | null ) : string [ ] | null {
236231 if ( text === null ) {
237232 return null ;
0 commit comments