Skip to content

Commit 3c1b582

Browse files
Copilot and xenova authored
Remove padding and truncation logic (#8)
* Initial plan

* Remove padding and truncation logic from tokenizers.js

Co-authored-by: xenova <[email protected]>

* Remove truncate helper

---------

Co-authored-by: copilot-swe-agent[bot] <[email protected]>
Co-authored-by: xenova <[email protected]>
1 parent 70aef0a commit 3c1b582

File tree

3 files changed

0 additions and 53 deletions

3 files changed

0 additions and 53 deletions

src/core/Tokenizer.ts

Lines changed: 0 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -50,7 +50,6 @@ class Tokenizer {
5050
private added_tokens_map: Map<string, AddedToken>;
5151
private special_tokens: Array<string | TokenConfig>;
5252
private all_special_ids: Array<number>;
53-
private model_max_length: number;
5453
private remove_space: boolean;
5554
private clean_up_tokenization_spaces: boolean;
5655
private do_lowercase_and_remove_accent: boolean;
@@ -120,7 +119,6 @@ class Tokenizer {
120119
this.added_tokens_map = new Map(
121120
this.added_tokens.map((x) => [x.content, x]),
122121
);
123-
this.model_max_length = this.config.model_max_length;
124122
this.remove_space = this.config.remove_space;
125123
this.clean_up_tokenization_spaces =
126124
this.config.clean_up_tokenization_spaces ?? true;

src/static/tokenizer.d.ts

Lines changed: 0 additions & 33 deletions
Original file line number | Diff line number | Diff line change
@@ -24,14 +24,6 @@ export interface TokenizerConfig {
2424
add_eos_token?: boolean;
2525
add_prefix_space?: boolean;
2626

27-
// Padding and truncation
28-
padding_side?: "left" | "right";
29-
truncation_side?: "left" | "right";
30-
model_max_length?: number;
31-
max_length?: number;
32-
stride?: number;
33-
pad_to_multiple_of?: number;
34-
3527
// Cleaning and processing
3628
clean_up_tokenization_spaces?: boolean;
3729
split_special_tokens?: boolean;
@@ -74,8 +66,6 @@ export interface TokenConfig {
7466

7567
export interface TokenizerJSON {
7668
version?: string;
77-
truncation?: TruncationConfig | null;
78-
padding?: PaddingConfig | null;
7969
added_tokens?: AddedToken[];
8070
normalizer?: TokenizerConfigNormalizer;
8171
pre_tokenizer?: TokenizerConfigPreTokenizer;
@@ -84,29 +74,6 @@ export interface TokenizerJSON {
8474
model: TokenizerModelConfig;
8575
}
8676

87-
// ----------------------------------------------------------------------------
88-
// Truncation Configuration
89-
// ----------------------------------------------------------------------------
90-
91-
export interface TruncationConfig {
92-
direction?: "Left" | "Right";
93-
max_length: number;
94-
strategy?: "LongestFirst" | "OnlyFirst" | "OnlySecond";
95-
stride?: number;
96-
}
97-
98-
// ----------------------------------------------------------------------------
99-
// Padding Configuration
100-
// ----------------------------------------------------------------------------
101-
102-
export interface PaddingConfig {
103-
direction?: "Left" | "Right";
104-
pad_id?: number;
105-
pad_token?: string;
106-
pad_type_id?: number;
107-
pad_to_multiple_of?: number | null;
108-
}
109-
11077
// ----------------------------------------------------------------------------
11178
// Added Tokens
11279
// ----------------------------------------------------------------------------

src/utils/core.ts

Lines changed: 0 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -158,24 +158,6 @@ export const regex_split = (text: string, regex: RegExp): string[] => {
158158
export const remove_accents = (text: string): string =>
159159
text.replace(/\p{M}/gu, "");
160160

161-
/**
162-
* Helper function for truncating values of an object, which are each arrays.
163-
* NOTE: No additional checks are made here for validity of arguments.
164-
* @param item The input object.
165-
* @param length The length to truncate to.
166-
* @private
167-
*/
168-
export const truncate_helper = (
169-
item: Record<string, any[]>,
170-
length: number,
171-
): void => {
172-
// Setting .length to a lower value truncates the array in-place:
173-
// https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Array/length
174-
for (const key of Object.keys(item)) {
175-
item[key].length = length;
176-
}
177-
};
178-
179161
export const validate_object = (
180162
obj: Object,
181163
name: string,

0 commit comments

Comments
 (0)