Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 0 additions & 2 deletions src/core/Tokenizer.ts
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,6 @@ class Tokenizer {
private added_tokens_map: Map<string, AddedToken>;
private special_tokens: Array<string | TokenConfig>;
private all_special_ids: Array<number>;
private model_max_length: number;
private remove_space: boolean;
private clean_up_tokenization_spaces: boolean;
private do_lowercase_and_remove_accent: boolean;
Expand Down Expand Up @@ -120,7 +119,6 @@ class Tokenizer {
this.added_tokens_map = new Map(
this.added_tokens.map((x) => [x.content, x]),
);
this.model_max_length = this.config.model_max_length;
this.remove_space = this.config.remove_space;
this.clean_up_tokenization_spaces =
this.config.clean_up_tokenization_spaces ?? true;
Expand Down
33 changes: 0 additions & 33 deletions src/static/tokenizer.d.ts
Original file line number Diff line number Diff line change
Expand Up @@ -24,14 +24,6 @@ export interface TokenizerConfig {
add_eos_token?: boolean;
add_prefix_space?: boolean;

// Padding and truncation
padding_side?: "left" | "right";
truncation_side?: "left" | "right";
model_max_length?: number;
max_length?: number;
stride?: number;
pad_to_multiple_of?: number;

// Cleaning and processing
clean_up_tokenization_spaces?: boolean;
split_special_tokens?: boolean;
Expand Down Expand Up @@ -74,8 +66,6 @@ export interface TokenConfig {

export interface TokenizerJSON {
version?: string;
truncation?: TruncationConfig | null;
padding?: PaddingConfig | null;
added_tokens?: AddedToken[];
normalizer?: TokenizerConfigNormalizer;
pre_tokenizer?: TokenizerConfigPreTokenizer;
Expand All @@ -84,29 +74,6 @@ export interface TokenizerJSON {
model: TokenizerModelConfig;
}

// ----------------------------------------------------------------------------
// Truncation Configuration
// ----------------------------------------------------------------------------

/**
 * Type of the optional, nullable `truncation` field of `TokenizerJSON`.
 * `max_length` is the only required member; every other member may be omitted.
 */
export interface TruncationConfig {
/** Either "Left" or "Right"; presumably the side tokens are dropped from — TODO confirm. */
direction?: "Left" | "Right";
/** Required numeric limit; presumably counted in tokens — TODO confirm against the tokenizer.json spec. */
max_length: number;
/** One of three named strategies; presumably selects which sequence of a pair is shortened — TODO confirm. */
strategy?: "LongestFirst" | "OnlyFirst" | "OnlySecond";
/** Optional number; NOTE(review): looks like an overlap size for overflowing chunks — confirm. */
stride?: number;
}

// ----------------------------------------------------------------------------
// Padding Configuration
// ----------------------------------------------------------------------------

/**
 * Type of the optional, nullable `padding` field of `TokenizerJSON`.
 * All members are optional.
 */
export interface PaddingConfig {
/** Either "Left" or "Right"; presumably the side padding is added on — TODO confirm. */
direction?: "Left" | "Right";
/** Numeric id; presumably the vocabulary id of the padding token — TODO confirm. */
pad_id?: number;
/** String form of the padding token (presumably matching `pad_id` — verify). */
pad_token?: string;
/** Numeric id; NOTE(review): looks like the token-type/segment id used for padded positions — confirm. */
pad_type_id?: number;
/** May be a number, explicitly `null`, or omitted; presumably rounds padded length up to a multiple — TODO confirm. */
pad_to_multiple_of?: number | null;
}

// ----------------------------------------------------------------------------
// Added Tokens
// ----------------------------------------------------------------------------
Expand Down
18 changes: 0 additions & 18 deletions src/utils/core.ts
Original file line number Diff line number Diff line change
Expand Up @@ -158,24 +158,6 @@ export const regex_split = (text: string, regex: RegExp): string[] => {
/**
 * Strips every code point in the Unicode "Mark" category (`\p{M}`) from a string.
 * No normalization is performed here, so only marks that are already separate
 * code points are removed; precomposed characters are left as they are.
 * @param text The input string.
 * @returns A copy of `text` without any mark code points.
 */
export const remove_accents = (text: string): string => {
  const mark_pattern = /\p{M}/gu;
  const stripped = text.replace(mark_pattern, "");
  return stripped;
};

/**
 * Helper function for truncating values of an object, which are each arrays.
 * Every array longer than `length` is shortened in place; arrays whose length
 * is already at or below `length` are left untouched, so this helper never
 * grows an array (which would pad it with empty slots).
 * NOTE: `length` is otherwise not validated; it is expected to be a
 * non-negative integer.
 * @param item The input object; its array values are mutated in place.
 * @param length The maximum number of elements each array may keep.
 * @private
 */
export const truncate_helper = (
  item: Record<string, any[]>,
  length: number,
): void => {
  for (const values of Object.values(item)) {
    // Only ever shrink: assigning a larger .length would extend the array
    // with empty slots instead of truncating it.
    if (values.length > length) {
      // Setting .length to a lower value truncates the array in-place:
      // https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Array/length
      values.length = length;
    }
  }
};

export const validate_object = (
obj: Object,
name: string,
Expand Down