import { Tokenizer } from "../src";
import type { Encoding } from "../src";
import { Metaspace, Whitespace } from "../src/pre-tokenizers";
import { BPE } from "../src/models";

describe("Additional exports", () => {
  describe("Main exports", () => {
    it("should export Tokenizer", () => {
      expect(Tokenizer).toBeDefined();
    });

    it("should export Encoding type (compile-time test)", () => {
      // This test verifies that the Encoding type can be used
      const encoding: Encoding = {
        ids: [1, 2, 3],
        tokens: ["hello", "world", "!"],
        attention_mask: [1, 1, 1],
      };
      expect(encoding.ids).toEqual([1, 2, 3]);
    });
  });

  describe("Pre-tokenizer exports", () => {
    it("should export Metaspace pre-tokenizer", () => {
      expect(Metaspace).toBeDefined();
      const metaspace = new Metaspace({
        type: "Metaspace",
        replacement: "▁",
        add_prefix_space: true,
      });
      expect(metaspace).toBeInstanceOf(Metaspace);
    });

    it("should export Whitespace pre-tokenizer", () => {
      expect(Whitespace).toBeDefined();
      const whitespace = new Whitespace();
      expect(whitespace).toBeInstanceOf(Whitespace);
    });

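    // Metaspace replaces each space with the configured "▁" replacement and,
    // with add_prefix_space, prepends one to the start of the input.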
    it("Metaspace pre-tokenizer should work correctly", () => {
      const metaspace = new Metaspace({
        type: "Metaspace",
        replacement: "▁",
        add_prefix_space: true,
      });
      const result = metaspace.pre_tokenize_text("hello world");
      expect(result).toEqual(["▁hello▁world"]);
    });

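    // Whitespace splits on whitespace and separates punctuation into its own token.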
    it("Whitespace pre-tokenizer should work correctly", () => {
      const whitespace = new Whitespace();
      const result = whitespace.pre_tokenize_text("hello world!");
      expect(result).toEqual(["hello", "world", "!"]);
    });
  });

  describe("Model exports", () => {
    it("should export BPE model", () => {
      expect(BPE).toBeDefined();
    });

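    // Minimal BPE config: `vocab` maps tokens to ids, `merges` lists the token pairs to merge.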
    it("BPE model should be instantiable", () => {
      const bpe = new BPE({
        type: "BPE",
        vocab: { a: 0, b: 1, c: 2 },
        merges: [["a", "b"]],
        unk_token: "<unk>",
        ignore_merges: false,
      });
      expect(bpe).toBeInstanceOf(BPE);
    });
  });

  describe("Integration test - import paths", () => {
    it("should support the documented import syntax", async () => {
      // This test verifies that the documented import paths work
      // import { Tokenizer, Encoding } from "@huggingface/tokenizers";
      const { Tokenizer: T1 } = await import("../src/index");
      expect(T1).toBeDefined();
      // Encoding is a type-only export, so we can't test it at runtime

      // import { Metaspace, Whitespace } from "@huggingface/tokenizers/pre-tokenizers";
      const { Metaspace: M1, Whitespace: W1 } = await import("../src/pre-tokenizers");
      expect(M1).toBeDefined();
      expect(W1).toBeDefined();

      // import { BPE } from "@huggingface/tokenizers/models";
      const { BPE: B1 } = await import("../src/models");
      expect(B1).toBeDefined();
    });
  });
});