
Commit 0c6f429

Copilot authored and xenova committed
Simplify to single main export with unique names for all components
Co-authored-by: xenova <[email protected]>
1 parent: 3f539bb · commit: 0c6f429

File tree

6 files changed: +42 -160 lines

  jest.config.mjs
  package.json
  scripts/build.mjs
  src/index.ts
  tests/exports.test.ts
  tests/package-exports.test.ts

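As a quick illustration of what the change means for consumers (a hedged sketch inferred from the test diffs below, not code taken from the commit itself):

    // Before: components were imported from per-category subpath exports
    import { Metaspace, Whitespace } from "@huggingface/tokenizers/pre-tokenizers";
    import { BPE } from "@huggingface/tokenizers/models";

    // After: a single main export, with unique names for components
    import { Tokenizer, MetaspacePreTokenizer, Whitespace, BPE } from "@huggingface/tokenizers";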

jest.config.mjs

Lines changed: 0 additions & 6 deletions
@@ -178,12 +178,6 @@ export default {
 
   // Module name mapper for path aliases
   moduleNameMapper: {
-    "^@huggingface/tokenizers$": "<rootDir>/src/index.ts",
-    "^@huggingface/tokenizers/pre-tokenizers$": "<rootDir>/src/pre-tokenizers.ts",
-    "^@huggingface/tokenizers/models$": "<rootDir>/src/models.ts",
-    "^@huggingface/tokenizers/normalizers$": "<rootDir>/src/normalizers.ts",
-    "^@huggingface/tokenizers/decoders$": "<rootDir>/src/decoders.ts",
-    "^@huggingface/tokenizers/post-processors$": "<rootDir>/src/post-processors.ts",
     "^@utils$": "<rootDir>/src/utils/index.ts",
     "^@utils/(.*)$": "<rootDir>/src/utils/$1",
     "^@core/(.*)$": "<rootDir>/src/core/$1",

package.json

Lines changed: 0 additions & 55 deletions
@@ -18,61 +18,6 @@
         "import": "./dist/tokenizers.min.mjs"
       },
       "default": "./dist/tokenizers.min.mjs"
-    },
-    "./pre-tokenizers": {
-      "types": "./types/pre-tokenizers.d.ts",
-      "node": {
-        "require": "./dist/tokenizers.min.cjs",
-        "import": "./dist/tokenizers.min.mjs"
-      },
-      "browser": {
-        "import": "./dist/tokenizers.min.mjs"
-      },
-      "default": "./dist/tokenizers.min.mjs"
-    },
-    "./models": {
-      "types": "./types/models.d.ts",
-      "node": {
-        "require": "./dist/tokenizers.min.cjs",
-        "import": "./dist/tokenizers.min.mjs"
-      },
-      "browser": {
-        "import": "./dist/tokenizers.min.mjs"
-      },
-      "default": "./dist/tokenizers.min.mjs"
-    },
-    "./normalizers": {
-      "types": "./types/normalizers.d.ts",
-      "node": {
-        "require": "./dist/tokenizers.min.cjs",
-        "import": "./dist/tokenizers.min.mjs"
-      },
-      "browser": {
-        "import": "./dist/tokenizers.min.mjs"
-      },
-      "default": "./dist/tokenizers.min.mjs"
-    },
-    "./decoders": {
-      "types": "./types/decoders.d.ts",
-      "node": {
-        "require": "./dist/tokenizers.min.cjs",
-        "import": "./dist/tokenizers.min.mjs"
-      },
-      "browser": {
-        "import": "./dist/tokenizers.min.mjs"
-      },
-      "default": "./dist/tokenizers.min.mjs"
-    },
-    "./post-processors": {
-      "types": "./types/post-processors.d.ts",
-      "node": {
-        "require": "./dist/tokenizers.min.cjs",
-        "import": "./dist/tokenizers.min.mjs"
-      },
-      "browser": {
-        "import": "./dist/tokenizers.min.mjs"
-      },
-      "default": "./dist/tokenizers.min.mjs"
     }
   },
   "files": [

scripts/build.mjs

Lines changed: 1 addition & 49 deletions
@@ -1,6 +1,6 @@
 import { build as esbuild } from "esbuild";
 import { execSync } from "node:child_process";
-import { readFileSync, writeFileSync } from "node:fs";
+import { readFileSync } from "node:fs";
 import { gzipSync } from "node:zlib";
 
 console.log("Generating TypeScript declarations...");
@@ -50,51 +50,3 @@ await build("dist/tokenizers.cjs");
 await build("dist/tokenizers.min.mjs");
 await build("dist/tokenizers.min.cjs");
 
-// Read the type definition files to extract export names
-const readExports = (filename) => {
-  const content = readFileSync(`types/${filename}`, 'utf-8');
-  const exports = [];
-  const exportRegex = /export \{ default as (\w+) \}/g;
-  let match;
-  while ((match = exportRegex.exec(content)) !== null) {
-    exports.push(match[1]);
-  }
-  return exports;
-};
-
-// Generate wrapper files for submodule exports
-const submodules = {
-  "pre-tokenizers": readExports("pre-tokenizers.d.ts"),
-  "models": readExports("models.d.ts"),
-  "normalizers": readExports("normalizers.d.ts"),
-  "decoders": readExports("decoders.d.ts"),
-  "post-processors": readExports("post-processors.d.ts"),
-};
-
-for (const [path, exportNames] of Object.entries(submodules)) {
-  const exportList = exportNames.join(", ");
-
-  // ESM wrappers
-  writeFileSync(
-    `dist/${path}.mjs`,
-    `export { ${exportList} } from './tokenizers.mjs';\n`
-  );
-  writeFileSync(
-    `dist/${path}.min.mjs`,
-    `export { ${exportList} } from './tokenizers.min.mjs';\n`
-  );
-
-  // CJS wrappers
-  const cjsExports = exportNames.map(name => ` ${name}: main.${name}`).join(',\n');
-  writeFileSync(
-    `dist/${path}.cjs`,
-    `const main = require('./tokenizers.cjs');\nmodule.exports = {\n${cjsExports}\n};\n`
-  );
-  writeFileSync(
-    `dist/${path}.min.cjs`,
-    `const main = require('./tokenizers.min.cjs');\nmodule.exports = {\n${cjsExports}\n};\n`
-  );
-}
-
-console.log("\n✓ Generated wrapper files for submodule exports");
-

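For reference, the deleted loop wrote thin re-export wrappers into dist/, one pair per submodule. A sketch of what it generated for pre-tokenizers (the export list here is illustrative; the real list was scraped from types/pre-tokenizers.d.ts by readExports):

    // dist/pre-tokenizers.mjs (generated file, no longer produced)
    export { Metaspace, Whitespace } from './tokenizers.mjs';

    // dist/pre-tokenizers.cjs (generated file, no longer produced)
    const main = require('./tokenizers.cjs');
    module.exports = {
     Metaspace: main.Metaspace,
     Whitespace: main.Whitespace
    };

With the subpath entries gone from package.json, nothing resolves to these files anymore, so the generation step is dead code.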
src/index.ts

Lines changed: 3 additions & 9 deletions
@@ -17,12 +17,7 @@ export {
 } from "./decoders";
 
 // Models
-export {
-  Model,
-  BPE,
-  Unigram,
-  WordPiece,
-} from "./models";
+export { Model, BPE, Unigram, WordPiece } from "./models";
 
 // Normalizers
 export {
@@ -45,9 +40,9 @@ export {
 export {
   PreTokenizer,
   BertPreTokenizer,
-  ByteLevel,
+  ByteLevel as ByteLevelPreTokenizer,
   Digits,
-  Metaspace,
+  Metaspace as MetaspacePreTokenizer,
   Punctuation,
   Sequence as PreTokenizerSequence,
   Split,
@@ -65,4 +60,3 @@ export {
   TemplateProcessing,
 } from "./post-processors";
 
-

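The `as` clauses above are ordinary ES re-export renaming: the classes themselves are unchanged, only their exported names are, which is how the commit keeps every component name unique within the single namespace. A consumer who prefers the old short name can restore it locally with an import alias; a minimal sketch, assuming the package specifier used in the tests below:

    import { MetaspacePreTokenizer as Metaspace } from "@huggingface/tokenizers";

    // Construction is unchanged; these options mirror the ones exercised in the tests
    const pre = new Metaspace({
      type: "Metaspace",
      replacement: "▁",
      add_prefix_space: true,
    });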
tests/exports.test.ts

Lines changed: 17 additions & 22 deletions
@@ -1,7 +1,7 @@
 import { Tokenizer } from "../src";
 import type { Encoding } from "../src";
-import { Metaspace, Whitespace } from "../src/pre-tokenizers";
-import { BPE } from "../src/models";
+import { MetaspacePreTokenizer, Whitespace } from "../src";
+import { BPE } from "../src";
 
 describe("Additional exports", () => {
   describe("Main exports", () => {
@@ -21,14 +21,14 @@ describe("Additional exports", () => {
   });
 
   describe("Pre-tokenizer exports", () => {
-    it("should export Metaspace pre-tokenizer", () => {
-      expect(Metaspace).toBeDefined();
-      const metaspace = new Metaspace({
+    it("should export MetaspacePreTokenizer", () => {
+      expect(MetaspacePreTokenizer).toBeDefined();
+      const metaspace = new MetaspacePreTokenizer({
         type: "Metaspace",
         replacement: "▁",
         add_prefix_space: true,
       });
-      expect(metaspace).toBeInstanceOf(Metaspace);
+      expect(metaspace).toBeInstanceOf(MetaspacePreTokenizer);
     });
 
     it("should export Whitespace pre-tokenizer", () => {
@@ -37,8 +37,8 @@ describe("Additional exports", () => {
       expect(whitespace).toBeInstanceOf(Whitespace);
     });
 
-    it("Metaspace pre-tokenizer should work correctly", () => {
-      const metaspace = new Metaspace({
+    it("MetaspacePreTokenizer should work correctly", () => {
+      const metaspace = new MetaspacePreTokenizer({
         type: "Metaspace",
         replacement: "▁",
         add_prefix_space: true,
@@ -71,23 +71,18 @@ describe("Additional exports", () => {
     });
   });
 
-  describe("Integration test - import paths", () => {
-    it("should support the documented import syntax", async () => {
-      // This test verifies that the documented import paths work
-      // import { Tokenizer, Encoding } from "@huggingface/tokenizers";
-      const { Tokenizer: T1 } = await import("../src/index");
+  describe("Integration test - import from main export", () => {
+    it("should support importing everything from main export", async () => {
+      // All exports should be available from the main index
+      const {
+        Tokenizer: T1,
+        MetaspacePreTokenizer: M1,
+        Whitespace: W1,
+        BPE: B1,
+      } = await import("../src/index");
       expect(T1).toBeDefined();
-      // Encoding is a type-only export, so we can't test it at runtime
-
-      // import { Metaspace, Whitespace } from "@huggingface/tokenizers/pre-tokenizers";
-      const { Metaspace: M1, Whitespace: W1 } = await import(
-        "../src/pre-tokenizers"
-      );
       expect(M1).toBeDefined();
       expect(W1).toBeDefined();
-
-      // import { BPE } from "@huggingface/tokenizers/models";
-      const { BPE: B1 } = await import("../src/models");
       expect(B1).toBeDefined();
     });
   });

tests/package-exports.test.ts

Lines changed: 21 additions & 19 deletions
@@ -1,18 +1,20 @@
-// Integration test to verify package.json exports work correctly
-// This simulates how a user would import from the published package
+// Integration test to verify all exports are available from main export
+import { Tokenizer } from "../src";
+import type { Encoding } from "../src";
+import {
+  MetaspacePreTokenizer,
+  Whitespace,
+  BPE,
+  Lowercase,
+  StripAccents,
+  BPEDecoder,
+  TemplateProcessing,
+} from "../src";
 
-import { Tokenizer } from "@huggingface/tokenizers";
-import type { Encoding } from "@huggingface/tokenizers";
-import { Metaspace, Whitespace } from "@huggingface/tokenizers/pre-tokenizers";
-import { BPE } from "@huggingface/tokenizers/models";
-import { Lowercase, StripAccents } from "@huggingface/tokenizers/normalizers";
-import { BPEDecoder } from "@huggingface/tokenizers/decoders";
-import { TemplateProcessing } from "@huggingface/tokenizers/post-processors";
-
-describe("Package exports integration", () => {
-  it("should import main exports", () => {
+describe("Main export integration", () => {
+  it("should import Tokenizer and Encoding", () => {
     expect(Tokenizer).toBeDefined();
-
+
     // Encoding is a type-only export
     const enc: Encoding = {
       ids: [1, 2, 3],
@@ -22,25 +24,25 @@ describe("Package exports integration", () => {
     expect(enc.ids).toHaveLength(3);
   });
 
-  it("should import pre-tokenizers", () => {
-    expect(Metaspace).toBeDefined();
+  it("should import pre-tokenizers from main export", () => {
+    expect(MetaspacePreTokenizer).toBeDefined();
     expect(Whitespace).toBeDefined();
   });
 
-  it("should import models", () => {
+  it("should import models from main export", () => {
     expect(BPE).toBeDefined();
   });
 
-  it("should import normalizers", () => {
+  it("should import normalizers from main export", () => {
     expect(Lowercase).toBeDefined();
     expect(StripAccents).toBeDefined();
   });
 
-  it("should import decoders", () => {
+  it("should import decoders from main export", () => {
     expect(BPEDecoder).toBeDefined();
   });
 
-  it("should import post-processors", () => {
+  it("should import post-processors from main export", () => {
     expect(TemplateProcessing).toBeDefined();
   });
 });
