Skip to content

Commit 14ba6dd

Browse files
refactor: replace approximateTokenSize with estimateTokenCount and update tests
1 parent 28563ca commit 14ba6dd

File tree

3 files changed

+11
-22
lines changed

3 files changed

+11
-22
lines changed

scripts/generateTable.ts

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -2,7 +2,7 @@ import { readFile, writeFile } from 'node:fs/promises'
22
import { join } from 'node:path'
33
import { fileURLToPath } from 'node:url'
44
import { encode } from 'gpt-tokenizer'
5-
import { approximateTokenSize } from '../src/index'
5+
import { estimateTokenCount } from '../src/index'
66

77
const rootDir = join(fileURLToPath(new URL('../', import.meta.url)))
88
const readmePath = join(rootDir, 'README.md')
@@ -49,7 +49,7 @@ for (const example of tokenExamples) {
4949
? (await readFile(example.input, 'utf-8'))
5050
: example.input
5151
const tokenCount = encode(text).length
52-
const estimatedTokenCount = approximateTokenSize(text)
52+
const estimatedTokenCount = estimateTokenCount(text)
5353
const errorPercentage = ((Math.abs(tokenCount - estimatedTokenCount) / tokenCount) * 100).toFixed(2)
5454

5555
markdownTable += `| ${[

src/index.ts

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -34,7 +34,7 @@ export function isWithinTokenLimit(
3434
return estimateTokenCount(text, options) <= tokenLimit
3535
}
3636

37-
// Legacy alias for backward compatibility
37+
/** @deprecated Use `estimateTokenCount` instead */
3838
export const approximateTokenSize: typeof estimateTokenCount = estimateTokenCount
3939

4040
/**

test/index.test.ts

Lines changed: 8 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@ import { join } from 'node:path'
33
import { fileURLToPath } from 'node:url'
44
import { describe, expect, it } from 'vitest'
55
import {
6-
approximateTokenSize,
76
estimateTokenCount,
87
isWithinTokenLimit,
98
sliceByTokens,
@@ -15,38 +14,28 @@ describe('token-related functions', () => {
1514
const ENGLISH_TEXT = 'Hello, world! This is a short sentence.'
1615
const GERMAN_TEXT = 'Die pünktlich gewünschte Trüffelfüllung im übergestülpten Würzkümmel-Würfel ist kümmerlich und dürfte fürderhin zu Rüffeln in Hülle und Fülle führen'
1716

18-
describe('approximateTokenSize (legacy)', () => {
19-
it('should approximate the token size for short English text', () => {
20-
expect(approximateTokenSize(ENGLISH_TEXT)).toMatchInlineSnapshot('11')
17+
describe('estimateTokenCount', () => {
18+
it('should estimate tokens for short English text', () => {
19+
expect(estimateTokenCount(ENGLISH_TEXT)).toMatchInlineSnapshot('11')
2120
})
2221

23-
it('should approximate the token size for short German text with umlauts', () => {
24-
expect(approximateTokenSize(GERMAN_TEXT)).toMatchInlineSnapshot('49')
22+
it('should estimate tokens for German text with umlauts', () => {
23+
expect(estimateTokenCount(GERMAN_TEXT)).toMatchInlineSnapshot('49')
2524
})
2625

2726
it('should approximate the token size for English ebook', async () => {
2827
const input = await readFile(join(fixturesDir, 'ebooks/pg5200.txt'), 'utf-8')
29-
expect(approximateTokenSize(input)).toMatchInlineSnapshot(`35705`)
28+
expect(estimateTokenCount(input)).toMatchInlineSnapshot(`35705`)
3029
})
3130

3231
it('should approximate the token size for German ebook', async () => {
3332
const input = await readFile(join(fixturesDir, 'ebooks/pg22367.txt'), 'utf-8')
34-
expect(approximateTokenSize(input)).toMatchInlineSnapshot(`35069`)
33+
expect(estimateTokenCount(input)).toMatchInlineSnapshot(`35069`)
3534
})
3635

3736
it('should approximate the token size for Chinese ebook', async () => {
3837
const input = await readFile(join(fixturesDir, 'ebooks/pg7337.txt'), 'utf-8')
39-
expect(approximateTokenSize(input)).toMatchInlineSnapshot(`12059`)
40-
})
41-
})
42-
43-
describe('estimateTokenCount', () => {
44-
it('should estimate tokens for short English text', () => {
45-
expect(estimateTokenCount(ENGLISH_TEXT)).toMatchInlineSnapshot('11')
46-
})
47-
48-
it('should estimate tokens for German text with umlauts', () => {
49-
expect(estimateTokenCount(GERMAN_TEXT)).toMatchInlineSnapshot('49')
38+
expect(estimateTokenCount(input)).toMatchInlineSnapshot(`12059`)
5039
})
5140

5241
it('should handle empty input', () => {

0 commit comments

Comments (0)