@@ -3,7 +3,6 @@ import { join } from 'node:path'
33import { fileURLToPath } from 'node:url'
44import { describe , expect , it } from 'vitest'
55import {
6- approximateTokenSize ,
76 estimateTokenCount ,
87 isWithinTokenLimit ,
98 sliceByTokens ,
@@ -15,38 +14,28 @@ describe('token-related functions', () => {
1514 const ENGLISH_TEXT = 'Hello, world! This is a short sentence.'
1615 const GERMAN_TEXT = 'Die pünktlich gewünschte Trüffelfüllung im übergestülpten Würzkümmel-Würfel ist kümmerlich und dürfte fürderhin zu Rüffeln in Hülle und Fülle führen'
1716
18- describe ( 'approximateTokenSize (legacy) ' , ( ) => {
19- it ( 'should approximate the token size for short English text' , ( ) => {
20- expect ( approximateTokenSize ( ENGLISH_TEXT ) ) . toMatchInlineSnapshot ( '11' )
17+ describe ( 'estimateTokenCount ' , ( ) => {
18+ it ( 'should estimate tokens for short English text' , ( ) => {
19+ expect ( estimateTokenCount ( ENGLISH_TEXT ) ) . toMatchInlineSnapshot ( '11' )
2120 } )
2221
23- it ( 'should approximate the token size for short German text with umlauts' , ( ) => {
24- expect ( approximateTokenSize ( GERMAN_TEXT ) ) . toMatchInlineSnapshot ( '49' )
22+ it ( 'should estimate tokens for German text with umlauts' , ( ) => {
23+ expect ( estimateTokenCount ( GERMAN_TEXT ) ) . toMatchInlineSnapshot ( '49' )
2524 } )
2625
2726 it ( 'should approximate the token size for English ebook' , async ( ) => {
2827 const input = await readFile ( join ( fixturesDir , 'ebooks/pg5200.txt' ) , 'utf-8' )
29- expect ( approximateTokenSize ( input ) ) . toMatchInlineSnapshot ( `35705` )
28+ expect ( estimateTokenCount ( input ) ) . toMatchInlineSnapshot ( `35705` )
3029 } )
3130
3231 it ( 'should approximate the token size for German ebook' , async ( ) => {
3332 const input = await readFile ( join ( fixturesDir , 'ebooks/pg22367.txt' ) , 'utf-8' )
34- expect ( approximateTokenSize ( input ) ) . toMatchInlineSnapshot ( `35069` )
33+ expect ( estimateTokenCount ( input ) ) . toMatchInlineSnapshot ( `35069` )
3534 } )
3635
3736 it ( 'should approximate the token size for Chinese ebook' , async ( ) => {
3837 const input = await readFile ( join ( fixturesDir , 'ebooks/pg7337.txt' ) , 'utf-8' )
39- expect ( approximateTokenSize ( input ) ) . toMatchInlineSnapshot ( `12059` )
40- } )
41- } )
42-
43- describe ( 'estimateTokenCount' , ( ) => {
44- it ( 'should estimate tokens for short English text' , ( ) => {
45- expect ( estimateTokenCount ( ENGLISH_TEXT ) ) . toMatchInlineSnapshot ( '11' )
46- } )
47-
48- it ( 'should estimate tokens for German text with umlauts' , ( ) => {
49- expect ( estimateTokenCount ( GERMAN_TEXT ) ) . toMatchInlineSnapshot ( '49' )
38+ expect ( estimateTokenCount ( input ) ) . toMatchInlineSnapshot ( `12059` )
5039 } )
5140
5241 it ( 'should handle empty input' , ( ) => {
0 commit comments