22 * Benchmark comparing all three MAF adapter implementations using indexed queries
33 *
44 * Run with:
5- * node --expose-gc --experimental-strip-types src /adapterComparison.bench.ts
5+ * node --expose-gc --experimental-strip-types benchmarks /adapterComparison.bench.ts
66 *
77 * Tests both:
88 * - C. elegans 7-way alignment (small organism count)
99 * - Zoonomia 447-way alignment (large organism count)
1010 */
1111
12- import { unzip } from '@gmod/bgzf-filehandle'
1312import { BigBed } from '@gmod/bbi'
13+ import { unzip } from '@gmod/bgzf-filehandle'
1414import { LocalFile } from 'generic-filehandle2'
1515
1616// Test datasets
@@ -47,15 +47,25 @@ const DATASETS: Record<string, Dataset> = {
4747 mafTabix : null ,
4848 regions : [
4949 { name : 'small (1kb)' , refName : 'chr22' , start : 22000000 , end : 22001000 } ,
50- { name : 'medium (10kb)' , refName : 'chr22' , start : 22000000 , end : 22010000 } ,
51- { name : 'large (100kb)' , refName : 'chr22' , start : 22000000 , end : 22100000 } ,
50+ {
51+ name : 'medium (10kb)' ,
52+ refName : 'chr22' ,
53+ start : 22000000 ,
54+ end : 22010000 ,
55+ } ,
56+ {
57+ name : 'large (100kb)' ,
58+ refName : 'chr22' ,
59+ start : 22000000 ,
60+ end : 22100000 ,
61+ } ,
5262 ] ,
5363 } ,
5464}
5565
/**
 * Requests a garbage-collection pass when the runtime exposes a global `gc`
 * function (the benchmark is meant to be run with `node --expose-gc`).
 * When no callable `gc` is present this is a no-op.
 */
function forceGC(): void {
  // Read the hook as `unknown` and narrow with `typeof` before calling it:
  // a truthiness check plus `?.()` would still attempt to invoke a truthy
  // non-function value, and calling an `unknown` does not type-check.
  const gc = (globalThis as Record<string, unknown>).gc
  if (typeof gc === 'function') {
    gc()
  }
}
6171
@@ -91,17 +101,25 @@ async function benchmarkBigMaf(
91101 const bb = new BigBed ( { filehandle : file } )
92102
93103 const currentMem1 = getMemoryMB ( )
94- if ( currentMem1 > peakMem ) peakMem = currentMem1
104+ if ( currentMem1 > peakMem ) {
105+ peakMem = currentMem1
106+ }
95107
96- const features = await bb . getFeatures ( region . refName , region . start , region . end )
108+ const features = await bb . getFeatures (
109+ region . refName ,
110+ region . start ,
111+ region . end ,
112+ )
97113
98114 const currentMem2 = getMemoryMB ( )
99- if ( currentMem2 > peakMem ) peakMem = currentMem2
115+ if ( currentMem2 > peakMem ) {
116+ peakMem = currentMem2
117+ }
100118
101119 let count = 0
102120 for ( const feature of features ) {
103121 count ++
104- const mafBlock = ( feature as any ) . mafBlock as string | undefined
122+ const mafBlock = feature . mafBlock as string | undefined
105123 if ( mafBlock ) {
106124 const blocks = mafBlock . split ( ';' )
107125 for ( const block of blocks ) {
@@ -112,7 +130,9 @@ async function benchmarkBigMaf(
112130 }
113131 }
114132 const currentMem = getMemoryMB ( )
115- if ( currentMem > peakMem ) peakMem = currentMem
133+ if ( currentMem > peakMem ) {
134+ peakMem = currentMem
135+ }
116136 }
117137
118138 const endTime = performance . now ( )
@@ -153,15 +173,19 @@ async function readTaiIndex(path: string): Promise<Map<string, TaiEntry[]>> {
153173 const currChr = isRelative ? lastChr : chr ! . split ( '.' ) . at ( - 1 ) !
154174
155175 const absOffset = isRelative ? lastRawOffset + + offsetStr ! : + offsetStr !
156- const absChrStart = isRelative ? lastChrStart + + chrStartStr ! : + chrStartStr !
176+ const absChrStart = isRelative
177+ ? lastChrStart + + chrStartStr !
178+ : + chrStartStr !
157179
158180 const blockPosition = Math . floor ( absOffset / 65536 )
159181 const dataPosition = absOffset % 65536
160182
161183 if ( ! index . has ( currChr ) ) {
162184 index . set ( currChr , [ ] )
163185 }
164- index . get ( currChr ) ! . push ( { chrStart : absChrStart , blockPosition, dataPosition } )
186+ index
187+ . get ( currChr ) !
188+ . push ( { chrStart : absChrStart , blockPosition, dataPosition } )
165189
166190 lastChr = currChr
167191 lastChrStart = absChrStart
@@ -218,17 +242,22 @@ async function benchmarkTaffy(
218242 const file = new LocalFile ( `${ dataset . dir } /${ dataset . taf } ` )
219243 const startBlock = firstEntry . blockPosition
220244 const endBlock = nextEntry . blockPosition
221- const readLength = endBlock > startBlock ? endBlock - startBlock + 65536 : 65536
245+ const readLength =
246+ endBlock > startBlock ? endBlock - startBlock + 65536 : 65536
222247
223248 const compressedData = await file . read ( readLength , startBlock )
224249
225250 const currentMem1 = getMemoryMB ( )
226- if ( currentMem1 > peakMem ) peakMem = currentMem1
251+ if ( currentMem1 > peakMem ) {
252+ peakMem = currentMem1
253+ }
227254
228255 const buffer = await unzip ( compressedData )
229256
230257 const currentMem2 = getMemoryMB ( )
231- if ( currentMem2 > peakMem ) peakMem = currentMem2
258+ if ( currentMem2 > peakMem ) {
259+ peakMem = currentMem2
260+ }
232261
233262 const decoder = new TextDecoder ( 'ascii' )
234263 const startOffset = firstEntry . dataPosition
@@ -242,14 +271,18 @@ async function benchmarkTaffy(
242271 let count = 0
243272 for ( const line of lines ) {
244273 const trimmed = line . trim ( )
245- if ( ! trimmed || trimmed . startsWith ( '#' ) ) continue
274+ if ( ! trimmed || trimmed . startsWith ( '#' ) ) {
275+ continue
276+ }
246277
247278 if ( trimmed . includes ( ' ; ' ) ) {
248279 count ++
249280 }
250281
251282 const currentMem = getMemoryMB ( )
252- if ( currentMem > peakMem ) peakMem = currentMem
283+ if ( currentMem > peakMem ) {
284+ peakMem = currentMem
285+ }
253286 }
254287
255288 const endTime = performance . now ( )
@@ -272,7 +305,9 @@ async function benchmarkMafTabix(
272305 dataset : Dataset ,
273306 region : { refName : string ; start : number ; end : number } ,
274307) : Promise < BenchmarkResult | null > {
275- if ( ! dataset . mafTabix ) return null
308+ if ( ! dataset . mafTabix ) {
309+ return null
310+ }
276311
277312 forceGC ( )
278313 await new Promise ( r => setTimeout ( r , 50 ) )
@@ -285,20 +320,26 @@ async function benchmarkMafTabix(
285320 const compressedData = await file . readFile ( )
286321
287322 const currentMem1 = getMemoryMB ( )
288- if ( currentMem1 > peakMem ) peakMem = currentMem1
323+ if ( currentMem1 > peakMem ) {
324+ peakMem = currentMem1
325+ }
289326
290327 const buffer = await unzip ( compressedData )
291328
292329 const currentMem2 = getMemoryMB ( )
293- if ( currentMem2 > peakMem ) peakMem = currentMem2
330+ if ( currentMem2 > peakMem ) {
331+ peakMem = currentMem2
332+ }
294333
295334 const decoder = new TextDecoder ( 'utf-8' )
296335 const text = decoder . decode ( buffer )
297336 const lines = text . split ( '\n' )
298337
299338 let count = 0
300339 for ( const line of lines ) {
301- if ( ! line . trim ( ) ) continue
340+ if ( ! line . trim ( ) ) {
341+ continue
342+ }
302343 const parts = line . split ( '\t' )
303344 const start = parseInt ( parts [ 1 ] ! , 10 )
304345 const end = parseInt ( parts [ 2 ] ! , 10 )
@@ -315,7 +356,9 @@ async function benchmarkMafTabix(
315356 }
316357
317358 const currentMem = getMemoryMB ( )
318- if ( currentMem > peakMem ) peakMem = currentMem
359+ if ( currentMem > peakMem ) {
360+ peakMem = currentMem
361+ }
319362 }
320363
321364 const endTime = performance . now ( )
@@ -340,8 +383,10 @@ async function runBenchmarks() {
340383 console . log ( '=' . repeat ( 80 ) )
341384 console . log ( '' )
342385
343- if ( typeof ( global as any ) . gc !== 'function' ) {
344- console . log ( 'WARNING: Run with --expose-gc for accurate memory measurements' )
386+ if ( typeof ( globalThis as Record < string , unknown > ) . gc !== 'function' ) {
387+ console . log (
388+ 'WARNING: Run with --expose-gc for accurate memory measurements' ,
389+ )
345390 console . log ( '' )
346391 }
347392
@@ -362,7 +407,8 @@ async function runBenchmarks() {
362407 console . log ( ` TAF: ${ ( tafSize / 1024 / 1024 ) . toFixed ( 2 ) } MB` )
363408 console . log ( ` BigMaf: ${ ( bbSize / 1024 / 1024 ) . toFixed ( 2 ) } MB` )
364409 if ( dataset . mafTabix ) {
365- const bedSize = ( await fs . stat ( `${ dataset . dir } /${ dataset . mafTabix } ` ) ) . size
410+ const bedSize = ( await fs . stat ( `${ dataset . dir } /${ dataset . mafTabix } ` ) )
411+ . size
366412 console . log ( ` MafTabix: ${ ( bedSize / 1024 / 1024 ) . toFixed ( 2 ) } MB` )
367413 }
368414 console . log ( '' )
@@ -373,7 +419,9 @@ async function runBenchmarks() {
373419
374420 for ( const region of dataset . regions ) {
375421 console . log ( '-' . repeat ( 80 ) )
376- console . log ( `Region: ${ region . name } (${ region . refName } :${ region . start } -${ region . end } )` )
422+ console . log (
423+ `Region: ${ region . name } (${ region . refName } :${ region . start } -${ region . end } )` ,
424+ )
377425 console . log ( '-' . repeat ( 80 ) )
378426
379427 // BigMaf
@@ -436,7 +484,9 @@ async function runBenchmarks() {
436484
437485 for ( const [ _key , dataset ] of Object . entries ( DATASETS ) ) {
438486 console . log ( `${ dataset . name } :` )
439- console . log ( 'Region | Format | Time (ms) | Features | Peak Mem (MB)' )
487+ console . log (
488+ 'Region | Format | Time (ms) | Features | Peak Mem (MB)' ,
489+ )
440490 console . log ( '-' . repeat ( 70 ) )
441491
442492 const datasetResults = allResults . filter ( r => r . dataset === dataset . name )
@@ -447,7 +497,7 @@ async function runBenchmarks() {
447497 console . log (
448498 `${ r . region . padEnd ( 16 ) } | ${ r . adapter . padEnd ( 9 ) } | ` +
449499 `${ r . timeMs . toFixed ( 0 ) . padStart ( 9 ) } | ${ r . featureCount . toString ( ) . padStart ( 8 ) } | ` +
450- ` ${ r . peakMemoryMB . toFixed ( 1 ) . padStart ( 12 ) } ` ,
500+ r . peakMemoryMB . toFixed ( 1 ) . padStart ( 12 ) ,
451501 )
452502 }
453503 }
@@ -462,20 +512,30 @@ async function runBenchmarks() {
462512
463513 for ( const [ _key , dataset ] of Object . entries ( DATASETS ) ) {
464514 const datasetResults = allResults . filter ( r => r . dataset === dataset . name )
465- const largeResults = datasetResults . filter ( r => r . region === 'large (100kb)' )
515+ const largeResults = datasetResults . filter (
516+ r => r . region === 'large (100kb)' ,
517+ )
466518
467519 if ( largeResults . length > 0 ) {
468520 const fastest = [ ...largeResults ] . sort ( ( a , b ) => a . timeMs - b . timeMs ) [ 0 ] !
469- const lowestMem = [ ...largeResults ] . sort ( ( a , b ) => a . peakMemoryMB - b . peakMemoryMB ) [ 0 ] !
521+ const lowestMem = [ ...largeResults ] . sort (
522+ ( a , b ) => a . peakMemoryMB - b . peakMemoryMB ,
523+ ) [ 0 ] !
470524
471525 console . log ( `${ dataset . name } (large 100kb query):` )
472- console . log ( ` Fastest: ${ fastest . adapter } (${ fastest . timeMs . toFixed ( 0 ) } ms)` )
473- console . log ( ` Lowest memory: ${ lowestMem . adapter } (${ lowestMem . peakMemoryMB . toFixed ( 1 ) } MB)` )
526+ console . log (
527+ ` Fastest: ${ fastest . adapter } (${ fastest . timeMs . toFixed ( 0 ) } ms)` ,
528+ )
529+ console . log (
530+ ` Lowest memory: ${ lowestMem . adapter } (${ lowestMem . peakMemoryMB . toFixed ( 1 ) } MB)` ,
531+ )
474532
475533 for ( const r of largeResults ) {
476534 if ( r !== fastest ) {
477535 const ratio = r . timeMs / fastest . timeMs
478- console . log ( ` ${ r . adapter } is ${ ratio . toFixed ( 1 ) } x slower than ${ fastest . adapter } ` )
536+ console . log (
537+ ` ${ r . adapter } is ${ ratio . toFixed ( 1 ) } x slower than ${ fastest . adapter } ` ,
538+ )
479539 }
480540 }
481541 console . log ( '' )
0 commit comments