Skip to content

Commit 46915ba

Browse files
committed
Updates
1 parent ea9cef5 commit 46915ba

20 files changed

Lines changed: 276 additions & 1784 deletions
Lines changed: 94 additions & 34 deletions
Original file line number | Diff line number | Diff line change
@@ -2,15 +2,15 @@
22
* Benchmark comparing all three MAF adapter implementations using indexed queries
33
*
44
* Run with:
5-
* node --expose-gc --experimental-strip-types src/adapterComparison.bench.ts
5+
* node --expose-gc --experimental-strip-types benchmarks/adapterComparison.bench.ts
66
*
77
* Tests both:
88
* - C. elegans 7-way alignment (small organism count)
99
* - Zoonomia 447-way alignment (large organism count)
1010
*/
1111

12-
import { unzip } from '@gmod/bgzf-filehandle'
1312
import { BigBed } from '@gmod/bbi'
13+
import { unzip } from '@gmod/bgzf-filehandle'
1414
import { LocalFile } from 'generic-filehandle2'
1515

1616
// Test datasets
@@ -47,15 +47,25 @@ const DATASETS: Record<string, Dataset> = {
4747
mafTabix: null,
4848
regions: [
4949
{ name: 'small (1kb)', refName: 'chr22', start: 22000000, end: 22001000 },
50-
{ name: 'medium (10kb)', refName: 'chr22', start: 22000000, end: 22010000 },
51-
{ name: 'large (100kb)', refName: 'chr22', start: 22000000, end: 22100000 },
50+
{
51+
name: 'medium (10kb)',
52+
refName: 'chr22',
53+
start: 22000000,
54+
end: 22010000,
55+
},
56+
{
57+
name: 'large (100kb)',
58+
refName: 'chr22',
59+
start: 22000000,
60+
end: 22100000,
61+
},
5262
],
5363
},
5464
}
5565

5666
function forceGC(): void {
57-
if (typeof global !== 'undefined' && (global as any).gc) {
58-
;(global as any).gc()
67+
if ((globalThis as Record<string, unknown>).gc) {
68+
;(globalThis as Record<string, unknown>).gc?.()
5969
}
6070
}
6171

@@ -91,17 +101,25 @@ async function benchmarkBigMaf(
91101
const bb = new BigBed({ filehandle: file })
92102

93103
const currentMem1 = getMemoryMB()
94-
if (currentMem1 > peakMem) peakMem = currentMem1
104+
if (currentMem1 > peakMem) {
105+
peakMem = currentMem1
106+
}
95107

96-
const features = await bb.getFeatures(region.refName, region.start, region.end)
108+
const features = await bb.getFeatures(
109+
region.refName,
110+
region.start,
111+
region.end,
112+
)
97113

98114
const currentMem2 = getMemoryMB()
99-
if (currentMem2 > peakMem) peakMem = currentMem2
115+
if (currentMem2 > peakMem) {
116+
peakMem = currentMem2
117+
}
100118

101119
let count = 0
102120
for (const feature of features) {
103121
count++
104-
const mafBlock = (feature as any).mafBlock as string | undefined
122+
const mafBlock = feature.mafBlock as string | undefined
105123
if (mafBlock) {
106124
const blocks = mafBlock.split(';')
107125
for (const block of blocks) {
@@ -112,7 +130,9 @@ async function benchmarkBigMaf(
112130
}
113131
}
114132
const currentMem = getMemoryMB()
115-
if (currentMem > peakMem) peakMem = currentMem
133+
if (currentMem > peakMem) {
134+
peakMem = currentMem
135+
}
116136
}
117137

118138
const endTime = performance.now()
@@ -153,15 +173,19 @@ async function readTaiIndex(path: string): Promise<Map<string, TaiEntry[]>> {
153173
const currChr = isRelative ? lastChr : chr!.split('.').at(-1)!
154174

155175
const absOffset = isRelative ? lastRawOffset + +offsetStr! : +offsetStr!
156-
const absChrStart = isRelative ? lastChrStart + +chrStartStr! : +chrStartStr!
176+
const absChrStart = isRelative
177+
? lastChrStart + +chrStartStr!
178+
: +chrStartStr!
157179

158180
const blockPosition = Math.floor(absOffset / 65536)
159181
const dataPosition = absOffset % 65536
160182

161183
if (!index.has(currChr)) {
162184
index.set(currChr, [])
163185
}
164-
index.get(currChr)!.push({ chrStart: absChrStart, blockPosition, dataPosition })
186+
index
187+
.get(currChr)!
188+
.push({ chrStart: absChrStart, blockPosition, dataPosition })
165189

166190
lastChr = currChr
167191
lastChrStart = absChrStart
@@ -218,17 +242,22 @@ async function benchmarkTaffy(
218242
const file = new LocalFile(`${dataset.dir}/${dataset.taf}`)
219243
const startBlock = firstEntry.blockPosition
220244
const endBlock = nextEntry.blockPosition
221-
const readLength = endBlock > startBlock ? endBlock - startBlock + 65536 : 65536
245+
const readLength =
246+
endBlock > startBlock ? endBlock - startBlock + 65536 : 65536
222247

223248
const compressedData = await file.read(readLength, startBlock)
224249

225250
const currentMem1 = getMemoryMB()
226-
if (currentMem1 > peakMem) peakMem = currentMem1
251+
if (currentMem1 > peakMem) {
252+
peakMem = currentMem1
253+
}
227254

228255
const buffer = await unzip(compressedData)
229256

230257
const currentMem2 = getMemoryMB()
231-
if (currentMem2 > peakMem) peakMem = currentMem2
258+
if (currentMem2 > peakMem) {
259+
peakMem = currentMem2
260+
}
232261

233262
const decoder = new TextDecoder('ascii')
234263
const startOffset = firstEntry.dataPosition
@@ -242,14 +271,18 @@ async function benchmarkTaffy(
242271
let count = 0
243272
for (const line of lines) {
244273
const trimmed = line.trim()
245-
if (!trimmed || trimmed.startsWith('#')) continue
274+
if (!trimmed || trimmed.startsWith('#')) {
275+
continue
276+
}
246277

247278
if (trimmed.includes(' ; ')) {
248279
count++
249280
}
250281

251282
const currentMem = getMemoryMB()
252-
if (currentMem > peakMem) peakMem = currentMem
283+
if (currentMem > peakMem) {
284+
peakMem = currentMem
285+
}
253286
}
254287

255288
const endTime = performance.now()
@@ -272,7 +305,9 @@ async function benchmarkMafTabix(
272305
dataset: Dataset,
273306
region: { refName: string; start: number; end: number },
274307
): Promise<BenchmarkResult | null> {
275-
if (!dataset.mafTabix) return null
308+
if (!dataset.mafTabix) {
309+
return null
310+
}
276311

277312
forceGC()
278313
await new Promise(r => setTimeout(r, 50))
@@ -285,20 +320,26 @@ async function benchmarkMafTabix(
285320
const compressedData = await file.readFile()
286321

287322
const currentMem1 = getMemoryMB()
288-
if (currentMem1 > peakMem) peakMem = currentMem1
323+
if (currentMem1 > peakMem) {
324+
peakMem = currentMem1
325+
}
289326

290327
const buffer = await unzip(compressedData)
291328

292329
const currentMem2 = getMemoryMB()
293-
if (currentMem2 > peakMem) peakMem = currentMem2
330+
if (currentMem2 > peakMem) {
331+
peakMem = currentMem2
332+
}
294333

295334
const decoder = new TextDecoder('utf-8')
296335
const text = decoder.decode(buffer)
297336
const lines = text.split('\n')
298337

299338
let count = 0
300339
for (const line of lines) {
301-
if (!line.trim()) continue
340+
if (!line.trim()) {
341+
continue
342+
}
302343
const parts = line.split('\t')
303344
const start = parseInt(parts[1]!, 10)
304345
const end = parseInt(parts[2]!, 10)
@@ -315,7 +356,9 @@ async function benchmarkMafTabix(
315356
}
316357

317358
const currentMem = getMemoryMB()
318-
if (currentMem > peakMem) peakMem = currentMem
359+
if (currentMem > peakMem) {
360+
peakMem = currentMem
361+
}
319362
}
320363

321364
const endTime = performance.now()
@@ -340,8 +383,10 @@ async function runBenchmarks() {
340383
console.log('='.repeat(80))
341384
console.log('')
342385

343-
if (typeof (global as any).gc !== 'function') {
344-
console.log('WARNING: Run with --expose-gc for accurate memory measurements')
386+
if (typeof (globalThis as Record<string, unknown>).gc !== 'function') {
387+
console.log(
388+
'WARNING: Run with --expose-gc for accurate memory measurements',
389+
)
345390
console.log('')
346391
}
347392

@@ -362,7 +407,8 @@ async function runBenchmarks() {
362407
console.log(` TAF: ${(tafSize / 1024 / 1024).toFixed(2)} MB`)
363408
console.log(` BigMaf: ${(bbSize / 1024 / 1024).toFixed(2)} MB`)
364409
if (dataset.mafTabix) {
365-
const bedSize = (await fs.stat(`${dataset.dir}/${dataset.mafTabix}`)).size
410+
const bedSize = (await fs.stat(`${dataset.dir}/${dataset.mafTabix}`))
411+
.size
366412
console.log(` MafTabix: ${(bedSize / 1024 / 1024).toFixed(2)} MB`)
367413
}
368414
console.log('')
@@ -373,7 +419,9 @@ async function runBenchmarks() {
373419

374420
for (const region of dataset.regions) {
375421
console.log('-'.repeat(80))
376-
console.log(`Region: ${region.name} (${region.refName}:${region.start}-${region.end})`)
422+
console.log(
423+
`Region: ${region.name} (${region.refName}:${region.start}-${region.end})`,
424+
)
377425
console.log('-'.repeat(80))
378426

379427
// BigMaf
@@ -436,7 +484,9 @@ async function runBenchmarks() {
436484

437485
for (const [_key, dataset] of Object.entries(DATASETS)) {
438486
console.log(`${dataset.name}:`)
439-
console.log('Region | Format | Time (ms) | Features | Peak Mem (MB)')
487+
console.log(
488+
'Region | Format | Time (ms) | Features | Peak Mem (MB)',
489+
)
440490
console.log('-'.repeat(70))
441491

442492
const datasetResults = allResults.filter(r => r.dataset === dataset.name)
@@ -447,7 +497,7 @@ async function runBenchmarks() {
447497
console.log(
448498
`${r.region.padEnd(16)} | ${r.adapter.padEnd(9)} | ` +
449499
`${r.timeMs.toFixed(0).padStart(9)} | ${r.featureCount.toString().padStart(8)} | ` +
450-
`${r.peakMemoryMB.toFixed(1).padStart(12)}`,
500+
r.peakMemoryMB.toFixed(1).padStart(12),
451501
)
452502
}
453503
}
@@ -462,20 +512,30 @@ async function runBenchmarks() {
462512

463513
for (const [_key, dataset] of Object.entries(DATASETS)) {
464514
const datasetResults = allResults.filter(r => r.dataset === dataset.name)
465-
const largeResults = datasetResults.filter(r => r.region === 'large (100kb)')
515+
const largeResults = datasetResults.filter(
516+
r => r.region === 'large (100kb)',
517+
)
466518

467519
if (largeResults.length > 0) {
468520
const fastest = [...largeResults].sort((a, b) => a.timeMs - b.timeMs)[0]!
469-
const lowestMem = [...largeResults].sort((a, b) => a.peakMemoryMB - b.peakMemoryMB)[0]!
521+
const lowestMem = [...largeResults].sort(
522+
(a, b) => a.peakMemoryMB - b.peakMemoryMB,
523+
)[0]!
470524

471525
console.log(`${dataset.name} (large 100kb query):`)
472-
console.log(` Fastest: ${fastest.adapter} (${fastest.timeMs.toFixed(0)} ms)`)
473-
console.log(` Lowest memory: ${lowestMem.adapter} (${lowestMem.peakMemoryMB.toFixed(1)} MB)`)
526+
console.log(
527+
` Fastest: ${fastest.adapter} (${fastest.timeMs.toFixed(0)} ms)`,
528+
)
529+
console.log(
530+
` Lowest memory: ${lowestMem.adapter} (${lowestMem.peakMemoryMB.toFixed(1)} MB)`,
531+
)
474532

475533
for (const r of largeResults) {
476534
if (r !== fastest) {
477535
const ratio = r.timeMs / fastest.timeMs
478-
console.log(` ${r.adapter} is ${ratio.toFixed(1)}x slower than ${fastest.adapter}`)
536+
console.log(
537+
` ${r.adapter} is ${ratio.toFixed(1)}x slower than ${fastest.adapter}`,
538+
)
479539
}
480540
}
481541
console.log('')

0 commit comments

Comments
 (0)