Skip to content

Commit 2ee9735

Browse files
authored
Add back TAF adapter (#42)
1 parent a6075ce commit 2ee9735

67 files changed

Lines changed: 6741 additions & 894 deletions

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

benchmarks/adapterComparison.bench.ts

Lines changed: 546 additions & 0 deletions
Large diffs are not rendered by default.

benchmarks/memoryBenchmark.ts

Lines changed: 323 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,323 @@
1+
/**
2+
* Memory benchmark comparing streaming vs non-streaming approaches
3+
*
4+
* This demonstrates the memory benefits of streaming feature processing
5+
* vs collecting all features into an array before processing.
6+
*
7+
* Run with:
8+
* node --expose-gc --experimental-strip-types benchmarks/memoryBenchmark.ts
9+
*
10+
* The --expose-gc flag enables manual garbage collection for accurate measurements.
11+
*/
12+
13+
// Configuration - adjust these to test different scenarios
const CONFIG = {
  numFeatures: 100, // Number of MAF alignment blocks
  numOrganisms: 200, // Number of species/organisms per block
  seqLength: 10000, // Sequence length in base pairs
}

// Maps each supported `--flag` to the CONFIG field it overrides.
const CLI_FLAGS = new Map<string, keyof typeof CONFIG>([
  ['--features', 'numFeatures'],
  ['--organisms', 'numOrganisms'],
  ['--seqLength', 'seqLength'],
])

/**
 * Applies `--features=N`, `--organisms=N` and `--seqLength=N` overrides to
 * `config` in place.
 *
 * Only positive decimal integers are accepted. Anything else (missing value,
 * non-numeric text, zero) is reported with a warning and the current value is
 * kept, so a typo cannot silently turn the benchmark into a no-op via `NaN`.
 * Arguments that are not one of the supported flags are ignored.
 *
 * @param args - command line arguments, without the node/script entries
 * @param config - configuration object to update
 */
function applyCliOverrides(
  args: readonly string[],
  config: typeof CONFIG,
): void {
  for (const arg of args) {
    const eq = arg.indexOf('=')
    if (eq === -1) {
      continue
    }
    const key = CLI_FLAGS.get(arg.slice(0, eq))
    if (key === undefined) {
      continue
    }
    const raw = arg.slice(eq + 1)
    const value = /^\d+$/.test(raw) ? Number.parseInt(raw, 10) : Number.NaN
    if (!Number.isSafeInteger(value) || value <= 0) {
      console.warn(`Ignoring ${arg}: expected a positive integer`)
      continue
    }
    config[key] = value
  }
}

// Parse command line args for custom config
applyCliOverrides(process.argv.slice(2), CONFIG)
31+
32+
/**
 * Builds a list of `count` organism names.
 *
 * The first entries are taken from a fixed list of assembly names; once that
 * list is exhausted the remaining entries are synthesised as `org<index>`.
 *
 * @param count - how many names to produce
 * @returns the names, in index order
 */
function generateOrgNames(count: number): string[] {
  const knownAssemblies = [
    'hg38',
    'mm10',
    'rn6',
    'canFam3',
    'felCat8',
    'bosTau8',
    'oviAri3',
    'susScr11',
    'equCab2',
    'galGal5',
    'danRer10',
    'xenTro9',
    'latCha1',
  ]
  const names: string[] = []
  let index = 0
  while (index < count) {
    // Past the end of the fixed list the lookup is undefined, so fall back
    // to a generated name.
    names.push(knownAssemblies[index] ?? `org${index}`)
    index += 1
  }
  return names
}
55+
56+
/** Shape of the synthetic alignment block used by this benchmark. */
interface SimulatedFeature {
  /** Identifier of the feature. */
  id: string
  /** Start coordinate of the feature. */
  start: number
  /** End coordinate of the feature. */
  end: number
  /** The feature's own sequence, which alignment rows are compared against. */
  seq: string
  /** Alignment rows, keyed by organism name. */
  alignments: Record<
    string,
    {
      chr: string
      start: number
      seq: string
    }
  >
}
63+
64+
/**
 * Generates one synthetic feature with alignments (similar to a MAF block).
 *
 * Every sequence is drawn at random from `A`, `C`, `G`, `T` and `-`, so the
 * result differs between calls.
 *
 * @param featureIndex - index of the feature; drives its id and coordinates
 * @param numOrganisms - number of alignment rows to create
 * @param seqLength - length of the feature sequence and of each row
 * @returns the generated feature
 */
function generateFeature(
  featureIndex: number,
  numOrganisms: number,
  seqLength: number,
): SimulatedFeature {
  const alphabet = ['A', 'C', 'G', 'T', '-']
  const randomSequence = (): string =>
    Array.from(
      { length: seqLength },
      () => alphabet[Math.floor(Math.random() * alphabet.length)],
    ).join('')

  const featureStart = featureIndex * seqLength
  // The feature's own sequence is drawn before any alignment row.
  const refSeq = randomSequence()

  const alignments: SimulatedFeature['alignments'] = {}
  for (const [row, org] of generateOrgNames(numOrganisms).entries()) {
    alignments[org] = {
      chr: 'chr1',
      start: featureStart + row * 100,
      seq: randomSequence(),
    }
  }

  return {
    id: `feature-${featureIndex}`,
    start: featureStart,
    end: (featureIndex + 1) * seqLength,
    seq: refSeq,
    alignments,
  }
}
103+
104+
/**
 * Simulates processing a feature (like rendering to canvas).
 *
 * Walks every alignment row and counts the positions whose character equals
 * the character at the same index of the feature's own sequence.
 *
 * @param feature - the feature to scan
 * @returns total number of matching positions across all rows
 */
function processFeature(feature: SimulatedFeature): number {
  let matches = 0
  for (const row of Object.values(feature.alignments)) {
    const rowSeq = row.seq
    for (let pos = 0; pos < rowSeq.length; pos += 1) {
      if (rowSeq[pos] === feature.seq[pos]) {
        matches += 1
      }
    }
  }
  return matches
}
116+
117+
/**
 * Triggers a garbage collection when a callable `gc` is present on the
 * global object (Node provides one when started with `--expose-gc`).
 * Does nothing otherwise.
 */
function forceGC(): void {
  // Read `gc` through a narrow optional member and check that it is really
  // a function before calling it: a value typed `unknown` is not callable,
  // and a truthy non-function would throw at the call site.
  const { gc } = globalThis as { gc?: unknown }
  if (typeof gc === 'function') {
    gc()
  }
}
122+
123+
/**
 * Reads the current `heapUsed` figure from `process.memoryUsage()`.
 *
 * @returns heap usage in megabytes, rounded to two decimal places
 */
function getMemoryMB(): number {
  const { heapUsed } = process.memoryUsage()
  const megabytes = heapUsed / 1024 / 1024
  return Math.round(megabytes * 100) / 100
}
126+
127+
/**
 * Formats a megabyte value for display.
 *
 * @param mb - amount in megabytes
 * @returns the value with two decimals followed by ` MB`
 */
function formatMemory(mb: number): string {
  const fixed = mb.toFixed(2)
  return fixed + ' MB'
}
130+
131+
/**
 * Formats a duration for display.
 *
 * @param ms - duration in milliseconds
 * @returns the duration rounded to the nearest integer followed by ` ms`
 */
function formatTime(ms: number): string {
  const rounded = Math.round(ms)
  return rounded + ' ms'
}
134+
135+
/** Resolves after `ms` milliseconds. */
function pause(ms: number): Promise<void> {
  return new Promise(resolve => {
    setTimeout(resolve, ms)
  })
}

/**
 * Runs the non-streaming and the streaming scenario back to back using the
 * values in `CONFIG`, then prints per-scenario numbers and a summary.
 *
 * - Non-streaming: every feature is generated into an array first, then the
 *   array is processed.
 * - Streaming: each feature is generated and processed immediately, with a
 *   forced GC every 20 features.
 *
 * Peak memory is the highest `getMemoryMB()` reading sampled after each
 * generate/process step.
 *
 * @returns a promise that resolves once the report has been printed
 */
async function runMemoryBenchmark(): Promise<void> {
  const { numFeatures, numOrganisms, seqLength } = CONFIG
  const gcAvailable = typeof (globalThis as { gc?: unknown }).gc === 'function'

  console.log('='.repeat(70))
  console.log('MEMORY BENCHMARK: Streaming vs Non-Streaming')
  console.log('='.repeat(70))
  console.log('')
  console.log('Configuration:')
  console.log(` Features: ${numFeatures}`)
  console.log(` Organisms: ${numOrganisms}`)
  console.log(` Seq Length: ${seqLength} bp`)
  console.log('')
  console.log('This simulates the MAF viewer rendering pipeline:')
  console.log(
    ' - Non-streaming: collect all features, then render (old approach)',
  )
  console.log(
    ' - Streaming: render each feature as it arrives (current approach)',
  )
  console.log('')

  // Check if GC is available
  if (!gcAvailable) {
    console.log(
      'WARNING: Run with --expose-gc for accurate memory measurements',
    )
    console.log('')
  }

  // Warmup
  forceGC()
  await pause(100)

  // =========================================================================
  // Test 1: Non-streaming (collect all features first)
  // =========================================================================
  console.log('-'.repeat(70))
  console.log('NON-STREAMING (collect all features, then process)')
  console.log('-'.repeat(70))
  forceGC()
  await pause(100)

  const baselineNonStream = getMemoryMB()
  let peakMemoryNonStream = baselineNonStream
  const startTimeNonStream = performance.now()

  // Step 1: Collect all features (simulating toArray())
  const collected: SimulatedFeature[] = []
  for (let i = 0; i < numFeatures; i++) {
    collected.push(generateFeature(i, numOrganisms, seqLength))
    peakMemoryNonStream = Math.max(peakMemoryNonStream, getMemoryMB())
  }

  // Step 2: Process all collected features
  // The running total is not reported; it only keeps the result in use.
  let totalNonStream = 0
  for (const f of collected) {
    totalNonStream += processFeature(f)
    peakMemoryNonStream = Math.max(peakMemoryNonStream, getMemoryMB())
  }

  const endTimeNonStream = performance.now()
  const finalMemNonStream = getMemoryMB()
  const nonStreamMs = endTimeNonStream - startTimeNonStream

  console.log(` Baseline: ${formatMemory(baselineNonStream)}`)
  console.log(` Peak: ${formatMemory(peakMemoryNonStream)}`)
  console.log(` Final: ${formatMemory(finalMemNonStream)}`)
  console.log(` Time: ${formatTime(nonStreamMs)}`)
  console.log('')

  // Clear and GC so the collected features do not count against test 2
  collected.length = 0
  forceGC()
  await pause(500)

  // =========================================================================
  // Test 2: Streaming (process each feature immediately)
  // =========================================================================
  console.log('-'.repeat(70))
  console.log('STREAMING (process each feature as it arrives)')
  console.log('-'.repeat(70))
  forceGC()
  await pause(100)

  const baselineStream = getMemoryMB()
  let peakMemoryStream = baselineStream
  const startTimeStream = performance.now()

  // Generate and process one at a time
  let totalStream = 0
  for (let i = 0; i < numFeatures; i++) {
    const feature = generateFeature(i, numOrganisms, seqLength)
    totalStream += processFeature(feature)
    // Feature can now be GC'd

    peakMemoryStream = Math.max(peakMemoryStream, getMemoryMB())

    // Periodically force GC to simulate real-world conditions
    if (i % 20 === 0) {
      forceGC()
    }
  }

  const endTimeStream = performance.now()
  forceGC()
  await pause(100)
  const finalMemStream = getMemoryMB()
  const streamMs = endTimeStream - startTimeStream

  console.log(` Baseline: ${formatMemory(baselineStream)}`)
  console.log(` Peak: ${formatMemory(peakMemoryStream)}`)
  console.log(` Final: ${formatMemory(finalMemStream)}`)
  console.log(` Time: ${formatTime(streamMs)}`)
  console.log('')

  // =========================================================================
  // Summary
  // =========================================================================
  console.log('='.repeat(70))
  console.log('SUMMARY')
  console.log('='.repeat(70))
  console.log('')

  const memoryReduction =
    ((peakMemoryNonStream - peakMemoryStream) / peakMemoryNonStream) * 100
  const timeDiff = streamMs - nonStreamMs
  // Streaming time relative to non-streaming time: below 1 means streaming
  // finished sooner, above 1 means it took longer. The branches below read
  // the ratio in exactly this direction.
  const timeRatio = streamMs / nonStreamMs

  console.log('Peak Memory:')
  console.log(` Non-streaming: ${formatMemory(peakMemoryNonStream)}`)
  console.log(` Streaming: ${formatMemory(peakMemoryStream)}`)
  if (memoryReduction > 0) {
    console.log(
      ` Reduction: ${memoryReduction.toFixed(1)}% less memory with streaming`,
    )
  }
  console.log('')

  console.log('Execution Time:')
  console.log(` Non-streaming: ${formatTime(nonStreamMs)}`)
  console.log(` Streaming: ${formatTime(streamMs)}`)
  console.log(
    ` Difference: ${timeDiff > 0 ? '+' : ''}${formatTime(timeDiff)}`,
  )
  console.log('')

  console.log('Conclusion:')
  if (memoryReduction > 10) {
    console.log(
      ` ✓ Streaming reduces peak memory by ${memoryReduction.toFixed(0)}%`,
    )
    console.log(
      ` ✓ This is significant for large MAF files with many organisms`,
    )
    if (timeRatio < 1) {
      console.log(
        ` ✓ Streaming is also ${((1 / timeRatio - 1) * 100).toFixed(0)}% faster`,
      )
    } else if (timeRatio > 1.1) {
      console.log(
        ` ! Streaming is ${((timeRatio - 1) * 100).toFixed(0)}% slower (GC overhead)`,
      )
      console.log(
        ` This tradeoff is worthwhile for memory-constrained scenarios`,
      )
    }
  } else {
    console.log(
      ` Memory difference is minimal (${memoryReduction.toFixed(1)}%)`,
    )
    // Only suggest the flag when it was not already supplied.
    if (!gcAvailable) {
      console.log(` Run with --expose-gc for accurate measurements`)
    }
  }
  console.log('')
}
322+
323+
// Entry point: run the benchmark. A failure is logged and reflected in the
// exit code so that scripts invoking the benchmark can detect it.
runMemoryBenchmark().catch((error: unknown) => {
  console.error(error)
  process.exitCode = 1
})

eslint.config.mjs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ export default tseslint.config(
1313
'example/*',
1414
'eslint.config.mjs',
1515
'esbuild.mjs',
16+
'benchmarks/*',
1617
],
1718
},
1819
{

package.json

Lines changed: 12 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,8 @@
1717
"clean": "rimraf dist",
1818
"start": "node esbuild.mjs --watch",
1919
"test": "vitest",
20+
"bench": "vitest bench",
21+
"bench:memory": "node --expose-gc --experimental-strip-types benchmarks/memoryBenchmark.ts",
2022
"format": "prettier --write .",
2123
"prebuild": "yarn clean",
2224
"build": "tsc && NODE_ENV=production node esbuild.mjs",
@@ -31,17 +33,18 @@
3133
"@babel/preset-react": "^7.10.4",
3234
"@emotion/react": "^11.10.4",
3335
"@fal-works/esbuild-plugin-global-externals": "^2.1.2",
34-
"@jbrowse/core": "^4.1.1",
36+
"@gmod/bbi": "^8.1.1",
37+
"@jbrowse/core": "^4.1.3",
3538
"@jbrowse/mobx-state-tree": "^5.4.1",
36-
"@jbrowse/plugin-data-management": "^4.1.1",
37-
"@jbrowse/plugin-linear-genome-view": "^4.1.1",
39+
"@jbrowse/plugin-data-management": "^4.1.3",
40+
"@jbrowse/plugin-linear-genome-view": "^4.1.3",
3841
"@mui/material": "^7.0.1",
3942
"@mui/system": "^7.0.1",
4043
"@mui/x-data-grid": "^8.2.0",
4144
"@types/d3-array": "^3.2.1",
4245
"@types/d3-hierarchy": "^3.1.7",
43-
"@types/node": "^24.7.0",
44-
"@types/react": "^19.0.1",
46+
"@types/node": "^25.0.10",
47+
"@types/react": "^19.2.10",
4548
"chalk": "^5.3.0",
4649
"esbuild": "^0.27.0",
4750
"eslint": "^9.17.0",
@@ -53,14 +56,14 @@
5356
"mobx-react": "^9.0.1",
5457
"prettier": "^3.4.2",
5558
"pretty-bytes": "^7.0.0",
56-
"react": "^19.0.0",
57-
"react-dom": "^19.0.0",
59+
"react": "^19.2.4",
60+
"react-dom": "^19.2.4",
5861
"rimraf": "^6.0.1",
5962
"rxjs": "^7.8.1",
6063
"serve": "^14.2.0",
6164
"tss-react": "^4.9.18",
6265
"typescript": "^5.1.6",
63-
"typescript-eslint": "^8.18.0",
66+
"typescript-eslint": "^8.54.0",
6467
"vitest": "^4.0.5"
6568
},
6669
"dependencies": {
@@ -71,7 +74,6 @@
7174
"d3-hierarchy": "^3.1.2",
7275
"fast-deep-equal": "^3.1.3",
7376
"flatbush": "^4.4.1",
74-
"generic-filehandle2": "^2.0.1",
75-
"long": "^5.2.3"
77+
"generic-filehandle2": "^2.0.1"
7678
}
7779
}

0 commit comments

Comments
 (0)