Skip to content

Commit 32871ab

Browse files
committed
Read only requested samples
1 parent c43f609 commit 32871ab

7 files changed

Lines changed: 211 additions & 17 deletions

File tree

src/BgzipTaffyAdapter/BgzipTaffyAdapter.ts

Lines changed: 25 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -19,6 +19,7 @@ import { parseAssemblyAndChrSimple } from '../util/parseAssemblyName'
1919

2020
import type { RowInstruction } from './rowInstructions'
2121
import type { AlignmentRecord, IndexData } from './types'
22+
import type { MafAdapterOptions } from '../types'
2223

2324
// Represents a row in the alignment (like Alignment_Row in C)
2425
interface RowState {
@@ -159,6 +160,7 @@ export default class BgzipTaffyAdapter extends BaseFeatureDataAdapter {
159160
*parseTafBlocksStreaming(
160161
buffer: Uint8Array,
161162
runLengthEncodeBases: boolean,
163+
sampleFilter?: Set<string>,
162164
): Generator<TafFeature> {
163165
let pBlock: AlignmentBlock | undefined
164166
let currentBlock: AlignmentBlock | undefined
@@ -182,7 +184,7 @@ export default class BgzipTaffyAdapter extends BaseFeatureDataAdapter {
182184
// If we have a current block with columns, finalize and yield it
183185
if (currentBlock && columns.length > 0) {
184186
this.finalizeBlock(currentBlock, columns)
185-
const feature = this.blockToFeature(currentBlock)
187+
const feature = this.blockToFeature(currentBlock, sampleFilter)
186188
if (feature) {
187189
yield feature
188190
}
@@ -234,7 +236,7 @@ export default class BgzipTaffyAdapter extends BaseFeatureDataAdapter {
234236
// Finalize and yield last block
235237
if (currentBlock && columns.length > 0) {
236238
this.finalizeBlock(currentBlock, columns)
237-
const feature = this.blockToFeature(currentBlock)
239+
const feature = this.blockToFeature(currentBlock, sampleFilter)
238240
if (feature) {
239241
yield feature
240242
}
@@ -246,8 +248,15 @@ export default class BgzipTaffyAdapter extends BaseFeatureDataAdapter {
246248
buffer: Uint8Array,
247249
runLengthEncodeBases: boolean,
248250
_opts?: BaseOptions,
251+
sampleFilter?: Set<string>,
249252
): TafFeature[] {
250-
return [...this.parseTafBlocksStreaming(buffer, runLengthEncodeBases)]
253+
return [
254+
...this.parseTafBlocksStreaming(
255+
buffer,
256+
runLengthEncodeBases,
257+
sampleFilter,
258+
),
259+
]
251260
}
252261

253262
// TextDecoder for efficient string building from typed array
@@ -280,7 +289,10 @@ export default class BgzipTaffyAdapter extends BaseFeatureDataAdapter {
280289
}
281290
}
282291

283-
blockToFeature(block: AlignmentBlock): TafFeature | undefined {
292+
blockToFeature(
293+
block: AlignmentBlock,
294+
sampleFilter?: Set<string>,
295+
): TafFeature | undefined {
284296
if (block.rows.length === 0 || block.columnNumber === 0) {
285297
return undefined
286298
}
@@ -290,6 +302,9 @@ export default class BgzipTaffyAdapter extends BaseFeatureDataAdapter {
290302

291303
for (const row of block.rows) {
292304
const { assemblyName, chr } = parseAssemblyAndChrSimple(row.sequenceName)
305+
if (sampleFilter && !sampleFilter.has(assemblyName)) {
306+
continue
307+
}
293308
alignments[assemblyName] = {
294309
chr,
295310
start: row.start,
@@ -392,12 +407,16 @@ export default class BgzipTaffyAdapter extends BaseFeatureDataAdapter {
392407
return entries
393408
}
394409

395-
getFeatures(query: Region, opts?: BaseOptions) {
410+
getFeatures(query: Region, opts?: MafAdapterOptions) {
396411
const { statusCallback = () => {} } = opts || {}
397412
return ObservableCreate<Feature>(async observer => {
398413
try {
399414
const { index, runLengthEncodeBases } = await this.setup(opts)
400415

416+
const sampleFilter = opts?.samples
417+
? new Set(opts.samples.map(s => s.id))
418+
: undefined
419+
401420
// Get byte range for this query
402421
const records = index[query.refName]
403422
if (!records || records.length === 0) {
@@ -447,6 +466,7 @@ export default class BgzipTaffyAdapter extends BaseFeatureDataAdapter {
447466
for (const feat of this.parseTafBlocksStreaming(
448467
slice,
449468
runLengthEncodeBases,
469+
sampleFilter,
450470
)) {
451471
// Filter features that overlap with query region
452472
if (feat.end > query.start && feat.start < query.end) {

src/BigMafAdapter/BigMafAdapter.ts

Lines changed: 12 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -9,8 +9,7 @@ import { normalize } from '../util'
99
import { subscribeToObservable } from '../util/observableUtils'
1010
import { parseAssemblyAndChrSimple } from '../util/parseAssemblyName'
1111

12-
import type { AlignmentRecord } from '../types'
13-
import type { BaseOptions } from '@jbrowse/core/data_adapters/BaseAdapter'
12+
import type { AlignmentRecord, MafAdapterOptions } from '../types'
1413
import type { Feature, Region } from '@jbrowse/core/util'
1514
export default class BigMafAdapter extends BaseFeatureDataAdapter {
1615
public setupP?: Promise<{ adapter: BaseFeatureDataAdapter }>
@@ -46,12 +45,16 @@ export default class BigMafAdapter extends BaseFeatureDataAdapter {
4645
return adapter.getHeader()
4746
}
4847

49-
getFeatures(query: Region, opts?: BaseOptions) {
48+
getFeatures(query: Region, opts?: MafAdapterOptions) {
5049
const WHITESPACE_REGEX = / +/
5150

5251
return ObservableCreate<Feature>(async observer => {
5352
const { adapter } = await this.setupPre()
5453

54+
const sampleFilter = opts?.samples
55+
? new Set(opts.samples.map(s => s.id))
56+
: undefined
57+
5558
await subscribeToObservable(adapter.getFeatures(query, opts), feature => {
5659
const maf = feature.get('mafBlock') as string
5760
const blocks = maf.split(';')
@@ -64,12 +67,16 @@ export default class BigMafAdapter extends BaseFeatureDataAdapter {
6467
const sequence = parts[6]!
6568
const organismChr = parts[1]!
6669

70+
const { assemblyName: org, chr } =
71+
parseAssemblyAndChrSimple(organismChr)
72+
6773
if (referenceSeq === undefined) {
6874
referenceSeq = sequence
6975
}
7076

71-
const { assemblyName: org, chr } =
72-
parseAssemblyAndChrSimple(organismChr)
77+
if (sampleFilter && !sampleFilter.has(org)) {
78+
continue
79+
}
7380

7481
alignments[org] = {
7582
chr,

src/LinearMafRenderer/LinearMafRenderer.ts

Lines changed: 21 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -56,11 +56,28 @@ export default class LinearMafRenderer extends FeatureRendererType {
5656
const height = samples.length * rowHeight + 100
5757
const width = (region.end - region.start) / bpPerPx
5858

59+
const scaledWidth = Math.ceil(width * highResolutionScaling)
60+
const scaledHeight = Math.ceil(height * highResolutionScaling)
61+
62+
if (scaledWidth > 16384 || scaledHeight > 16384) {
63+
console.warn(
64+
'[LinearMafRenderer] Canvas dimensions may exceed browser limits:',
65+
{
66+
width,
67+
height,
68+
scaledWidth,
69+
scaledHeight,
70+
highResolutionScaling,
71+
bpPerPx,
72+
regionSize: region.end - region.start,
73+
samplesCount: samples.length,
74+
rowHeight,
75+
},
76+
)
77+
}
78+
5979
// Create canvas with high resolution scaling support
60-
const canvas = createCanvas(
61-
Math.ceil(width * highResolutionScaling),
62-
Math.ceil(height * highResolutionScaling),
63-
)
80+
const canvas = createCanvas(scaledWidth, scaledHeight)
6481
const ctx = canvas.getContext('2d')
6582
if (!ctx) {
6683
throw new Error('Could not get canvas context')

src/MafTabixAdapter/MafTabixAdapter.ts

Lines changed: 10 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -16,7 +16,7 @@ import {
1616
selectReferenceSequenceString,
1717
} from '../util/parseAssemblyName'
1818

19-
import type { AlignmentRecord } from '../types'
19+
import type { AlignmentRecord, MafAdapterOptions } from '../types'
2020

2121
export default class MafTabixAdapter extends BaseFeatureDataAdapter {
2222
public setupP?: Promise<{ adapter: BaseFeatureDataAdapter }>
@@ -61,12 +61,16 @@ export default class MafTabixAdapter extends BaseFeatureDataAdapter {
6161
return adapter.getHeader()
6262
}
6363

64-
getFeatures(query: Region, opts?: BaseOptions) {
64+
getFeatures(query: Region, opts?: MafAdapterOptions) {
6565
return ObservableCreate<Feature>(async observer => {
6666
const { adapter } = await this.setup(opts)
6767
let firstAssemblyNameFound = ''
6868
const refAssemblyName = this.getConf('refAssemblyName')
6969

70+
const sampleFilter = opts?.samples
71+
? new Set(opts.samples.map(s => s.id))
72+
: undefined
73+
7074
await subscribeToObservable(adapter.getFeatures(query, opts), feature => {
7175
const data = (feature.get('field5') as string).split(',')
7276
const alignments = {} as Record<string, AlignmentRecord>
@@ -95,6 +99,10 @@ export default class MafTabixAdapter extends BaseFeatureDataAdapter {
9599
firstAssemblyNameFound = assemblyName
96100
}
97101

102+
if (sampleFilter && !sampleFilter.has(assemblyName)) {
103+
continue
104+
}
105+
98106
alignments[assemblyName] = {
99107
chr,
100108
start: +startStr!,

src/types.ts

Lines changed: 11 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,18 @@
1+
import type { BaseOptions } from '@jbrowse/core/data_adapters/BaseAdapter'
2+
13
/**
24
* Shared types for MAF alignment data
35
*/
46

7+
/**
8+
* Options for MAF adapter getFeatures call.
9+
* Extends BaseOptions with optional samples filter for subtree optimization.
10+
*/
11+
export interface MafAdapterOptions extends BaseOptions {
12+
/** If provided, only parse alignments for these sample IDs */
13+
samples?: Sample[]
14+
}
15+
516
/**
617
* Represents a single organism's alignment within a MAF block.
718
* Used by adapters to return alignment data and by rendering code.

src/util/fastaUtils.test.ts

Lines changed: 128 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -296,3 +296,131 @@ test('includeInsertions with no insertions present', () => {
296296
// Should behave same as without includeInsertions since there are none
297297
expect(result).toMatchSnapshot()
298298
})
299+
300+
test('includeInsertions - insertion only in non-visible sample should not add gaps', () => {
301+
// This tests the bug fix where insertions from non-visible samples were
302+
// causing gaps to be added to visible samples.
303+
//
304+
// Scenario: 3 samples exist, but only 2 are selected for display.
305+
// The 3rd (non-visible) sample has an insertion, which creates a gap in the
306+
// reference sequence. The visible samples should NOT have extra gaps added.
307+
//
308+
// seq: AC--GTAC (reference has gap due to assembly3's insertion)
309+
// assembly1: AC--GTAC (no insertion, just gaps where assembly3 has insertion)
310+
// assembly2: AC--GTAC (no insertion, just gaps where assembly3 has insertion)
311+
// assembly3: ACTTGTAC (has TT insertion) - NOT in samples list
312+
const mockFeature = new SimpleFeature({
313+
uniqueId: '123',
314+
refName: 'abc',
315+
start: 100,
316+
end: 106,
317+
seq: 'AC--GTAC',
318+
alignments: {
319+
assembly1: {
320+
chr: 'chr1',
321+
start: 100,
322+
seq: 'AC--GTAC',
323+
strand: 1,
324+
},
325+
assembly2: {
326+
chr: 'chr2',
327+
start: 200,
328+
seq: 'AC--GTAC',
329+
strand: 1,
330+
},
331+
assembly3: {
332+
chr: 'chr3',
333+
start: 300,
334+
seq: 'ACTTGTAC',
335+
strand: 1,
336+
},
337+
},
338+
})
339+
340+
// Only include assembly1 and assembly2, NOT assembly3
341+
const result = processFeaturesToFasta({
342+
features: makeMap([mockFeature]),
343+
samples: [{ id: 'assembly1' }, { id: 'assembly2' }],
344+
includeInsertions: true,
345+
showAllLetters: true,
346+
regions: [
347+
{
348+
refName: 'chr1',
349+
start: 100,
350+
end: 106,
351+
assemblyName: 'assembly1',
352+
},
353+
],
354+
})
355+
356+
// Since neither visible sample has an actual insertion (both have only gaps
357+
// at the insertion position), no insertion columns should be added.
358+
// The output should be 6 characters (the reference length), not 8.
359+
expect(result[0]).toBe('acgtac')
360+
expect(result[1]).toBe('acgtac')
361+
expect(result[0]).toHaveLength(6)
362+
expect(result[1]).toHaveLength(6)
363+
})
364+
365+
test('includeInsertions - mixed visible/non-visible insertions', () => {
366+
// Scenario: Reference has a gap. One visible sample has an insertion,
367+
// another visible sample doesn't, and a non-visible sample also has an
368+
// insertion. Only the visible sample's insertion should be included.
369+
//
370+
// seq: AC---GTAC (reference has 3-bp gap)
371+
// assembly1: AC-T-GTAC (has T insertion, 1 bp) - visible
372+
// assembly2: AC---GTAC (no insertion) - visible
373+
// assembly3: ACTTTGTAC (has TTT insertion, 3 bp) - NOT visible
374+
const mockFeature = new SimpleFeature({
375+
uniqueId: '123',
376+
refName: 'abc',
377+
start: 100,
378+
end: 106,
379+
seq: 'AC---GTAC',
380+
alignments: {
381+
assembly1: {
382+
chr: 'chr1',
383+
start: 100,
384+
seq: 'AC-T-GTAC',
385+
strand: 1,
386+
},
387+
assembly2: {
388+
chr: 'chr2',
389+
start: 200,
390+
seq: 'AC---GTAC',
391+
strand: 1,
392+
},
393+
assembly3: {
394+
chr: 'chr3',
395+
start: 300,
396+
seq: 'ACTTTGTAC',
397+
strand: 1,
398+
},
399+
},
400+
})
401+
402+
// Only include assembly1 and assembly2, NOT assembly3
403+
const result = processFeaturesToFasta({
404+
features: makeMap([mockFeature]),
405+
samples: [{ id: 'assembly1' }, { id: 'assembly2' }],
406+
includeInsertions: true,
407+
showAllLetters: true,
408+
regions: [
409+
{
410+
refName: 'chr1',
411+
start: 100,
412+
end: 106,
413+
assemblyName: 'assembly1',
414+
},
415+
],
416+
})
417+
418+
// assembly1 has a 1-bp insertion (t), so the max insertion length is 1
419+
// (not 3, because assembly3's insertion should be ignored)
420+
// assembly1: AC-T-GTAC -> with insertion expanded: actgtac (7 chars)
420+
// assembly2: AC---GTAC -> with insertion expanded: ac-gtac (7 chars)
422+
expect(result[0]).toBe('actgtac')
423+
expect(result[1]).toBe('ac-gtac')
424+
expect(result[0]).toHaveLength(7)
425+
expect(result[1]).toHaveLength(7)
426+
})

src/util/fastaUtils.ts

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -85,7 +85,10 @@ export function processFeaturesToFasta({
8585
}
8686
i--
8787

88-
if (insertionSequence.length > 0) {
88+
// Only add insertion if it contains at least one actual base (not
89+
// just gaps). This filters out insertions that only exist in samples
90+
// that aren't currently visible.
91+
if (insertionSequence.length > 0 && /[^-]/.test(insertionSequence)) {
8992
const insertPos = leftCoord + o - region.start
9093
if (insertPos >= 0 && insertPos <= rlen) {
9194
const existing = insertionsAtPosition.get(insertPos) || []

0 commit comments

Comments
 (0)