Skip to content

Commit bb7febd

Browse files
cmdcolinclaude
andcommitted
Further simplifications and correctness fixes in decode hot path
- huffman: fix crash when inner loop reaches last code (bounds check was after array access); remove dead commented-out method; nest early-return in buildCaches into if block; use ?? -1 instead of ! for bitCodeToValue lookup; remove spurious inner braces in _decode - decodeRecord: fold lengthOnRef computation into decodeReadFeatures return value, eliminating the second pass over read features; fix push(...spread) in getAllMatedRecords; hoist duplicate `content` variable in bind(); extract decodeQualityScores/decodeReadBases helpers; use Uint8Array+decodeLatin1 in decodeReadBases fallback; remove dead RFFn alias; fix stale comment - index.ts: inline ByteArrayStopCodec decode in bind() fast path; deduplicate tag decoder subarray body via readTagLen closure; fix indentation Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
1 parent 35bc443 commit bb7febd

3 files changed

Lines changed: 61 additions & 109 deletions

File tree

src/cramFile/codecs/huffman.ts

Lines changed: 17 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -152,14 +152,12 @@ export default class HuffmanIntCodec extends CramCodec<
152152
this.sortedValuesByBitCode = this.sortedCodes.map(c => c.value)
153153
this.sortedBitCodes = this.sortedCodes.map(c => c.bitCode)
154154
this.sortedBitLengthsByBitCode = this.sortedCodes.map(c => c.bitLength)
155-
if (this.sortedBitCodes.length === 0) {
156-
return
157-
}
158-
const maxBitCode = Math.max(...this.sortedBitCodes)
159-
160-
this.bitCodeToValue = new Array(maxBitCode + 1).fill(-1)
161-
for (let i = 0; i < this.sortedBitCodes.length; i += 1) {
162-
this.bitCodeToValue[this.sortedCodes[i]!.bitCode] = i
155+
if (this.sortedBitCodes.length > 0) {
156+
const maxBitCode = Math.max(...this.sortedBitCodes)
157+
this.bitCodeToValue = new Array(maxBitCode + 1).fill(-1)
158+
for (let i = 0; i < this.sortedBitCodes.length; i += 1) {
159+
this.bitCodeToValue[this.sortedCodes[i]!.bitCode] = i
160+
}
163161
}
164162
}
165163

@@ -172,10 +170,6 @@ export default class HuffmanIntCodec extends CramCodec<
172170
return this._decode(slice, coreDataBlock, cursors.coreBlock)
173171
}
174172

175-
// _decodeNull() {
176-
// return -1
177-
// }
178-
179173
// the special case for zero-length codes
180174
_decodeZeroLengthCode() {
181175
return this.sortedCodes[0]!.value
@@ -194,20 +188,17 @@ export default class HuffmanIntCodec extends CramCodec<
194188
bits |= getBitsInline(input, coreCursor, bitsToRead)
195189
}
196190
prevLen = length
197-
{
198-
const index = this.bitCodeToValue[bits]!
199-
if (index > -1 && this.sortedBitLengthsByBitCode[index] === length) {
200-
return this.sortedValuesByBitCode[index]!
201-
}
202-
203-
for (
204-
let j = i;
205-
this.sortedCodes[j + 1]!.bitLength! === length &&
206-
j < this.sortedCodes.length;
207-
j += 1
208-
) {
209-
i += 1
210-
}
191+
const index = this.bitCodeToValue[bits] ?? -1
192+
if (index > -1 && this.sortedBitLengthsByBitCode[index] === length) {
193+
return this.sortedValuesByBitCode[index]!
194+
}
195+
for (
196+
let j = i;
197+
j + 1 < this.sortedCodes.length &&
198+
this.sortedCodes[j + 1]!.bitLength === length;
199+
j += 1
200+
) {
201+
i += 1
211202
}
212203
}
213204
throw new CramMalformedError('Huffman symbol not found.')

src/cramFile/slice/decodeRecord.ts

Lines changed: 23 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -52,16 +52,9 @@ export interface BoundDecoders {
5252
TN(): number | undefined
5353
}
5454

55-
/**
56-
* parse a BAM tag's array value from a binary buffer
57-
* @private
58-
*/
59-
// Uses DataView instead of typed arrays (e.g. new Int32Array(buffer.buffer))
60-
// because the buffer may be a subarray of a larger ArrayBuffer. Typed array
61-
// constructors like Int32Array interpret .buffer as the entire underlying
62-
// ArrayBuffer starting at byte 0, ignoring the subarray's byteOffset. This
63-
// caused silent data corruption when reading tag values. DataView with explicit
64-
// byteOffset reads from the correct position within the parent buffer.
55+
// Uses DataView rather than typed arrays because the buffer is a subarray of a
56+
// larger ArrayBuffer. Int32Array(buffer.buffer) would start at byte 0 of the
57+
// parent, ignoring buffer.byteOffset, causing silent data corruption.
6558
function parseTagValueArray(buffer: Uint8Array) {
6659
const arrayType = String.fromCharCode(buffer[0]!)
6760

@@ -148,25 +141,21 @@ function parseTagData(tagType: string, buffer: Uint8Array) {
148141
throw new CramMalformedError(`Unrecognized tag type ${tagType}`)
149142
}
150143

151-
// Read-feature schema: a charCode-indexed array of [letter, fn] tuples where
152-
// fn() decodes the feature's data, fully transformed
153-
// (character → fromCharCode, string → decodeLatin1, numArray → Array.from,
154-
// number → identity, B → [base, qualityScore]). Built once per slice; the
155-
// inner loop becomes a charCode lookup + monomorphic call.
144+
// Read-feature schema: a charCode-indexed array of [code, fn] tuples where
145+
// fn() decodes and transforms the feature's data (character → fromCharCode,
146+
// string → decodeLatin1, numArray → Array.from, number → identity,
147+
// B → [base, qualityScore]). Built once per slice; the inner loop becomes
148+
// a charCode lookup + monomorphic call with no per-feature allocation.
156149
type RFData = string | number | number[] | [string, number]
157-
type RFFn = () => RFData
158-
export type RFEntry = readonly [code: string, fn: RFFn]
150+
export type RFEntry = readonly [code: string, fn: () => RFData]
159151

160152
export function buildRFSchema(
161153
bd: BoundDecoders,
162154
majorVersion: number,
163155
): (RFEntry | undefined)[] {
164156
const SC = majorVersion > 1 ? bd.SC : bd.IN
165157
const arr: (RFEntry | undefined)[] = new Array(128)
166-
arr['B'.charCodeAt(0)] = [
167-
'B',
168-
() => [String.fromCharCode(bd.BA()!), bd.QS()!],
169-
]
158+
arr['B'.charCodeAt(0)] = ['B', () => [String.fromCharCode(bd.BA()!), bd.QS()!] as [string, number]]
170159
arr['X'.charCodeAt(0)] = ['X', () => bd.BS()!]
171160
arr['D'.charCodeAt(0)] = ['D', () => bd.DL()!]
172161
arr['I'.charCodeAt(0)] = ['I', () => decodeLatin1(bd.IN()!)]
@@ -186,12 +175,10 @@ function decodeReadFeatures(
186175
readFeatureCount: number,
187176
bd: BoundDecoders,
188177
schema: (RFEntry | undefined)[],
189-
) {
190-
// Track the running offset between ref and read coordinates so that
191-
// refPos = readPos + refOffset. Deletions advance ref past consumed
192-
// ref bases (offset goes up); insertions advance read past consumed
193-
// read bases (offset goes down). This mirrors CIGAR consume-ref vs
194-
// consume-read semantics.
178+
): [ReadFeature[], number] {
179+
// refOffset tracks the running base-pair gap between read and reference
180+
// coordinates. Returns the net delta (refOffset - initial) so the caller
181+
// can compute lengthOnRef = readLength + delta without a second pass.
195182
let readPos = 0
196183
let refOffset = alignmentStart - 1
197184
const readFeatures: ReadFeature[] = new Array(readFeatureCount)
@@ -227,7 +214,7 @@ function decodeReadFeatures(
227214
refOffset -= 1
228215
}
229216
}
230-
return readFeatures
217+
return [readFeatures, refOffset - (alignmentStart - 1)]
231218
}
232219

233220
export type BulkByteRawDecoder = (
@@ -260,11 +247,11 @@ function decodeReadBases(
260247
if (raw) {
261248
return decodeLatin1(raw)
262249
}
263-
let s = ''
250+
const buf = new Uint8Array(readLength)
264251
for (let i = 0; i < readLength; i++) {
265-
s += String.fromCharCode(decodeBA()!)
252+
buf[i] = decodeBA()!
266253
}
267-
return s
254+
return decodeLatin1(buf)
268255
}
269256

270257
export type BoundTagDecoders = Record<
@@ -304,7 +291,7 @@ export default function decodeRecord(
304291
: sliceHeader.parsedContent.refSeqId
305292

306293
const readLength = bd.RL()!
307-
// if APDelta, will calculate the true start in a second pass
294+
// if APDelta, AP is a delta from the previous record's alignmentStart
308295
let alignmentStart = bd.AP()!
309296
if (compressionScheme.APdelta) {
310297
alignmentStart = alignmentStart + cursors.lastAlignmentStart
@@ -393,28 +380,16 @@ export default function decodeRecord(
393380
if (!BamFlagsDecoder.isSegmentUnmapped(flags)) {
394381
// reading read features
395382
const readFeatureCount = bd.FN()!
383+
lengthOnRef = readLength
396384
if (readFeatureCount) {
397-
readFeatures = decodeReadFeatures(
385+
const [features, refDelta] = decodeReadFeatures(
398386
alignmentStart,
399387
readFeatureCount,
400388
bd,
401389
rfSchema,
402390
)
403-
}
404-
405-
// compute the read's true span on the reference sequence, and the end
406-
// coordinate of the alignment on the reference
407-
lengthOnRef = readLength
408-
if (readFeatures) {
409-
for (const { code, data } of readFeatures) {
410-
if (code === 'D' || code === 'N') {
411-
lengthOnRef += data
412-
} else if (code === 'I' || code === 'S') {
413-
lengthOnRef = lengthOnRef - data.length
414-
} else if (code === 'i') {
415-
lengthOnRef = lengthOnRef - 1
416-
}
417-
}
391+
readFeatures = features
392+
lengthOnRef += refDelta
418393
}
419394
if (Number.isNaN(lengthOnRef)) {
420395
console.warn(

src/cramFile/slice/index.ts

Lines changed: 21 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,9 @@ function calculateMultiSegmentMatedTemplateLength(
7070
'intra-slice mate record not found, this file seems malformed',
7171
)
7272
}
73-
records.push(...getAllMatedRecords(mateRecord))
73+
for (const r of getAllMatedRecords(mateRecord)) {
74+
records.push(r)
75+
}
7476
}
7577
return records
7678
}
@@ -549,11 +551,10 @@ export default class CramSlice {
549551
return () => undefined
550552
}
551553
const cursor = cursors.externalBlocks.getCursor(bid)
554+
const content = contentBlock.content
552555
if (codec.dataType === 'int') {
553-
const content = contentBlock.content
554556
return () => parseItf8(content, cursor)
555557
}
556-
const content = contentBlock.content
557558
return () => content[cursor.bytePosition++]
558559
}
559560
if (codec instanceof ByteArrayStopCodec) {
@@ -637,40 +638,25 @@ export default class CramSlice {
637638
const valContent = valContentBlock.content
638639
const valCursor = cursors.externalBlocks.getCursor(valBid)
639640
const lenPreDecoded = preDecodedIntBlocks.get(lenBid)
640-
if (lenPreDecoded) {
641-
boundTagDecoders[tagId] = () => {
642-
const length = lenPreDecoded.values[lenPreDecoded.index++]!
643-
if (length === 0) {
644-
return EMPTY_BYTES
645-
}
646-
const start = valCursor.bytePosition
647-
const end = start + length
648-
if (end > valContent.length) {
649-
throw new CramBufferOverrunError(
650-
'attempted to read beyond end of block. this file seems truncated.',
651-
)
652-
}
653-
valCursor.bytePosition = end
654-
return valContent.subarray(start, end)
641+
const lenContent = lenContentBlock.content
642+
const lenCursor = cursors.externalBlocks.getCursor(lenBid)
643+
const readTagLen = lenPreDecoded
644+
? () => lenPreDecoded.values[lenPreDecoded.index++]!
645+
: () => parseItf8(lenContent, lenCursor)
646+
boundTagDecoders[tagId] = () => {
647+
const length = readTagLen()
648+
if (length === 0) {
649+
return EMPTY_BYTES
655650
}
656-
} else {
657-
const lenContent = lenContentBlock.content
658-
const lenCursor = cursors.externalBlocks.getCursor(lenBid)
659-
boundTagDecoders[tagId] = () => {
660-
const length = parseItf8(lenContent, lenCursor)
661-
if (length === 0) {
662-
return EMPTY_BYTES
663-
}
664-
const start = valCursor.bytePosition
665-
const end = start + length
666-
if (end > valContent.length) {
667-
throw new CramBufferOverrunError(
668-
'attempted to read beyond end of block. this file seems truncated.',
669-
)
670-
}
671-
valCursor.bytePosition = end
672-
return valContent.subarray(start, end)
651+
const start = valCursor.bytePosition
652+
const end = start + length
653+
if (end > valContent.length) {
654+
throw new CramBufferOverrunError(
655+
'attempted to read beyond end of block. this file seems truncated.',
656+
)
673657
}
658+
valCursor.bytePosition = end
659+
return valContent.subarray(start, end)
674660
}
675661
} else {
676662
boundTagDecoders[tagId] = bindTagFallback(tagId)

0 commit comments

Comments
 (0)