
Commit 39bf0ad

cmdcolin and claude committed
Further simplifications and correctness fixes in decode hot path
- huffman: fix crash when inner loop reaches last code (bounds check was after
  array access); remove dead commented-out method; nest early-return in
  buildCaches into if block; use `?? -1` instead of `!` for bitCodeToValue
  lookup; remove spurious inner braces in _decode
- decodeRecord: fold lengthOnRef computation into decodeReadFeatures return
  value, eliminating the second pass over read features; fix push(...spread) in
  getAllMatedRecords; hoist duplicate `content` variable in bind(); extract
  decodeQualityScores/decodeReadBases helpers; use Uint8Array+decodeLatin1 in
  decodeReadBases fallback; remove dead RFFn alias; fix stale comment
- index.ts: inline ByteArrayStopCodec decode in bind() fast path; deduplicate
  tag decoder subarray body via readTagLen closure; fix indentation

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
1 parent 35bc443 commit 39bf0ad

7 files changed: 154 additions & 166 deletions


README.md

Lines changed: 44 additions & 29 deletions
@@ -3,7 +3,8 @@
 [![NPM version](https://img.shields.io/npm/v/@gmod/cram.svg?style=flat-square)](https://npmjs.org/package/@gmod/cram)
 [![Build Status](https://img.shields.io/github/actions/workflow/status/GMOD/cram-js/push.yml?branch=main)](https://github.com/GMOD/cram-js/actions?query=branch%3Amain+workflow%3APush+)
 
-Read CRAM files with pure JS, works in node or the browser. Supports CRAM 2.x and 3.x, `.crai` indexes, and bzip2/lzma codecs.
+Read CRAM files with pure JS, works in node or the browser. Supports CRAM 2.x
+and 3.x, `.crai` indexes, and bzip2/lzma codecs.
 
 ## Install
 
@@ -53,7 +54,11 @@ samHeader
 })
 
 // Fetch records for a range (1-based, closed coordinates)
-const records = await indexedFile.getRecordsForRange(nameToId['chr1'], 10000, 20000)
+const records = await indexedFile.getRecordsForRange(
+  nameToId['chr1'],
+  10000,
+  20000,
+)
 
 for (const record of records) {
   console.log(record.readName, record.alignmentStart, record.mappingQuality)
@@ -66,25 +71,27 @@ for (const record of records) {
 }
 ```
 
-See the [example directory](./example) for browser usage with `<script>` tag and the bundled `cram-bundle.js`.
+See the [example directory](./example) for browser usage with `<script>` tag and
+the bundled `cram-bundle.js`.
 
 ## API
 
 ### `IndexedCramFile`
 
 ```js
 new IndexedCramFile({
-  cramPath, // local path
-  cramUrl, // remote URL
-  cramFilehandle, // generic-filehandle2 compatible handle
-  index, // CraiIndex instance (or any object with getEntriesForRange)
-  seqFetch, // async (seqId, start, end) => string
+  cramPath,         // local path
+  cramUrl,          // remote URL
+  cramFilehandle,   // generic-filehandle2 compatible handle
+  index,            // CraiIndex instance (or any object with getEntriesForRange)
+  seqFetch,         // async (seqId, start, end) => string
   checkSequenceMD5, // default true; set false to avoid large reference fetches
-  cacheSize, // max cached records, default 20000
+  cacheSize,        // max cached records, default 20000
 })
 ```
 
-- `getRecordsForRange(seqId, start, end, opts?)` → `Promise<CramRecord[]>` — 1-based closed coords. `opts`: `{ viewAsPairs, pairAcrossChr, maxInsertSize }`
+- `getRecordsForRange(seqId, start, end, opts?)` → `Promise<CramRecord[]>` —
+  1-based closed coords. `opts`: `{ viewAsPairs, pairAcrossChr, maxInsertSize }`
 - `hasDataForReferenceSequence(seqId)` → `Promise<boolean>`
 
 ### `CraiIndex`
@@ -93,29 +100,33 @@ Takes `{ path, url, filehandle }` — one of the three is required.
 
 ### `CramRecord`
 
-| Field | Description |
-|---|---|
-| `readName` | read name |
-| `sequenceId` | numeric reference ID |
-| `alignmentStart` | 1-based start |
-| `qualityScores` | `Int8Array` of per-base quality scores |
-| `readFeatures` | array of read features (see below) |
-| `tags` | auxiliary tags object |
+| Field            | Description                            |
+| ---------------- | -------------------------------------- |
+| `readName`       | read name                              |
+| `sequenceId`     | numeric reference ID                   |
+| `alignmentStart` | 1-based start                          |
+| `qualityScores`  | `Int8Array` of per-base quality scores |
+| `readFeatures`   | array of read features (see below)     |
+| `tags`           | auxiliary tags object                  |
 
-Flag methods (return `boolean`): `isPaired`, `isProperlyPaired`, `isSegmentUnmapped`, `isMateUnmapped`, `isReverseComplemented`, `isMateReverseComplemented`, `isRead1`, `isRead2`, `isSecondary`, `isFailedQc`, `isDuplicate`, `isSupplementary`
+Flag methods (return `boolean`): `isPaired`, `isProperlyPaired`,
+`isSegmentUnmapped`, `isMateUnmapped`, `isReverseComplemented`,
+`isMateReverseComplemented`, `isRead1`, `isRead2`, `isSecondary`, `isFailedQc`,
+`isDuplicate`, `isSupplementary`
 
-`getReadBases()` — returns the read sequence string. Requires `seqFetch` and is populated automatically by `getRecordsForRange`.
+`getReadBases()` — returns the read sequence string. Requires `seqFetch` and is
+populated automatically by `getRecordsForRange`.
 
 ### ReadFeatures
 
 Each entry in `record.readFeatures`:
 
-| Field | Description |
-|---|---|
-| `code` | feature type — one of `bqBXIDiQNSPH` (see CRAM spec §8) |
-| `pos` | read position (1-based) |
-| `refPos` | reference position (1-based) |
-| `ref` / `sub` | reference and substituted base (code `X` only) |
+| Field         | Description                                             |
+| ------------- | ------------------------------------------------------- |
+| `code`        | feature type — one of `bqBXIDiQNSPH` (see CRAM spec §8) |
+| `pos`         | read position (1-based)                                 |
+| `refPos`      | reference position (1-based)                            |
+| `ref` / `sub` | reference and substituted base (code `X` only)          |
 
 ### Error classes
@@ -125,19 +136,23 @@ Each entry in `record.readFeatures`:
 
 ## Publishing
 
-Push a git tag to trigger a release via GitHub Actions and [npm trusted publishing](https://docs.npmjs.com/generating-provenance-statements).
+Push a git tag to trigger a release via GitHub Actions and
+[npm trusted publishing](https://docs.npmjs.com/generating-provenance-statements).
 
 ## Academic Use
 
-Written with [NHGRI](http://genome.gov) funding as part of [JBrowse](http://jbrowse.org). If you use this in a publication, please cite the most recent JBrowse paper at [jbrowse.org](http://jbrowse.org).
+Written with [NHGRI](http://genome.gov) funding as part of
+[JBrowse](http://jbrowse.org). If you use this in a publication, please cite the
+most recent JBrowse paper at [jbrowse.org](http://jbrowse.org).
 
 ## License
 
 MIT © [Robert Buels](https://github.com/rbuels)
 
 ## Publishing
 
-[Trusted publishing](https://docs.npmjs.com/about-trusted-publishing) via GitHub Actions.
+[Trusted publishing](https://docs.npmjs.com/about-trusted-publishing) via GitHub
+Actions.
 
 ```bash
 npm version patch # or minor/major

src/craiIndex.ts

Lines changed: 1 addition & 1 deletion
@@ -25,7 +25,7 @@ function addRecordToIndex(index: ParsedIndex, record: number[]) {
     index[s] = []
   }
 
-  index[s]!.push({
+  index[s].push({
    start: start!,
    span: span!,
    containerStart: containerStart!,
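The dropped `!` is safe because the guard immediately above guarantees the entry exists. A minimal standalone sketch of the pattern, with a hypothetical `ParsedIndex` shape standing in for the real one:

```typescript
// Hypothetical shape standing in for cram-js's ParsedIndex
type ParsedIndex = Record<string, { start: number; span: number }[]>

const index: ParsedIndex = {}

function addRecordToIndex(seqId: string, start: number, span: number) {
  // After this guard the entry is known to be an array, so no
  // non-null assertion is needed on the push below
  if (!index[seqId]) {
    index[seqId] = []
  }
  index[seqId].push({ start, span })
}

addRecordToIndex('0', 100, 50)
addRecordToIndex('0', 200, 25)
```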

src/cramFile/codecs/huffman.ts

Lines changed: 17 additions & 26 deletions
@@ -152,14 +152,12 @@ export default class HuffmanIntCodec extends CramCodec<
     this.sortedValuesByBitCode = this.sortedCodes.map(c => c.value)
     this.sortedBitCodes = this.sortedCodes.map(c => c.bitCode)
     this.sortedBitLengthsByBitCode = this.sortedCodes.map(c => c.bitLength)
-    if (this.sortedBitCodes.length === 0) {
-      return
-    }
-    const maxBitCode = Math.max(...this.sortedBitCodes)
-
-    this.bitCodeToValue = new Array(maxBitCode + 1).fill(-1)
-    for (let i = 0; i < this.sortedBitCodes.length; i += 1) {
-      this.bitCodeToValue[this.sortedCodes[i]!.bitCode] = i
+    if (this.sortedBitCodes.length > 0) {
+      const maxBitCode = Math.max(...this.sortedBitCodes)
+      this.bitCodeToValue = new Array(maxBitCode + 1).fill(-1)
+      for (let i = 0; i < this.sortedBitCodes.length; i += 1) {
+        this.bitCodeToValue[this.sortedCodes[i]!.bitCode] = i
+      }
     }
   }
 
@@ -172,10 +170,6 @@ export default class HuffmanIntCodec extends CramCodec<
     return this._decode(slice, coreDataBlock, cursors.coreBlock)
   }
 
-  // _decodeNull() {
-  //   return -1
-  // }
-
   // the special case for zero-length codes
   _decodeZeroLengthCode() {
     return this.sortedCodes[0]!.value
@@ -194,20 +188,17 @@ export default class HuffmanIntCodec extends CramCodec<
         bits |= getBitsInline(input, coreCursor, bitsToRead)
       }
       prevLen = length
-      {
-        const index = this.bitCodeToValue[bits]!
-        if (index > -1 && this.sortedBitLengthsByBitCode[index] === length) {
-          return this.sortedValuesByBitCode[index]!
-        }
-
-        for (
-          let j = i;
-          this.sortedCodes[j + 1]!.bitLength! === length &&
-          j < this.sortedCodes.length;
-          j += 1
-        ) {
-          i += 1
-        }
+      const index = this.bitCodeToValue[bits] ?? -1
+      if (index > -1 && this.sortedBitLengthsByBitCode[index] === length) {
+        return this.sortedValuesByBitCode[index]!
+      }
+      for (
+        let j = i;
+        j + 1 < this.sortedCodes.length &&
+        this.sortedCodes[j + 1]!.bitLength === length;
+        j += 1
+      ) {
+        i += 1
       }
     }
     throw new CramMalformedError('Huffman symbol not found.')
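The `?? -1` change matters because `bits` can exceed the lookup table's length, where indexing returns `undefined` at runtime; the old non-null assertion only silenced the compiler. A standalone sketch with made-up table contents:

```typescript
// Hypothetical lookup table: index = bit code, value = symbol index, -1 = unused
const bitCodeToValue: number[] = [2, -1, 0, 1]

function lookupSymbol(bits: number): number {
  // Out-of-range reads yield undefined at runtime even though the element
  // type is number; `?? -1` collapses that case into the -1 sentinel the
  // caller already checks for
  return bitCodeToValue[bits] ?? -1
}
```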

src/cramFile/slice/decodeRecord.ts

Lines changed: 26 additions & 50 deletions
@@ -52,16 +52,9 @@ export interface BoundDecoders {
   TN(): number | undefined
 }
 
-/**
- * parse a BAM tag's array value from a binary buffer
- * @private
- */
-// Uses DataView instead of typed arrays (e.g. new Int32Array(buffer.buffer))
-// because the buffer may be a subarray of a larger ArrayBuffer. Typed array
-// constructors like Int32Array interpret .buffer as the entire underlying
-// ArrayBuffer starting at byte 0, ignoring the subarray's byteOffset. This
-// caused silent data corruption when reading tag values. DataView with explicit
-// byteOffset reads from the correct position within the parent buffer.
+// Uses DataView rather than typed arrays because the buffer is a subarray of a
+// larger ArrayBuffer. Int32Array(buffer.buffer) would start at byte 0 of the
+// parent, ignoring buffer.byteOffset, causing silent data corruption.
 function parseTagValueArray(buffer: Uint8Array) {
   const arrayType = String.fromCharCode(buffer[0]!)

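The retained comment can be demonstrated directly: a typed-array view built from a subarray's `.buffer` ignores its `byteOffset`, while a `DataView` constructed with that offset reads the intended bytes. A standalone sketch:

```typescript
// Parent buffer: 8 bytes, with the little-endian int32 value 42 at byte 4
const parent = new Uint8Array(8)
new DataView(parent.buffer).setInt32(4, 42, true)

const sub = parent.subarray(4) // byteOffset = 4, byteLength = 4

// Wrong: sub.buffer is the whole parent ArrayBuffer, so this reads bytes 0-3
const wrong = new Int32Array(sub.buffer, 0, 1)[0]

// Right: DataView with the subarray's explicit byteOffset reads bytes 4-7
const right = new DataView(sub.buffer, sub.byteOffset, sub.byteLength).getInt32(
  0,
  true,
)
```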
@@ -148,14 +141,13 @@ function parseTagData(tagType: string, buffer: Uint8Array) {
   throw new CramMalformedError(`Unrecognized tag type ${tagType}`)
 }
 
-// Read-feature schema: a charCode-indexed array of [letter, fn] tuples where
-// fn() decodes the feature's data, fully transformed
-// (character → fromCharCode, string → decodeLatin1, numArray → Array.from,
-// number → identity, B → [base, qualityScore]). Built once per slice; the
-// inner loop becomes a charCode lookup + monomorphic call.
+// Read-feature schema: a charCode-indexed array of [code, fn] tuples where
+// fn() decodes and transforms the feature's data (character → fromCharCode,
+// string → decodeLatin1, numArray → Array.from, number → identity,
+// B → [base, qualityScore]). Built once per slice; the inner loop becomes
+// a charCode lookup + monomorphic call with no per-feature allocation.
 type RFData = string | number | number[] | [string, number]
-type RFFn = () => RFData
-export type RFEntry = readonly [code: string, fn: RFFn]
+export type RFEntry = readonly [code: string, fn: () => RFData]
 
 export function buildRFSchema(
   bd: BoundDecoders,
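The schema comment describes a charCode-indexed dispatch table. A stripped-down sketch of the idea, with stand-in decoder functions in place of the real bound decoders:

```typescript
type Entry = readonly [code: string, fn: () => string | number]

// charCode-indexed array: one array lookup + one monomorphic call per
// feature, instead of dispatching on single-character strings
const schema: (Entry | undefined)[] = new Array(128)
schema['D'.charCodeAt(0)] = ['D', () => 3] // stand-in: deletion length
schema['X'.charCodeAt(0)] = ['X', () => 1] // stand-in: substitution code

function decodeFeature(charCode: number) {
  const entry = schema[charCode]
  if (!entry) {
    throw new Error(`unknown read feature code ${charCode}`)
  }
  const [code, fn] = entry
  return { code, data: fn() }
}
```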
@@ -165,7 +157,7 @@ export function buildRFSchema(
   const arr: (RFEntry | undefined)[] = new Array(128)
   arr['B'.charCodeAt(0)] = [
     'B',
-    () => [String.fromCharCode(bd.BA()!), bd.QS()!],
+    () => [String.fromCharCode(bd.BA()!), bd.QS()!] as [string, number],
   ]
   arr['X'.charCodeAt(0)] = ['X', () => bd.BS()!]
   arr['D'.charCodeAt(0)] = ['D', () => bd.DL()!]
@@ -186,14 +178,10 @@ function decodeReadFeatures(
   readFeatureCount: number,
   bd: BoundDecoders,
   schema: (RFEntry | undefined)[],
-) {
-  // Track the running offset between ref and read coordinates so that
-  // refPos = readPos + refOffset. Deletions advance ref past consumed
-  // ref bases (offset goes up); insertions advance read past consumed
-  // read bases (offset goes down). This mirrors CIGAR consume-ref vs
-  // consume-read semantics.
+): [ReadFeature[], number] {
   let readPos = 0
-  let refOffset = alignmentStart - 1
+  let refDelta = 0
+  const base = alignmentStart - 1
   const readFeatures: ReadFeature[] = new Array(readFeatureCount)
   const decodeFC = bd.FC
   const decodeFP = bd.FP
@@ -215,19 +203,19 @@
     readFeatures[i] = {
       code,
       pos: readPos,
-      refPos: readPos + refOffset,
+      refPos: readPos + base + refDelta,
       data,
     } as ReadFeature
 
     if (code === 'D' || code === 'N') {
-      refOffset += data as number
+      refDelta += data as number
     } else if (code === 'I' || code === 'S') {
-      refOffset -= (data as string).length
+      refDelta -= (data as string).length
     } else if (code === 'i') {
-      refOffset -= 1
+      refDelta -= 1
     }
   }
-  return readFeatures
+  return [readFeatures, refDelta]
 }
 
 export type BulkByteRawDecoder = (
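Returning `refDelta` is what lets the caller fold the old second pass away: deletions and skips consume reference bases (delta up), insertions and soft clips consume read bases (delta down), so `lengthOnRef = readLength + refDelta`, mirroring CIGAR consume-ref vs consume-read semantics. A standalone sketch with made-up features:

```typescript
// Hypothetical read features: D = deletion, I = insertion, i = single-base insert
const features: { code: string; data: number | string }[] = [
  { code: 'D', data: 3 },    // consumes 3 extra reference bases
  { code: 'I', data: 'AC' }, // consumes 2 read bases, no reference bases
  { code: 'i', data: 'G' },  // consumes 1 read base
]

let refDelta = 0
for (const { code, data } of features) {
  if (code === 'D' || code === 'N') {
    refDelta += data as number
  } else if (code === 'I' || code === 'S') {
    refDelta -= (data as string).length
  } else if (code === 'i') {
    refDelta -= 1
  }
}

const readLength = 100
const lengthOnRef = readLength + refDelta // 100 + 3 - 2 - 1
```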
@@ -260,11 +248,11 @@ function decodeReadBases(
   if (raw) {
     return decodeLatin1(raw)
   }
-  let s = ''
+  const buf = new Uint8Array(readLength)
   for (let i = 0; i < readLength; i++) {
-    s += String.fromCharCode(decodeBA()!)
+    buf[i] = decodeBA()!
   }
-  return s
+  return decodeLatin1(buf)
 }
 
 export type BoundTagDecoders = Record<
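The fallback change replaces per-character string concatenation with one byte buffer and a single latin1 decode. Assuming `decodeLatin1` is roughly `TextDecoder('latin1').decode`, a standalone sketch with a stand-in per-base decoder:

```typescript
const latin1 = new TextDecoder('latin1')

// Stand-in for a bound per-base decoder returning one char code per call
function makeDecoder(bases: string) {
  let i = 0
  return () => bases.charCodeAt(i++)
}

function decodeReadBases(decodeBA: () => number, readLength: number): string {
  // One allocation + one decode instead of readLength string concatenations
  const buf = new Uint8Array(readLength)
  for (let i = 0; i < readLength; i++) {
    buf[i] = decodeBA()
  }
  return latin1.decode(buf)
}
```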
@@ -304,7 +292,7 @@ export default function decodeRecord(
     : sliceHeader.parsedContent.refSeqId
 
   const readLength = bd.RL()!
-  // if APDelta, will calculate the true start in a second pass
+  // if APDelta, AP is a delta from the previous record's alignmentStart
   let alignmentStart = bd.AP()!
   if (compressionScheme.APdelta) {
     alignmentStart = alignmentStart + cursors.lastAlignmentStart
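The corrected comment describes plain delta coding: with `APdelta` set, each record's `AP` value is an offset from the previous record's start, accumulated through a cursor. A minimal sketch of that decoding step, with a stand-in cursors object:

```typescript
// Stand-in for the slice's cursor state
const cursors = { lastAlignmentStart: 0 }

function decodeAlignmentStart(ap: number, apDelta: boolean): number {
  let alignmentStart = ap
  if (apDelta) {
    // AP is relative to the previous record's alignmentStart
    alignmentStart += cursors.lastAlignmentStart
  }
  cursors.lastAlignmentStart = alignmentStart
  return alignmentStart
}
```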
@@ -393,28 +381,16 @@
   if (!BamFlagsDecoder.isSegmentUnmapped(flags)) {
     // reading read features
     const readFeatureCount = bd.FN()!
+    lengthOnRef = readLength
     if (readFeatureCount) {
-      readFeatures = decodeReadFeatures(
+      const [features, refDelta] = decodeReadFeatures(
         alignmentStart,
         readFeatureCount,
         bd,
         rfSchema,
       )
-    }
-
-    // compute the read's true span on the reference sequence, and the end
-    // coordinate of the alignment on the reference
-    lengthOnRef = readLength
-    if (readFeatures) {
-      for (const { code, data } of readFeatures) {
-        if (code === 'D' || code === 'N') {
-          lengthOnRef += data
-        } else if (code === 'I' || code === 'S') {
-          lengthOnRef = lengthOnRef - data.length
-        } else if (code === 'i') {
-          lengthOnRef = lengthOnRef - 1
-        }
-      }
+      readFeatures = features
+      lengthOnRef += refDelta
     }
     if (Number.isNaN(lengthOnRef)) {
       console.warn(
