Skip to content

Commit fe9d3e4

Browse files
committed
feat: Add optional maxRows argument to loadTSV
1 parent f36d1fb commit fe9d3e4

File tree

3 files changed

+50
-7
lines changed

3 files changed

+50
-7
lines changed

src/files/tsv.test.ts

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,48 @@ Deno.test('TSV loading', async (t) => {
6262
}
6363
})
6464

65+
await t.step('maxRows limits the number of rows read', async () => {
66+
const file = pathToFile('/long.tsv')
67+
// Use 1500 rows, more than the default initial capacity of 1000, so the row count cannot coincide with it
68+
const text = 'a\tb\tc\n' + '1\t2\t3\n'.repeat(1500)
69+
file.stream = streamFromString(text)
70+
71+
let map = await loadTSV(file, 0)
72+
assertEquals(map.a, [])
73+
assertEquals(map.b, [])
74+
assertEquals(map.c, [])
75+
76+
// Clear the memoization cache before reloading: it is keyed on the file only, not on maxRows.
77+
loadTSV.cache.clear()
78+
file.stream = streamFromString(text)
79+
map = await loadTSV(file, 1)
80+
assertEquals(map.a, ['1'])
81+
assertEquals(map.b, ['2'])
82+
assertEquals(map.c, ['3'])
83+
84+
loadTSV.cache.clear()
85+
file.stream = streamFromString(text)
86+
map = await loadTSV(file, 2)
87+
assertEquals(map.a, ['1', '1'])
88+
assertEquals(map.b, ['2', '2'])
89+
assertEquals(map.c, ['3', '3'])
90+
91+
loadTSV.cache.clear()
92+
file.stream = streamFromString(text)
93+
map = await loadTSV(file, -1)
94+
assertEquals(map.a, Array(1500).fill('1'))
95+
assertEquals(map.b, Array(1500).fill('2'))
96+
assertEquals(map.c, Array(1500).fill('3'))
97+
98+
loadTSV.cache.clear()
99+
// Check that maxRows does not truncate shorter files
100+
file.stream = streamFromString('a\tb\tc\n1\t2\t3\n4\t5\t6\n7\t8\t9\n')
101+
map = await loadTSV(file, 4)
102+
assertEquals(map.a, ['1', '4', '7'])
103+
assertEquals(map.b, ['2', '5', '8'])
104+
assertEquals(map.c, ['3', '6', '9'])
105+
})
106+
65107
// Tests will have populated the memoization cache
66108
await loadTSV.cache.clear()
67109
})

src/files/tsv.ts

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ import type { BIDSFile } from '../types/filetree.ts'
88
import { filememoizeAsync } from '../utils/memoize.ts'
99
import { createUTF8Stream } from './streams.ts'
1010

11-
async function _loadTSV(file: BIDSFile): Promise<ColumnsMap> {
11+
async function _loadTSV(file: BIDSFile, maxRows: number = -1): Promise<ColumnsMap> {
1212
const reader = file.stream
1313
.pipeThrough(createUTF8Stream())
1414
.pipeThrough(new TextLineStream())
@@ -19,11 +19,12 @@ async function _loadTSV(file: BIDSFile): Promise<ColumnsMap> {
1919
const headers = (headerRow.done || !headerRow.value) ? [] : headerRow.value.split('\t')
2020

2121
// Preallocate each column's array up front for construction efficiency
22-
const initialCapacity = 1000
22+
const initialCapacity = maxRows >= 0 ? maxRows : 1000
2323
const columns: string[][] = headers.map(() => new Array<string>(initialCapacity))
2424

25+
maxRows = maxRows >= 0 ? maxRows : Infinity
2526
let rowIndex = 0 // Keep in scope after loop
26-
for (; ; rowIndex++) {
27+
for (; rowIndex < maxRows; rowIndex++) {
2728
const { done, value } = await reader.read()
2829
if (done) break
2930

src/utils/memoize.ts

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -15,18 +15,18 @@ export const memoize = <T>(
1515
}
1616

1717
export function filememoizeAsync<F extends HasParent, T>(
18-
fn: (file: F) => Promise<T>,
19-
): WithCache<(file: F) => Promise<T>> {
18+
fn: (file: F, ...args: any[]) => Promise<T>,
19+
): WithCache<(file: F, ...args: any[]) => Promise<T>> {
2020
const cache = new Map<string, Map<F, T>>()
21-
const cached = async function (this: any, file: F): Promise<T> {
21+
const cached = async function (this: any, file: F, ...args: any[]): Promise<T> {
2222
let subcache = cache.get(file.parent.path)
2323
if (!subcache) {
2424
subcache = new Map()
2525
cache.set(file.parent.path, subcache)
2626
}
2727
let val = subcache.get(file)
2828
if (!val) {
29-
val = await fn.call(this, file)
29+
val = await fn.call(this, file, ...args)
3030
subcache.set(file, val)
3131
}
3232
return val

0 commit comments

Comments
 (0)