Skip to content

Commit f4eb327

Browse files
committed
rf: Factor ColumnsMap creation out of TSV loading
1 parent c1bec83 commit f4eb327

File tree

2 files changed

+43
-35
lines changed

2 files changed

+43
-35
lines changed

src/files/tsv.test.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,7 @@ Deno.test('TSV loading', async (t) => {
6464
try {
6565
await loadTSV(file)
6666
} catch (e: any) {
67-
assertObjectMatch(e, { code: 'TSV_EQUAL_ROWS', location: '/mismatched_row.tsv', line: 3 })
67+
assertObjectMatch(e, { code: 'TSV_EQUAL_ROWS', line: 3 })
6868
}
6969
})
7070

src/files/tsv.ts

Lines changed: 42 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,47 @@ import type { BIDSFile } from '../types/filetree.ts'
88
import { filememoizeAsync } from '../utils/memoize.ts'
99
import { createUTF8Stream } from './streams.ts'
1010

11+
/**
 * Read tab-separated rows from a line reader and collect them column-wise.
 *
 * Each value produced by `reader.read()` is treated as one row. A single empty
 * value immediately followed by end-of-stream is accepted as a trailing
 * newline; an empty value followed by more input is an error.
 *
 * Reported line numbers are `rowIndex + 2`, i.e. 1-based and offset by one
 * preceding line (presumably the header line, consumed by the caller).
 *
 * @param reader - Reader yielding one line of text per `read()` call.
 * @param headers - Column names; every row must split into exactly this many fields.
 * @param maxRows - Maximum number of rows to read. Negative means no limit;
 *   `Infinity`, fractional and very large values are also accepted.
 * @returns A ColumnsMap built from `[header, values]` pairs, with one value per row read.
 * @throws `{ code: 'TSV_EMPTY_LINE', line }` when an empty line is followed by more input.
 * @throws `{ code: 'TSV_EQUAL_ROWS', line }` when a row's field count differs from `headers.length`.
 */
async function loadColumns(
  reader: ReadableStreamDefaultReader<string>,
  headers: string[],
  maxRows: number,
): Promise<ColumnsMap> {
  // Initialize columns in array for construction efficiency.
  // The capacity is only a preallocation hint, so round and clamp it:
  // `new Array(n)` throws a RangeError when n is non-integer or exceeds
  // 2**32 - 1 (e.g. maxRows = Infinity). Columns still grow by doubling below.
  const defaultCapacity = 1000
  const maxInitialCapacity = 100000
  const initialCapacity = maxRows >= 0
    ? Math.min(Math.ceil(maxRows), maxInitialCapacity)
    : defaultCapacity
  const columns: string[][] = headers.map(() => new Array<string>(initialCapacity))

  maxRows = maxRows >= 0 ? maxRows : Infinity
  let rowIndex = 0 // Keep in scope after loop
  for (; rowIndex < maxRows; rowIndex++) {
    const { done, value } = await reader.read()
    if (done) break

    // Expect a newline at the end of the file, but otherwise error on empty lines
    if (!value) {
      const nextRow = await reader.read()
      if (nextRow.done) break
      throw { code: 'TSV_EMPTY_LINE', line: rowIndex + 2 }
    }

    const values = value.split('\t')
    if (values.length !== headers.length) {
      throw { code: 'TSV_EQUAL_ROWS', line: rowIndex + 2 }
    }
    columns.forEach((column, columnIndex) => {
      // Double array size if we exceed the current capacity
      if (rowIndex >= column.length) {
        column.length = column.length * 2
      }
      column[rowIndex] = values[columnIndex]
    })
  }

  // Construct map, truncating columns to number of rows read
  return new ColumnsMap(
    headers.map((header, index) => [header, columns[index].slice(0, rowIndex)]),
  )
}
51+
1152
async function _loadTSV(file: BIDSFile, maxRows: number = -1): Promise<ColumnsMap> {
1253
const reader = file.stream
1354
.pipeThrough(createUTF8Stream())
@@ -26,40 +67,7 @@ async function _loadTSV(file: BIDSFile, maxRows: number = -1): Promise<ColumnsMa
2667
}
2768
}
2869

29-
// Initialize columns in array for construction efficiency
30-
const initialCapacity = maxRows >= 0 ? maxRows : 1000
31-
const columns: string[][] = headers.map(() => new Array<string>(initialCapacity))
32-
33-
maxRows = maxRows >= 0 ? maxRows : Infinity
34-
let rowIndex = 0 // Keep in scope after loop
35-
for (; rowIndex < maxRows; rowIndex++) {
36-
const { done, value } = await reader.read()
37-
if (done) break
38-
39-
// Expect a newline at the end of the file, but otherwise error on empty lines
40-
if (!value) {
41-
const nextRow = await reader.read()
42-
if (nextRow.done) break
43-
throw { code: 'TSV_EMPTY_LINE', location: file.path, line: rowIndex + 2 }
44-
}
45-
46-
const values = value.split('\t')
47-
if (values.length !== headers.length) {
48-
throw { code: 'TSV_EQUAL_ROWS', location: file.path, line: rowIndex + 2 }
49-
}
50-
columns.forEach((column, columnIndex) => {
51-
// Double array size if we exceed the current capacity
52-
if (rowIndex >= column.length) {
53-
column.length = column.length * 2
54-
}
55-
column[rowIndex] = values[columnIndex]
56-
})
57-
}
58-
59-
// Construct map, truncating columns to number of rows read
60-
return new ColumnsMap(
61-
headers.map((header, index) => [header, columns[index].slice(0, rowIndex)]),
62-
)
70+
return await loadColumns(reader, headers, maxRows)
6371
} finally {
6472
await reader.cancel()
6573
}

0 commit comments

Comments
 (0)