Skip to content

Commit 88abfb8

Browse files
committed
test: Test .tsv.gz loading
1 parent 88b6af7 commit 88abfb8

File tree

1 file changed

+110
-1
lines changed

1 file changed

+110
-1
lines changed

src/files/tsv.test.ts

Lines changed: 110 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ import {
66
assertStrictEquals,
77
} from '@std/assert'
88
import { pathToFile } from './filetree.ts'
9-
import { loadTSV } from './tsv.ts'
9+
import { loadTSV, loadTSVGZ } from './tsv.ts'
1010
import { streamFromString } from '../tests/utils.ts'
1111
import { ColumnsMap } from '../types/columns.ts'
1212

@@ -178,3 +178,112 @@ Deno.test('TSV loading', async (t) => {
178178
// Tests will have populated the memoization cache
179179
loadTSV.cache.clear()
180180
})
181+
182+
/**
 * Test suite for `loadTSVGZ`.
 *
 * Each step builds a file with `pathToFile`, replaces its `stream` with an
 * in-memory stream (gzip-compressed via `CompressionStream`, except where a
 * step deliberately supplies uncompressed bytes), and checks either the
 * returned column map or the issue object the loader rejects with.
 */
Deno.test('TSVGZ loading', async (t) => {
  /** Builds a gzip-compressed byte stream from the given text. */
  const gzipped = (text: string) =>
    streamFromString(text).pipeThrough(new CompressionStream('gzip'))

  /**
   * Awaits `promise` and returns the value it rejects with.
   *
   * Throws if the promise resolves, or rejects with a non-object, so that a
   * "throws issue" step fails instead of silently passing when no issue is
   * raised.
   */
  const rejectionOf = async (
    promise: Promise<unknown>,
  ): Promise<Record<PropertyKey, unknown>> => {
    try {
      await promise
    } catch (reason) {
      if (typeof reason === 'object' && reason !== null) {
        return reason as Record<PropertyKey, unknown>
      }
      throw new Error(
        `Expected rejection with an issue object, got: ${String(reason)}`,
      )
    }
    throw new Error('Expected promise to reject, but it resolved')
  }

  await t.step('No header and empty file produces empty map', async () => {
    const file = pathToFile('/empty.tsv.gz')
    file.stream = gzipped('')

    const map = await loadTSVGZ(file, [])
    // map.size looks for a column called size, so work around it
    assertEquals(Object.keys(map).length, 0)
  })

  await t.step('Empty file produces header-only map', async () => {
    const file = pathToFile('/empty.tsv.gz')
    file.stream = gzipped('')

    const map = await loadTSVGZ(file, ['a', 'b', 'c'])
    assertEquals(map.a, [])
    assertEquals(map.b, [])
    assertEquals(map.c, [])
  })

  await t.step('Single column file produces single column maps', async () => {
    // Path uses the .tsv.gz extension for consistency with the other steps.
    const file = pathToFile('/single_column.tsv.gz')
    file.stream = gzipped('1\n2\n3\n')

    const map = await loadTSVGZ(file, ['a'])
    assertEquals(map.a, ['1', '2', '3'])
  })

  await t.step('Mismatched header length throws issue', async () => {
    const file = pathToFile('/single_column.tsv.gz')
    file.stream = gzipped('1\n2\n3\n')

    // One value per row, but two headers are supplied.
    const issue = await rejectionOf(loadTSVGZ(file, ['a', 'b']))
    assertObjectMatch(issue, { code: 'TSV_EQUAL_ROWS', line: 1 })
  })

  await t.step('Missing final newline is ignored', async () => {
    const file = pathToFile('/missing_newline.tsv.gz')
    file.stream = gzipped('1\n2\n3')

    const map = await loadTSVGZ(file, ['a'])
    assertEquals(map.a, ['1', '2', '3'])
  })

  await t.step('Empty row throws issue', async () => {
    const file = pathToFile('/empty_row.tsv.gz')
    file.stream = gzipped('1\t2\t3\n\n4\t5\t6\n')

    const issue = await rejectionOf(loadTSVGZ(file, ['a', 'b', 'c']))
    assertObjectMatch(issue, { code: 'TSV_EMPTY_LINE', line: 2 })
  })

  await t.step('Mislabeled TSV throws issue', async () => {
    const file = pathToFile('/mismatched_row.tsv.gz')
    // Deliberately NOT gzip-compressed: the .gz name mislabels plain text.
    file.stream = streamFromString('a\tb\tc\n1\t2\t3\n4\t5\n')

    const issue = await rejectionOf(loadTSVGZ(file, ['a', 'b', 'c']))
    assertObjectMatch(issue, { code: 'INVALID_GZIP' })
  })

  await t.step('maxRows limits the number of rows read', async () => {
    const file = pathToFile('/long.tsv.gz')
    // Use 1500 to avoid overlap with default initial capacity
    const headers = ['a', 'b', 'c']
    const text = '1\t2\t3\n'.repeat(1500)

    // The stream is reassigned before every load; each call gets a fresh one.
    file.stream = gzipped(text)
    let map = await loadTSVGZ(file, headers, 0)
    assertEquals(map.a, [])
    assertEquals(map.b, [])
    assertEquals(map.c, [])

    file.stream = gzipped(text)
    map = await loadTSVGZ(file, headers, 1)
    assertEquals(map.a, ['1'])
    assertEquals(map.b, ['2'])
    assertEquals(map.c, ['3'])

    file.stream = gzipped(text)
    map = await loadTSVGZ(file, headers, 2)
    assertEquals(map.a, ['1', '1'])
    assertEquals(map.b, ['2', '2'])
    assertEquals(map.c, ['3', '3'])

    // A maxRows of -1 is expected to load every row.
    file.stream = gzipped(text)
    map = await loadTSVGZ(file, headers, -1)
    assertEquals(map.a, Array(1500).fill('1'))
    assertEquals(map.b, Array(1500).fill('2'))
    assertEquals(map.c, Array(1500).fill('3'))

    // Check that maxRows does not truncate shorter files
    file.stream = gzipped('1\t2\t3\n4\t5\t6\n7\t8\t9\n')
    map = await loadTSVGZ(file, headers, 4)
    assertEquals(map.a, ['1', '4', '7'])
    assertEquals(map.b, ['2', '5', '8'])
    assertEquals(map.c, ['3', '6', '9'])
  })
})

0 commit comments

Comments
 (0)