@@ -6,7 +6,7 @@
   assertStrictEquals,
 } from '@std/assert'
 import { pathToFile } from './filetree.ts'
-import { loadTSV } from './tsv.ts'
+import { loadTSV, loadTSVGZ } from './tsv.ts'
 import { streamFromString } from '../tests/utils.ts'
 import { ColumnsMap } from '../types/columns.ts'

@@ -178,3 +178,112 @@ Deno.test('TSV loading', async (t) => { |
   // Tests will have populated the memoization cache
   loadTSV.cache.clear()
 })
+
+Deno.test('TSVGZ loading', async (t) => {
+  await t.step('No header and empty file produces empty map', async () => {
+    const file = pathToFile('/empty.tsv.gz')
+    file.stream = streamFromString('').pipeThrough(new CompressionStream('gzip'))
+
+    const map = await loadTSVGZ(file, [])
+    // map.size would look up a column called 'size', so count keys instead
+    assertEquals(Object.keys(map).length, 0)
+  })
+
+  await t.step('Empty file produces header-only map', async () => {
+    const file = pathToFile('/empty.tsv.gz')
+    file.stream = streamFromString('').pipeThrough(new CompressionStream('gzip'))
+
+    const map = await loadTSVGZ(file, ['a', 'b', 'c'])
+    assertEquals(map.a, [])
+    assertEquals(map.b, [])
+    assertEquals(map.c, [])
+  })
+
+  await t.step('Single column file produces single column maps', async () => {
+    const file = pathToFile('/single_column.tsv.gz')
+    file.stream = streamFromString('1\n2\n3\n').pipeThrough(new CompressionStream('gzip'))
+
+    const map = await loadTSVGZ(file, ['a'])
+    assertEquals(map.a, ['1', '2', '3'])
+  })
+
+  await t.step('Mismatched header length throws issue', async () => {
+    const file = pathToFile('/single_column.tsv.gz')
+    file.stream = streamFromString('1\n2\n3\n').pipeThrough(new CompressionStream('gzip'))
+
+    try {
+      await loadTSVGZ(file, ['a', 'b'])
+    } catch (e: any) {
+      assertObjectMatch(e, { code: 'TSV_EQUAL_ROWS', line: 1 })
+    }
+  })
+
+  await t.step('Missing final newline is ignored', async () => {
+    const file = pathToFile('/missing_newline.tsv.gz')
+    file.stream = streamFromString('1\n2\n3').pipeThrough(new CompressionStream('gzip'))
+
+    const map = await loadTSVGZ(file, ['a'])
+    assertEquals(map.a, ['1', '2', '3'])
+  })
+
+  await t.step('Empty row throws issue', async () => {
+    const file = pathToFile('/empty_row.tsv.gz')
+    file.stream = streamFromString('1\t2\t3\n\n4\t5\t6\n').pipeThrough(new CompressionStream('gzip'))
+
+    try {
+      await loadTSVGZ(file, ['a', 'b', 'c'])
+    } catch (e: any) {
+      assertObjectMatch(e, { code: 'TSV_EMPTY_LINE', line: 2 })
+    }
+  })
+
+  await t.step('Mislabeled TSV throws issue', async () => {
+    const file = pathToFile('/mismatched_row.tsv.gz')
+    // Stream is deliberately left uncompressed: plain text labeled .tsv.gz should be rejected
+    file.stream = streamFromString('a\tb\tc\n1\t2\t3\n4\t5\n')
+
+    try {
+      await loadTSVGZ(file, ['a', 'b', 'c'])
+    } catch (e: any) {
+      assertObjectMatch(e, { code: 'INVALID_GZIP' })
+    }
+  })
+
+  await t.step('maxRows limits the number of rows read', async () => {
+    const file = pathToFile('/long.tsv.gz')
+    // Use 1500 to avoid overlap with default initial capacity
+    const headers = ['a', 'b', 'c']
+    const text = '1\t2\t3\n'.repeat(1500)
+    file.stream = streamFromString(text).pipeThrough(new CompressionStream('gzip'))
+
+    let map = await loadTSVGZ(file, headers, 0)
+    assertEquals(map.a, [])
+    assertEquals(map.b, [])
+    assertEquals(map.c, [])
+
+    file.stream = streamFromString(text).pipeThrough(new CompressionStream('gzip'))
+    map = await loadTSVGZ(file, headers, 1)
+    assertEquals(map.a, ['1'])
+    assertEquals(map.b, ['2'])
+    assertEquals(map.c, ['3'])
+
+    file.stream = streamFromString(text).pipeThrough(new CompressionStream('gzip'))
+    map = await loadTSVGZ(file, headers, 2)
+    assertEquals(map.a, ['1', '1'])
+    assertEquals(map.b, ['2', '2'])
+    assertEquals(map.c, ['3', '3'])
+
+    file.stream = streamFromString(text).pipeThrough(new CompressionStream('gzip'))
+    map = await loadTSVGZ(file, headers, -1)
+    assertEquals(map.a, Array(1500).fill('1'))
+    assertEquals(map.b, Array(1500).fill('2'))
+    assertEquals(map.c, Array(1500).fill('3'))
+
+    // Check that maxRows does not truncate shorter files
+    file.stream = streamFromString('1\t2\t3\n4\t5\t6\n7\t8\t9\n').pipeThrough(new CompressionStream('gzip'))
+    map = await loadTSVGZ(file, headers, 4)
+    assertEquals(map.a, ['1', '4', '7'])
+    assertEquals(map.b, ['2', '5', '8'])
+    assertEquals(map.c, ['3', '6', '9'])
+  })
+})
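
Note for readers skimming the diff: below is a rough sketch of the behaviour these tests pin down, assuming `loadTSVGZ(file, headers, maxRows)` decompresses `file.stream` with `DecompressionStream` and then applies the same row/column rules as `loadTSV`. The name `loadTSVGZSketch`, the plain thrown object literals, and the eager (non-streaming) parsing are illustration-only assumptions, not the actual implementation in `tsv.ts`.

```ts
// Illustrative sketch only -- not the real loadTSVGZ. It assumes the file
// object exposes a ReadableStream<Uint8Array> as `stream`, that headers are
// supplied by the caller (not read from the file), and that maxRows < 0
// means "read everything".
async function loadTSVGZSketch(
  file: { stream: ReadableStream<Uint8Array> },
  headers: string[],
  maxRows = -1,
): Promise<Record<string, string[]>> {
  let text: string
  try {
    // DecompressionStream rejects while reading if the input is not gzip
    const decompressed = file.stream.pipeThrough(new DecompressionStream('gzip'))
    text = await new Response(decompressed).text()
  } catch {
    throw { code: 'INVALID_GZIP' }
  }

  const columns: Record<string, string[]> = {}
  for (const header of headers) columns[header] = []

  // A missing final newline is tolerated; a trailing one adds no empty row
  const lines = text.split('\n')
  if (lines.at(-1) === '') lines.pop()

  for (const [index, line] of lines.entries()) {
    if (maxRows >= 0 && index >= maxRows) break
    if (line === '') throw { code: 'TSV_EMPTY_LINE', line: index + 1 }
    const values = line.split('\t')
    if (values.length !== headers.length) {
      throw { code: 'TSV_EQUAL_ROWS', line: index + 1 }
    }
    values.forEach((value, column) => columns[headers[column]].push(value))
  }
  return columns
}
```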
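The tests also rely on `streamFromString` from `../tests/utils.ts` yielding byte chunks, since `CompressionStream` only accepts `BufferSource` chunks. A minimal stand-in, assuming UTF-8 encoding and a single chunk, would look like this:

```ts
// Hypothetical stand-in for the streamFromString test helper: a one-chunk
// ReadableStream of UTF-8 bytes, suitable for piping through
// CompressionStream('gzip') as the tests above do.
function streamFromString(text: string): ReadableStream<Uint8Array> {
  return new ReadableStream<Uint8Array>({
    start(controller) {
      controller.enqueue(new TextEncoder().encode(text))
      controller.close()
    },
  })
}
```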