Skip to content

Commit f7f6361

Browse files
committed
An attempt to convert Parquet files to ColumnsMap objects.
1 parent f745926 commit f7f6361

File tree

4 files changed

+34
-0
lines changed

4 files changed

+34
-0
lines changed

deno.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@
3232
"@cliffy/command": "jsr:@effigies/[email protected]",
3333
"@cliffy/table": "jsr:@effigies/[email protected]",
3434
"@hed/validator": "npm:[email protected]",
35+
"@hyparquet": "npm:[email protected]",
3536
"@ignore": "npm:[email protected]",
3637
"@libs/xml": "jsr:@libs/[email protected]",
3738
"@mango/nifti": "npm:@bids/[email protected]",

src/files/parquet.test.ts

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
import { assertEquals } from '@std/assert'
2+
import { FileIgnoreRules } from './ignore.ts'
3+
import { BIDSFileDeno } from './deno.ts'
4+
5+
import { loadParquet } from './parquet.ts'
6+
7+
// Loads the participants.parquet fixture through loadParquet and checks the
// number of columns and the number of values stored for each column.
Deno.test('Test loading parquet file', async (t) => {
  const ignore = new FileIgnoreRules([])
  await t.step('Load participants.parquet', async () => {
    const path = 'participants.parquet'
    const root = './tests/data/'
    const file = new BIDSFileDeno(root, path, ignore)
    const participantsMap = await loadParquet(file)
    const keys = Object.keys(participantsMap)
    // The fixture is expected to expose 3 columns.
    assertEquals(keys.length, 3)
    // Plain loop instead of map(): the assertions run for their side effect
    // only, so there is no result array worth building.
    for (const key of keys) {
      assertEquals(participantsMap.get(key)?.length, 16)
    }
  })
})

src/files/parquet.ts

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
import { asyncBufferFromFile, parquetRead, ParquetReadOptions, ColumnData } from '@hyparquet'
2+
import type { BIDSFile } from '../types/filetree.ts'
3+
import { ColumnsMap } from '../types/columns.ts'
4+
import { createUTF8Stream } from './streams.ts'
5+
6+
/**
 * Load a Parquet file into a ColumnsMap, converting every value to a string.
 *
 * @param file - File to read; all `file.size` bytes are read and handed to the Parquet parser.
 * @param maxRows - Maximum number of rows to keep per column. A negative value
 *   (the default) keeps every row.
 * @returns A ColumnsMap with one array of strings per Parquet column.
 */
export async function loadParquet(file: BIDSFile, maxRows: number = -1): Promise<ColumnsMap> {
  const columnsMap = new ColumnsMap()

  // NOTE(review): assumes readBytes resolves to a Uint8Array-like view — confirm.
  const bytes = await file.readBytes(file.size)
  // A byte view may cover only part of its backing buffer; passing `.buffer`
  // directly would then expose unrelated bytes to the parser.
  const coversWholeBuffer = bytes.byteOffset === 0 && bytes.byteLength === bytes.buffer.byteLength
  const buffer = coversWholeBuffer
    ? bytes.buffer
    : bytes.buffer.slice(bytes.byteOffset, bytes.byteOffset + bytes.byteLength)

  // A column can be delivered in several chunks (e.g. one per row group), so
  // chunks are collected here and joined once reading has finished instead of
  // letting each chunk overwrite the previous one.
  const chunksByColumn = new Map<string, { rowStart: number; values: string[] }[]>()

  const readOpts: ParquetReadOptions = {
    file: buffer,
    onChunk: (chunk) => {
      // columnData is only array-like (it may be a typed array), so calling
      // .map() on it cannot be relied on to produce strings; Array.from can.
      // NOTE(review): String() renders null as "null" — confirm this is the
      // desired representation for missing values.
      const values = Array.from(chunk.columnData, (entry: unknown) => String(entry))
      const chunks = chunksByColumn.get(chunk.columnName) ?? []
      chunks.push({ rowStart: chunk.rowStart, values })
      chunksByColumn.set(chunk.columnName, chunks)
    },
  }
  // Let the parser stop early when a row limit was requested.
  if (maxRows > 0) {
    readOpts.rowEnd = maxRows
  }
  await parquetRead(readOpts)

  for (const [columnName, chunks] of chunksByColumn) {
    // Order chunks by their first row so the column keeps file order.
    chunks.sort((a, b) => a.rowStart - b.rowStart)
    const values = chunks.flatMap((chunk) => chunk.values)
    // Chunks may extend past the requested range, so trim explicitly.
    columnsMap[columnName] = maxRows >= 0 ? values.slice(0, maxRows) : values
  }
  return columnsMap
}

tests/data/participants.parquet

2.85 KB
Binary file not shown.

0 commit comments

Comments
 (0)