Skip to content

Commit acc89e7

Browse files
authored
Merge pull request #282 from effigies/feat/context-queue
feat: Start loading contexts earlier, buffer contexts for better concurrency
2 parents e208822 + ac133ef commit acc89e7

File tree

11 files changed

+109
-37
lines changed

11 files changed

+109
-37
lines changed
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
### Changed
2+
3+
- Validation context generation was tweaked to improve concurrency, giving 4x validation
4+
speedups in some cases.

src/schema/context.test.ts

Lines changed: 16 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,24 @@
11
import { assert, assertEquals, assertObjectMatch } from '@std/assert'
22
import type { DatasetIssues } from '../issues/datasetIssues.ts'
3+
import type { BIDSFile } from '../types/filetree.ts'
4+
import type { FileTree } from '../types/filetree.ts'
5+
import type { BIDSContextDataset } from './context.ts'
36
import { BIDSContext } from './context.ts'
47
import { dataFile, rootFileTree } from './fixtures.test.ts'
58

9+
/* Async helper to create a loaded BIDSContext */
10+
export async function makeBIDSContext(
11+
file: BIDSFile,
12+
dsContext?: BIDSContextDataset,
13+
fileTree?: FileTree,
14+
): Promise<BIDSContext> {
15+
const context = new BIDSContext(file, dsContext, fileTree)
16+
await context.loaded
17+
return context
18+
}
19+
620
Deno.test('test context LoadSidecar', async (t) => {
7-
const context = new BIDSContext(dataFile)
8-
await context.loadSidecar()
21+
const context = await makeBIDSContext(dataFile)
922
await t.step('sidecar overwrites correct fields', () => {
1023
const { rootOverwrite, subOverwrite } = context.sidecar
1124
assertObjectMatch(context.sidecar, {
@@ -30,8 +43,7 @@ Deno.test('test context LoadSidecar', async (t) => {
3043
})
3144

3245
Deno.test('test context loadSubjects', async (t) => {
33-
const context = new BIDSContext(dataFile, undefined, rootFileTree)
34-
await context.loadSubjects()
46+
const context = await makeBIDSContext(dataFile, undefined, rootFileTree)
3547
await t.step('context produces correct subjects object', () => {
3648
assert(context.dataset.subjects, 'subjects object exists')
3749
assert(context.dataset.subjects.sub_dirs.length == 1, 'there is only one sub dir found')

src/schema/context.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -142,6 +142,7 @@ export class BIDSContext implements Context {
142142
file: BIDSFile
143143
filenameRules: string[]
144144
sidecarKeyOrigin: Record<string, string>
145+
loaded: Promise<void>
145146

146147
constructor(
147148
file: BIDSFile,
@@ -167,6 +168,7 @@ export class BIDSContext implements Context {
167168
this.columns = new ColumnsMap() as Record<string, string[]>
168169
this.json = {}
169170
this.associations = {} as Associations
171+
this.loaded = this.asyncLoads()
170172
}
171173

172174
get schema(): Schema {

src/schema/expressionLanguage.test.ts

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,11 +6,12 @@ import {
66
prepareContext,
77
} from './expressionLanguage.ts'
88
import { dataFile, rootFileTree } from './fixtures.test.ts'
9-
import { BIDSContext } from './context.ts'
9+
import type { BIDSContext } from './context.ts'
10+
import { makeBIDSContext } from './context.test.ts'
1011
import type { DatasetIssues } from '../issues/datasetIssues.ts'
1112

1213
Deno.test('test expression functions', async (t) => {
13-
const context = new BIDSContext(dataFile, undefined, rootFileTree)
14+
const context = await makeBIDSContext(dataFile, undefined, rootFileTree)
1415

1516
await t.step('index function', () => {
1617
const index = expressionFunctions.index

src/schema/walk.ts

Lines changed: 19 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ import { BIDSFile, FileOpener, FileTree } from '../types/filetree.ts'
33
import type { DatasetIssues } from '../issues/datasetIssues.ts'
44
import { loadTSV } from '../files/tsv.ts'
55
import { loadJSON } from '../files/json.ts'
6+
import { queuedAsyncIterator } from '../utils/queue.ts'
67

78
function* quickWalk(dir: FileTree): Generator<BIDSFile> {
89
for (const file of dir.files) {
@@ -38,15 +39,23 @@ function pseudoFile(dir: FileTree, opaque: boolean): BIDSFile {
3839
)
3940
}
4041

42+
type CleanupFunction = () => void
43+
4144
/** Recursive algorithm for visiting each file in the dataset, creating a context */
4245
async function* _walkFileTree(
4346
fileTree: FileTree,
4447
dsContext: BIDSContextDataset,
45-
): AsyncIterable<BIDSContext> {
48+
): AsyncIterable<BIDSContext | CleanupFunction> {
4649
for (const file of fileTree.files) {
50+
if (file.ignored) {
51+
continue
52+
}
4753
yield new BIDSContext(file, dsContext)
4854
}
4955
for (const dir of fileTree.directories) {
56+
if (dir.ignored) {
57+
continue
58+
}
5059
const pseudo = dsContext.isPseudoFile(dir)
5160
const opaque = pseudo || dsContext.isOpaqueDirectory(dir)
5261
const context = new BIDSContext(pseudoFile(dir, opaque), dsContext)
@@ -56,13 +65,19 @@ async function* _walkFileTree(
5665
yield* _walkFileTree(dir, dsContext)
5766
}
5867
}
59-
loadTSV.cache.delete(fileTree.path)
60-
loadJSON.cache.delete(fileTree.path)
68+
yield () => {
69+
loadTSV.cache.delete(fileTree.path)
70+
loadJSON.cache.delete(fileTree.path)
71+
}
6172
}
6273

6374
/** Walk all files in the dataset and construct a context for each one */
6475
export async function* walkFileTree(
6576
dsContext: BIDSContextDataset,
77+
bufferSize: number = 1,
6678
): AsyncIterable<BIDSContext> {
67-
yield* _walkFileTree(dsContext.tree, dsContext)
79+
for await (const context of queuedAsyncIterator(_walkFileTree(dsContext.tree, dsContext), bufferSize)) {
80+
await context.loaded
81+
yield context
82+
}
6883
}

src/tests/local/hed-integration.test.ts

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,8 @@ import { assert, assertEquals } from '@std/assert'
33
import { readFileTree } from '../../files/deno.ts'
44
import type { DatasetIssues } from '../../issues/datasetIssues.ts'
55
import { loadSchema } from '../../setup/loadSchema.ts'
6-
import { BIDSContext, BIDSContextDataset } from '../../schema/context.ts'
6+
import { BIDSContextDataset } from '../../schema/context.ts'
7+
import { makeBIDSContext } from '../../schema/context.test.ts'
78
import { BIDSFile, FileTree } from '../../types/filetree.ts'
89
import type { GenericSchema } from '../../types/schema.ts'
910
import { hedValidate } from '../../validators/hed.ts'
@@ -21,8 +22,7 @@ Deno.test('hed-validator not triggered', async (t) => {
2122
const eventFile = tree.get('sub-01/func/sub-01_task-rhymejudgment_events.tsv')
2223
assert(eventFile !== undefined)
2324
assert(eventFile instanceof BIDSFile)
24-
const context = new BIDSContext(eventFile, dsContext)
25-
await context.asyncLoads()
25+
const context = await makeBIDSContext(eventFile, dsContext)
2626
await hedValidate(schema as unknown as GenericSchema, context)
2727
assert(context.dataset.issues.size === 0)
2828
})
@@ -41,8 +41,7 @@ Deno.test('hed-validator fails with bad schema version', async (t) => {
4141
const eventFile = tree.get('sub-002/eeg/sub-002_task-FacePerception_run-3_events.tsv')
4242
assert(eventFile !== undefined)
4343
assert(eventFile instanceof BIDSFile)
44-
const context = new BIDSContext(eventFile, dsContext)
45-
await context.asyncLoads()
44+
const context = await makeBIDSContext(eventFile, dsContext)
4645
await hedValidate(schema as unknown as GenericSchema, context)
4746
assertEquals(context.dataset.issues.size, 1)
4847
assertEquals(context.dataset.issues.get({ code: 'HED_ERROR' }).length, 1)

src/utils/queue.ts

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
export async function* bufferAsyncIterator<T>(
2+
asyncIterator: AsyncIterable<T>,
3+
bufferSize: number = 5,
4+
): AsyncGenerator<T, void, undefined> {
5+
const iterator = asyncIterator[Symbol.asyncIterator]()
6+
const promises: Promise<IteratorResult<T>>[] = []
7+
8+
// Initialize buffer
9+
for (let i = 0; i < bufferSize; i++) {
10+
promises.push(iterator.next())
11+
}
12+
13+
while (promises.length > 0) {
14+
const result = await promises.shift()!
15+
16+
if (!result.done) {
17+
yield result.value
18+
// Keep buffer full
19+
promises.push(iterator.next())
20+
}
21+
}
22+
}
23+
24+
type CleanupFunction = () => void
25+
26+
export async function* cleanupAsyncIterator<T extends object>(
27+
asyncIterator: AsyncIterable<T | CleanupFunction>,
28+
): AsyncGenerator<T, void, undefined> {
29+
for await (const item of asyncIterator) {
30+
if (typeof item === 'function') {
31+
item()
32+
} else {
33+
yield item
34+
}
35+
}
36+
}
37+
38+
export async function* queuedAsyncIterator<T extends object>(
39+
asyncIterator: AsyncIterable<T | CleanupFunction>,
40+
bufferSize: number = 5,
41+
): AsyncGenerator<T, void, undefined> {
42+
yield* cleanupAsyncIterator(bufferAsyncIterator(asyncIterator, bufferSize))
43+
}

src/validators/bids.ts

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -108,18 +108,13 @@ export async function validate(
108108
return false
109109
})
110110

111-
for await (const context of walkFileTree(dsContext)) {
112-
// TODO - Skip ignored files for now (some tests may reference ignored files)
113-
if (context.file.ignored) {
114-
continue
115-
}
111+
for await (const context of walkFileTree(dsContext, 20)) {
116112
if (
117113
dsContext.dataset_description.DatasetType == 'raw' &&
118114
context.file.path.includes('derivatives')
119115
) {
120116
continue
121117
}
122-
await context.asyncLoads()
123118
// Run majority of checks
124119
for (const check of perContextChecks) {
125120
await check(schema as unknown as GenericSchema, context)

src/validators/filenameIdentify.test.ts

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
import { assertEquals } from '@std/assert'
22
import { SEPARATOR_PATTERN } from '@std/path'
3-
import { BIDSContext } from '../schema/context.ts'
3+
import { makeBIDSContext } from '../schema/context.test.ts'
44
import { _findRuleMatches, findDirRuleMatches, hasMatch } from './filenameIdentify.ts'
55
import { BIDSFileDeno } from '../files/deno.ts'
66
import { FileIgnoreRules } from '../files/ignore.ts'
@@ -27,7 +27,7 @@ Deno.test('test _findRuleMatches', async (t) => {
2727
await t.step('Rule stem matches', async () => {
2828
const fileName = 'participants.json'
2929
const file = new BIDSFileDeno(PATH, fileName, ignore)
30-
const context = new BIDSContext(file)
30+
const context = await makeBIDSContext(file)
3131
_findRuleMatches(node, schemaPath, context)
3232
assertEquals(context.filenameRules[0], schemaPath)
3333
})
@@ -38,7 +38,7 @@ Deno.test('test _findRuleMatches', async (t) => {
3838
async () => {
3939
const fileName = 'task-rest_bold.json'
4040
const file = new BIDSFileDeno(PATH, fileName, ignore)
41-
const context = new BIDSContext(file)
41+
const context = await makeBIDSContext(file)
4242
_findRuleMatches(recurseNode, schemaPath, context)
4343
assertEquals(context.filenameRules[0], `${schemaPath}.recurse`)
4444
},
@@ -49,7 +49,7 @@ Deno.test('test hasMatch', async (t) => {
4949
await t.step('hasMatch', async () => {
5050
const fileName = '/sub-01/ses-01/func/sub-01_ses-01_task-nback_run-01_bold.nii'
5151
const file = new BIDSFileDeno(PATH, fileName, ignore)
52-
const context = new BIDSContext(file)
52+
const context = await makeBIDSContext(file)
5353
hasMatch(schema, context)
5454
})
5555

@@ -58,7 +58,7 @@ Deno.test('test hasMatch', async (t) => {
5858
const [dir, base] = tmpFile.split(SEPARATOR_PATTERN)
5959
const file = new BIDSFileDeno(dir, `/${base}`, ignore)
6060

61-
const context = new BIDSContext(file)
61+
const context = await makeBIDSContext(file)
6262
await hasMatch(schema, context)
6363
assertEquals(
6464
context.dataset.issues.get({
@@ -73,7 +73,7 @@ Deno.test('test hasMatch', async (t) => {
7373
const path = `${PATH}/../bids-examples/fnirs_automaticity`
7474
const fileName = 'events.json'
7575
const file = new BIDSFileDeno(path, fileName, ignore)
76-
const context = new BIDSContext(file)
76+
const context = await makeBIDSContext(file)
7777
context.filenameRules = [
7878
'rules.files.raw.events.events__mri',
7979
'rules.files.raw.events.events__pet',
@@ -87,7 +87,7 @@ Deno.test('test directoryIdentify', async (t) => {
8787
await t.step('Test entity based rule', async () => {
8888
const fileName = '/sub-01/'
8989
const file = new BIDSFileDeno(PATH, fileName, ignore)
90-
const context = new BIDSContext(file)
90+
const context = await makeBIDSContext(file)
9191
context.directory = true
9292
await findDirRuleMatches(schema, context)
9393
assertEquals(context.filenameRules.length, 1)
@@ -96,7 +96,7 @@ Deno.test('test directoryIdentify', async (t) => {
9696
await t.step('Test name based rule', async () => {
9797
const fileName = '/derivatives/'
9898
const file = new BIDSFileDeno(PATH, fileName, ignore)
99-
const context = new BIDSContext(file)
99+
const context = await makeBIDSContext(file)
100100
context.directory = true
101101
await findDirRuleMatches(schema, context)
102102
assertEquals(context.filenameRules.length, 1)
@@ -105,7 +105,7 @@ Deno.test('test directoryIdentify', async (t) => {
105105
await t.step('Test value based rule', async () => {
106106
const fileName = '/func/'
107107
const file = new BIDSFileDeno(`${PATH}/sub-01/ses-01`, fileName, ignore)
108-
const context = new BIDSContext(file)
108+
const context = await makeBIDSContext(file)
109109
context.directory = true
110110
await findDirRuleMatches(schema, context)
111111
assertEquals(context.filenameRules.length, 1)

src/validators/filenameValidate.test.ts

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
import type { FileTree } from '../types/filetree.ts'
22
import type { GenericSchema } from '../types/schema.ts'
33
import { assertEquals } from '@std/assert'
4-
import { BIDSContext } from '../schema/context.ts'
4+
import { makeBIDSContext } from '../schema/context.test.ts'
55
import { type atRoot, type entityLabelCheck, missingLabel } from './filenameValidate.ts'
66
import type { BIDSFileDeno } from '../files/deno.ts'
77
import { pathToFile } from '../files/filetree.test.ts'
@@ -12,7 +12,7 @@ const schema = (await loadSchema()) as unknown as GenericSchema
1212

1313
Deno.test('test missingLabel', async (t) => {
1414
await t.step('File with underscore and no hyphens errors out.', async () => {
15-
const context = new BIDSContext(pathToFile('/no_label_entities.wav'))
15+
const context = await makeBIDSContext(pathToFile('/no_label_entities.wav'))
1616
// Need some suffix rule to trigger the check,
1717
// otherwise this is trigger-happy.
1818
context.filenameRules = ['rules.files.raw.dwi.dwi']
@@ -31,7 +31,7 @@ Deno.test('test missingLabel', async (t) => {
3131
await t.step(
3232
"File with underscores and hyphens doesn't error out.",
3333
async () => {
34-
const context = new BIDSContext(pathToFile('/we-do_have-some_entities.wav'))
34+
const context = await makeBIDSContext(pathToFile('/we-do_have-some_entities.wav'))
3535
// Doesn't really matter that the rule doesn't apply
3636
context.filenameRules = ['rules.files.raw.dwi.dwi']
3737

0 commit comments

Comments
 (0)