Skip to content

Commit a411f4c

Browse files
authored
Merge pull request #124 from effigies/fix/prune-subdatasets
feat: Add --prune option to prevent walking subtrees
2 parents d296de3 + d5cb385 commit a411f4c

File tree

6 files changed (+48 additions, -11 deletions)

src/files/deno.test.ts

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,9 +12,11 @@ await requestReadPermission()
1212
// Use this file for testing file behavior
1313
const testUrl = import.meta.url
1414
const testPath = fromFileUrl(testUrl)
15-
const testDir = dirname(testPath)
15+
const testDir = dirname(testPath) // $REPO/src/files
1616
const testFilename = basename(testPath)
17+
const repoRoot = dirname(dirname(dirname(testPath)))
1718
const ignore = new FileIgnoreRules([])
19+
const prune = new FileIgnoreRules(['derivatives'], false)
1820

1921
Deno.test('Deno implementation of BIDSFile', async (t) => {
2022
await t.step('implements basic file properties', () => {
@@ -53,7 +55,7 @@ Deno.test('Deno implementation of BIDSFile', async (t) => {
5355
'strips BOM characters when reading UTF-8 via .text()',
5456
async () => {
5557
// BOM is invalid in JSON but shows up often from certain tools, so abstract handling it
56-
const bomDir = join(testPath, '..', '..', 'tests')
58+
const bomDir = join(repoRoot, 'src', 'tests')
5759
const bomFilename = 'bom-utf8.json'
5860
const file = new BIDSFileDeno(bomDir, bomFilename, ignore)
5961
const text = await file.text()
@@ -75,4 +77,16 @@ Deno.test('Deno implementation of FileTree', async (t) => {
7577
assert(testObj !== undefined)
7678
assertEquals(testObj.path, `/${parent}/${testFilename}`)
7779
})
80+
81+
await t.step('implements pruning', async () => {
82+
const dsDir = join(repoRoot, 'tests', 'data', 'valid_dataset')
83+
const derivFile =
84+
'derivatives/fmriprep/sub-01/ses-01/func/sub-01_ses-01_task-rest_confounds.tsv.gz'
85+
86+
const fullTree = await readFileTree(dsDir)
87+
assert(fullTree.get(derivFile))
88+
89+
const prunedTree = await readFileTree(dsDir, prune)
90+
assert(!prunedTree.get(derivFile))
91+
})
7892
})

src/files/deno.ts

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -120,17 +120,22 @@ async function _readFileTree(
120120
rootPath: string,
121121
relativePath: string,
122122
ignore: FileIgnoreRules,
123+
prune: FileIgnoreRules,
123124
parent?: FileTree,
124125
): Promise<FileTree> {
125126
await requestReadPermission()
126127
const name = basename(relativePath)
127128
const tree = new FileTree(relativePath, name, parent, ignore)
128129

129130
for await (const dirEntry of Deno.readDir(join(rootPath, relativePath))) {
131+
const thisPath = posix.join(relativePath, dirEntry.name)
132+
if (prune.test(thisPath)) {
133+
continue
134+
}
130135
if (dirEntry.isFile || dirEntry.isSymlink) {
131136
const file = new BIDSFileDeno(
132137
rootPath,
133-
posix.join(relativePath, dirEntry.name),
138+
thisPath,
134139
ignore,
135140
)
136141
file.parent = tree
@@ -139,8 +144,9 @@ async function _readFileTree(
139144
if (dirEntry.isDirectory) {
140145
const dirTree = await _readFileTree(
141146
rootPath,
142-
posix.join(relativePath, dirEntry.name),
147+
thisPath,
143148
ignore,
149+
prune,
144150
tree,
145151
)
146152
tree.directories.push(dirTree)
@@ -152,9 +158,13 @@ async function _readFileTree(
152158
/**
153159
* Read in the target directory structure and return a FileTree
154160
*/
155-
export async function readFileTree(rootPath: string): Promise<FileTree> {
161+
export async function readFileTree(
162+
rootPath: string,
163+
prune?: FileIgnoreRules,
164+
): Promise<FileTree> {
165+
prune ??= new FileIgnoreRules([], false)
156166
const ignore = new FileIgnoreRules([])
157-
const tree = await _readFileTree(rootPath, '/', ignore)
167+
const tree = await _readFileTree(rootPath, '/', ignore, prune)
158168
const bidsignore = tree.get('.bidsignore')
159169
if (bidsignore) {
160170
try {

src/files/ignore.ts

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,10 +27,15 @@ const defaultIgnores = [
2727
export class FileIgnoreRules {
2828
#ignore: Ignore
2929

30-
constructor(config: string[]) {
30+
constructor(
31+
config: string[],
32+
addDefaults: boolean = true,
33+
) {
3134
// @ts-expect-error
3235
this.#ignore = ignore()
33-
this.#ignore.add(defaultIgnores)
36+
if (addDefaults) {
37+
this.#ignore.add(defaultIgnores)
38+
}
3439
this.#ignore.add(config)
3540
}
3641

src/main.ts

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ import type { Config } from './setup/options.ts'
33
import * as colors from '@std/fmt/colors'
44
import { readFileTree } from './files/deno.ts'
55
import { fileListToTree } from './files/browser.ts'
6+
import { FileIgnoreRules } from './files/ignore.ts'
67
import { resolve } from '@std/path'
78
import { validate } from './validators/bids.ts'
89
import { consoleFormat, resultToJSONStr } from './utils/output.ts'
@@ -21,7 +22,10 @@ export async function main(): Promise<ValidationResult> {
2122
setupLogging(options.debug)
2223

2324
const absolutePath = resolve(options.datasetPath)
24-
const tree = await readFileTree(absolutePath)
25+
const prune = options.prune
26+
? new FileIgnoreRules(['derivatives', 'sourcedata', 'code'], false)
27+
: undefined
28+
const tree = await readFileTree(absolutePath, prune)
2529

2630
const config = options.config ? JSON.parse(Deno.readTextFileSync(options.config)) as Config : {}
2731

src/setup/options.ts

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ export type ValidatorOptions = {
2929
recursive?: boolean
3030
outfile?: string
3131
blacklistModalities: string[]
32+
prune?: boolean
3233
}
3334

3435
const modalityType = new EnumType<string>(
@@ -72,6 +73,10 @@ export const validateCommand: Command<void, void, any, string[], void> = new Com
7273
'-r, --recursive',
7374
'Validate datasets found in derivatives directories in addition to root dataset',
7475
)
76+
.option(
77+
'-p, --prune',
78+
'Prune derivatives and sourcedata directories on load (disables -r and will underestimate dataset size)',
79+
)
7580
.option(
7681
'-o, --outfile <file:string>',
7782
'File to write validation results to.',

src/tests/regression.test.ts

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,13 +3,12 @@ import { pathsToTree } from '../files/filetree.ts'
33
import { validate } from '../validators/bids.ts'
44
import type { BIDSFile } from '../types/filetree.ts'
55

6-
76
Deno.test('Regression tests', async (t) => {
87
await t.step('Verify ignored files in scans.tsv do not trigger error', async () => {
98
const paths = [
109
'/dataset_description.json',
1110
'/sub-01/anat/sub-01_T1w.nii.gz',
12-
'/sub-01/anat/sub-01_CT.nii.gz', // unknown file
11+
'/sub-01/anat/sub-01_CT.nii.gz', // unknown file
1312
'/sub-01/sub-01_scans.tsv',
1413
]
1514
const ignore = ['*_CT.nii.gz']

0 commit comments

Comments
 (0)