Skip to content

Commit 2213042

Browse files
authored
Merge pull request #254 from effigies/feat/custom-columns
rf: Validate additional columns when defined
2 parents c145f3e + 424cad9 commit 2213042

File tree

5 files changed

+143
-94
lines changed

5 files changed

+143
-94
lines changed
Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
<!--
2+
A new scriv changelog fragment.
3+
4+
Uncomment the section that is right (remove the HTML comment wrapper).
5+
For top level release notes, leave all the headers commented out.
6+
-->
7+
8+
### Added
9+
10+
- Additional (dataset-defined) columns are now validated against the
11+
definitions in the sidecar files.
12+
13+
<!--
14+
### Changed
15+
16+
- A bullet item for the Changed category.
17+
18+
-->
19+
<!--
20+
### Fixed
21+
22+
- A bullet item for the Fixed category.
23+
24+
-->
25+
<!--
26+
### Deprecated
27+
28+
- A bullet item for the Deprecated category.
29+
30+
-->
31+
<!--
32+
### Removed
33+
34+
- A bullet item for the Removed category.
35+
36+
-->
37+
<!--
38+
### Security
39+
40+
- A bullet item for the Security category.
41+
42+
-->
43+
<!--
44+
### Infrastructure
45+
46+
- A bullet item for the Infrastructure category.
47+
48+
-->

src/schema/applyRules.ts

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,6 @@ import { logger } from '../utils/logger.ts'
66
import { compile } from '../validators/json.ts'
77
import type { DefinedError } from '@ajv'
88
import {
9-
evalAdditionalColumns,
109
evalColumns,
1110
evalIndexColumns,
1211
evalInitialColumns,
@@ -123,8 +122,6 @@ const evalMap: Record<
123122
// @ts-expect-error
124123
columns: evalColumns,
125124
// @ts-expect-error
126-
additional_columns: evalAdditionalColumns,
127-
// @ts-expect-error
128125
initial_columns: evalInitialColumns,
129126
// @ts-expect-error
130127
index_columns: evalIndexColumns,

src/schema/tables.test.ts

Lines changed: 47 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@
22
import { assertEquals } from '@std/assert'
33
import { loadSchema } from '../setup/loadSchema.ts'
44
import {
5-
evalAdditionalColumns,
65
evalColumns,
76
evalIndexColumns,
87
evalInitialColumns,
@@ -186,6 +185,48 @@ Deno.test('tables eval* tests', async (t) => {
186185
assertEquals(issues.length, 1)
187186
})
188187

188+
await t.step('validate additional column with sidecar', () => {
189+
const context = {
190+
path: '/participants.tsv',
191+
extension: '.tsv',
192+
sidecar: {
193+
myAge: {
194+
Description: 'Age in weeks',
195+
Format: 'number',
196+
},
197+
mySex: {
198+
Description: 'Phenotypic sex',
199+
Format: 'string',
200+
Levels: {
201+
'F': { Description: 'Female' },
202+
'M': { Description: 'Male' },
203+
'O': { Description: 'Other' },
204+
},
205+
},
206+
},
207+
sidecarKeyOrigin: {
208+
myAge: '/participants.json',
209+
mySex: '/participants.json',
210+
},
211+
columns: new ColumnsMap(Object.entries({
212+
participant_id: ['sub-01', 'sub-02', 'sub-03'],
213+
myAge: ['10', '20', '89+'],
214+
mySex: ['M', 'F', 'O'],
215+
})),
216+
dataset: { issues: new DatasetIssues() },
217+
}
218+
const rule = schemaDefs.rules.tabular_data.modality_agnostic.Participants
219+
evalColumns(rule, context, schema, 'rules.tabular_data.modality_agnostic.Participants')
220+
221+
// myAge does not get the pseudo-age warning
222+
// mySex doesn't raise issues
223+
const issues = context.dataset.issues.get({ code: 'TSV_VALUE_INCORRECT_TYPE' })
224+
assertEquals(issues.length, 1)
225+
assertEquals(issues[0].subCode, 'myAge')
226+
assertEquals(issues[0].line, 4)
227+
assertEquals(issues[0].issueMessage, "'89+'")
228+
})
229+
189230
await t.step('verify column ordering', () => {
190231
const context = {
191232
path: '/sub-01/sub-01_scans.tsv',
@@ -246,11 +287,11 @@ Deno.test('tables eval* tests', async (t) => {
246287
dataset: { issues: new DatasetIssues() },
247288
}
248289
const rule = schemaDefs.rules.tabular_data.made_up.MadeUp
249-
evalAdditionalColumns(rule, context, schema, 'rules.tabular_data.made_up.MadeUp')
290+
evalColumns(rule, context, schema, 'rules.tabular_data.made_up.MadeUp')
250291
assertEquals(context.dataset.issues.size, 0)
251292

252293
context.columns['extra'] = [1, 2, 3]
253-
evalAdditionalColumns(rule, context, schema, 'rules.tabular_data.made_up.MadeUp')
294+
evalColumns(rule, context, schema, 'rules.tabular_data.made_up.MadeUp')
254295
assertEquals(
255296
context.dataset.issues.get({ code: 'TSV_ADDITIONAL_COLUMNS_NOT_ALLOWED' }).length,
256297
1,
@@ -270,18 +311,18 @@ Deno.test('tables eval* tests', async (t) => {
270311
}
271312
const rule = schemaDefs.rules.tabular_data.made_up.MadeUp
272313
rule.additional_columns = 'allowed_if_defined'
273-
evalAdditionalColumns(rule, context, schema, 'rules.tabular_data.made_up.MadeUp')
314+
evalColumns(rule, context, schema, 'rules.tabular_data.made_up.MadeUp')
274315
assertEquals(context.dataset.issues.size, 0)
275316

276317
context['sidecar'] = {}
277-
evalAdditionalColumns(rule, context, schema, 'rules.tabular_data.made_up.MadeUp')
318+
evalColumns(rule, context, schema, 'rules.tabular_data.made_up.MadeUp')
278319
assertEquals(
279320
context.dataset.issues.get({ code: 'TSV_ADDITIONAL_COLUMNS_MUST_DEFINE' }).length,
280321
1,
281322
)
282323

283324
rule.additional_columns = 'allowed'
284-
evalAdditionalColumns(rule, context, schema, 'rules.tabular_data.made_up.MadeUp')
325+
evalColumns(rule, context, schema, 'rules.tabular_data.made_up.MadeUp')
285326
assertEquals(
286327
context.dataset.issues.get({ code: 'TSV_ADDITIONAL_COLUMNS_UNDEFINED' }).length,
287328
1,

src/schema/tables.ts

Lines changed: 47 additions & 84 deletions
Original file line numberDiff line numberDiff line change
@@ -218,46 +218,56 @@ export function evalColumns(
218218
schemaPath: string,
219219
): void {
220220
if (!rule.columns || !['.tsv', '.tsv.gz'].includes(context.extension)) return
221-
const headers = [...Object.keys(context.columns)]
222-
for (const [ruleHeader, requirement] of Object.entries(rule.columns)) {
223-
const columnObject: ColumnSchema = schema.objects.columns[ruleHeader]
221+
const columns = rule.columns as Record<string, string | { level: string }>
224222

225-
// What is this?
226-
if (!('name' in columnObject) || !columnObject['name']) {
227-
return
228-
}
223+
const columnLookup = Object.fromEntries(
224+
Object.keys(rule.columns).map((col) => [schema.objects.columns[col].name, col]),
225+
)
226+
const additionalColumns = Object.keys(context.columns).filter((name) => !(name in columnLookup))
229227

230-
const name = columnObject.name
231-
if (!headers.includes(name)) {
232-
if (
233-
getRequirement(rule as ColumnRule, ruleHeader) === 'required' &&
234-
// A more precise error will be generated by evalInitialColumns
235-
!(rule?.initial_columns && rule.initial_columns.includes(ruleHeader))
236-
) {
237-
context.dataset.issues.add({
238-
code: 'TSV_COLUMN_MISSING',
239-
subCode: name,
240-
location: context.path,
241-
rule: schemaPath,
242-
})
243-
}
244-
return
245-
}
228+
for (const name of new Set([...Object.keys(columnLookup), ...additionalColumns])) {
229+
const ruleHeader = columnLookup[name]
230+
const sidecarDef = context.sidecar[name] as ColumnDefinition | undefined
231+
const requirement = getRequirement(rule as ColumnRule, ruleHeader)
232+
const column = context.columns[name] as string[]
233+
234+
const issue = { subCode: name, location: context.path, rule: schemaPath }
246235

247236
let signature: ValueSignature
248-
try {
249-
signature = getValueSignature(columnObject, context?.sidecar[name])
250-
} catch (e: any) {
251-
if (e?.code) {
252-
context.dataset.issues.add({
253-
...e,
254-
subCode: name,
255-
location: context.sidecarKeyOrigin[name],
256-
rule: schemaPath,
257-
})
258-
signature = getValueSignature(columnObject, undefined)
259-
} else {
260-
throw e
237+
if (!ruleHeader) { // Additional column
238+
if (requirement === 'not_allowed' || !sidecarDef) {
239+
const code = requirement === 'not_allowed'
240+
? 'TSV_ADDITIONAL_COLUMNS_NOT_ALLOWED'
241+
: requirement === 'allowed_if_defined'
242+
? 'TSV_ADDITIONAL_COLUMNS_MUST_DEFINE'
243+
: 'TSV_ADDITIONAL_COLUMNS_UNDEFINED'
244+
context.dataset.issues.add({ code, ...issue })
245+
continue
246+
}
247+
signature = extractDefinition(sidecarDef)
248+
} else {
249+
const columnObject = schema.objects.columns[ruleHeader]
250+
if (!column) {
251+
if (requirement === 'required' && !(rule?.initial_columns?.includes(ruleHeader))) {
252+
context.dataset.issues.add({ code: 'TSV_COLUMN_MISSING', ...issue })
253+
}
254+
continue
255+
}
256+
257+
try {
258+
signature = getValueSignature(columnObject, sidecarDef)
259+
} catch (e: any) {
260+
if (e?.code) {
261+
context.dataset.issues.add({
262+
...e,
263+
subCode: name,
264+
location: context.sidecarKeyOrigin[name],
265+
rule: schemaPath,
266+
})
267+
signature = extractSchema(columnObject)
268+
} else {
269+
throw e
270+
}
261271
}
262272
}
263273

@@ -267,17 +277,9 @@ export function evalColumns(
267277

268278
const spec = compileSignature(signature, schema.objects.formats)
269279

270-
const issue = {
271-
code: 'TSV_VALUE_INCORRECT_TYPE',
272-
subCode: name,
273-
location: context.path,
274-
rule: schemaPath,
275-
}
276-
277280
const ageCheck = name === 'age'
278281
let ageWarningIssued = false
279282

280-
const column = context.columns[name] as string[]
281283
for (const [index, value] of column.entries()) {
282284
if (!checkValue(value, spec)) {
283285
if (ageCheck && value === '89+') {
@@ -295,6 +297,7 @@ export function evalColumns(
295297
(value.match(/^\s*(NA|na|nan|NaN|n\/a)\s*$/) ? ", did you mean 'n/a'?" : '')
296298
context.dataset.issues.add({
297299
...issue,
300+
code: 'TSV_VALUE_INCORRECT_TYPE',
298301
line: index + 2,
299302
issueMessage,
300303
})
@@ -352,46 +355,6 @@ export function evalInitialColumns(
352355
})
353356
}
354357

355-
export function evalAdditionalColumns(
356-
rule: GenericRule,
357-
context: BIDSContext,
358-
schema: Schema,
359-
schemaPath: string,
360-
): void {
361-
if (!['.tsv', '.tsv.gz'].includes(context.extension)) return
362-
const headers = Object.keys(context?.columns)
363-
if (rule.columns) {
364-
if (!rule.additional_columns || rule.additional_columns === 'n/a') {
365-
// Old schemas might be missing the field, so be permissive.
366-
// New schemas indicate it is not applicable with 'n/a'.
367-
return
368-
}
369-
const ruleHeadersNames = Object.keys(rule.columns).map(
370-
(x) => schema.objects.columns[x].name,
371-
)
372-
let extraCols = headers.filter(
373-
(header) => !ruleHeadersNames.includes(header),
374-
)
375-
376-
if (rule.additional_columns?.startsWith('allowed')) {
377-
extraCols = extraCols.filter((header) => !(header in context.sidecar))
378-
}
379-
const code = rule.additional_columns === 'not_allowed'
380-
? 'TSV_ADDITIONAL_COLUMNS_NOT_ALLOWED'
381-
: rule.additional_columns === 'allowed_if_defined'
382-
? 'TSV_ADDITIONAL_COLUMNS_MUST_DEFINE'
383-
: 'TSV_ADDITIONAL_COLUMNS_UNDEFINED'
384-
const issue = {
385-
code,
386-
location: context.path,
387-
rule: schemaPath,
388-
}
389-
for (const col of extraCols) {
390-
context.dataset.issues.add({ ...issue, subCode: col })
391-
}
392-
}
393-
}
394-
395358
export function evalIndexColumns(
396359
rule: GenericRule,
397360
context: BIDSContext,

0 commit comments

Comments
 (0)