diff --git a/.codespellrc b/.codespellrc index 8b5342e93c..68e1006144 100644 --- a/.codespellrc +++ b/.codespellrc @@ -1,6 +1,6 @@ # Ref: https://github.com/codespell-project/codespell#using-a-config-file [codespell] -skip = .git,*.pdf,*.svg,go.sum,*.lock,.codespellrc,.yarn,node_modules,schema-validator.js,.pnp.cjs,funded_awards.json +skip = .git,*.pdf,*.svg,go.sum,*.lock,.codespellrc,.yarn,node_modules,schema-validator.js,.pnp.cjs,funded_awards.json,schema-1.1.1-datacite.json check-hidden = true ignore-regex = ^.{300,}$|/((.*\|){4,}.*\))|\b(afterAll)\b ignore-words-list = chack diff --git a/cli/src/commands/upload.ts b/cli/src/commands/upload.ts index 5c89d72e05..9b93a61368 100644 --- a/cli/src/commands/upload.ts +++ b/cli/src/commands/upload.ts @@ -72,6 +72,10 @@ export async function uploadAction( // Match OpenNeuro's server side rules for datasetTypes overriding user settings options.datasetTypes = ["raw", "derivative"] + // Use OpenNeuro's schema until datacite.yml is supported upstream + const schemaPath = new URL("../schema-1.1.1-datacite.json", import.meta.url) + options.schema = schemaPath.href + const schemaResult = await validate( await readFileTree(dataset_directory_abs), options, diff --git a/cli/src/schema-1.1.1-datacite.json b/cli/src/schema-1.1.1-datacite.json new file mode 100644 index 0000000000..796b89a893 --- /dev/null +++ b/cli/src/schema-1.1.1-datacite.json @@ -0,0 +1,15298 @@ +{ + "meta": { + "associations": { + "events": { + "selectors": ["extension != '.json'"], + "target": { "suffix": "events", "extension": ".tsv" }, + "inherit": true + }, + "aslcontext": { + "selectors": ["suffix == 'asl'", "match(extension, '\\.nii(\\.gz)?$')"], + "target": { "suffix": "aslcontext", "extension": ".tsv" }, + "inherit": true + }, + "m0scan": { + "selectors": ["suffix == 'asl'", "match(extension, '\\.nii(\\.gz)?$')"], + "target": { "suffix": "m0scan", "extension": [".nii", ".nii.gz"] }, + "inherit": false + }, + "magnitude": { + "selectors": [ + "suffix == 'fieldmap'", + "match(extension, '\\.nii(\\.gz)?$')" + ], + "target": { "suffix": "magnitude", "extension": [".nii", ".nii.gz"] }, + "inherit": false + }, + "magnitude1": { + "selectors": [ + "match(suffix, 'phase(diff|1)$')", + "match(extension, '\\.nii(\\.gz)?$')" + ], + "target": { "suffix": "magnitude1", "extension": [".nii", ".nii.gz"] }, + "inherit": false + }, + "bval": { + "selectors": [ + "intersects([suffix], ['dwi', 'epi'])", + "match(extension, '\\.nii(\\.gz)?$')" + ], + "target": { "extension": ".bval" }, + "inherit": true + }, + "bvec": { + "selectors": [ + "intersects([suffix], ['dwi', 'epi'])", + "match(extension, '\\.nii(\\.gz)?$')" + ], + "target": { "extension": ".bvec" }, + "inherit": true + }, + "channels": { + "selectors": [ + "intersects([suffix], ['eeg', 'ieeg', 'meg', 'nirs', 'motion', 'optodes'])", + "extension != '.json'" + ], + "target": { "suffix": "channels", "extension": ".tsv" }, + "inherit": true + }, + "coordsystem": { + "selectors": [ + "intersects([suffix], ['eeg', 'ieeg', 'meg', 'nirs', 'motion', 'electrodes', 'optodes'])", + "extension != '.json'" + ], + "target": { "suffix": "coordsystem", "extension": ".json" }, + "inherit": true + }, + "electrodes": { + "selectors": [ + "intersects([suffix], ['eeg', 'ieeg', 'meg'])", + "extension != '.json'" + ], + "target": { + "suffix": "electrodes", + "extension": ".tsv", + "entities": ["space"] + }, + "inherit": true + } + }, + "context": { + "type": "object", + "required": [ + "schema", + "dataset", + "path", + "size", + "sidecar", + "associations" + ], + "additionalProperties": false, + "properties": { + "schema": { + "description": "The BIDS specification schema", + "type": "object" + }, + "dataset": { + "description": "Properties and contents of the entire dataset", + "type": "object", + "required": [ + "dataset_description", + "tree", + "ignored", + "datatypes", + "modalities", + "subjects" + ], + "additionalProperties": false, + "properties": { + "dataset_description": { + "description": "Contents of /dataset_description.json", + "type": "object" + }, + "tree": { + "description": "Tree view of all files in dataset", + "type": "object" + }, + "ignored": { + "description": "Set of ignored files", + "type": "array", + "items": { "type": "string" } + }, + "datatypes": { + "description": "Data types present in the dataset", + "type": "array", + "items": { "type": "string" } + }, + "modalities": { + "description": "Modalities present in the dataset", + "type": "array", + "items": { "type": "string" } + }, + "subjects": { + "description": "Collections of subjects in dataset", + "type": "object", + "required": ["sub_dirs"], + "additionalProperties": false, + "properties": { + "sub_dirs": { + "description": "Subjects as determined by sub-* directories", + "type": "array", + "items": { "type": "string" } + }, + "participant_id": { + "description": "The participant_id column of participants.tsv", + "type": "array", + "items": { "type": "string" } + } + } + } + } + }, + "subject": { + "description": "Properties and contents of the current subject", + "type": "object", + "required": ["sessions"], + "additionalProperties": false, + "properties": { + "sessions": { + "description": "Collections of sessions in subject", + "type": "object", + "required": ["ses_dirs"], + "additionalProperties": false, + "properties": { + "ses_dirs": { + "description": "Sessions as determined by ses-* directories", + "type": "array", + "items": { "type": "string" } + }, + "session_id": { + "description": "The session_id column of sessions.tsv", + "type": "array", + "items": { "type": "string" } + } + } + } + } + }, + "path": { "description": "Path of the current file", "type": "string" }, + "size": { + "description": "Length of the current file in bytes", + "type": "integer" + }, + "entities": { + "description": "Entities parsed from the current filename", + "type": "object", + "additionalProperties": { "type": "string" } + }, + "datatype": { + "description": "Datatype of current file, for examples, anat", + "type": "string" + }, + "suffix": { "description": "Suffix of current file", "type": "string" }, + "extension": { + "description": "Extension of current file including initial dot", + "type": "string" + }, + "modality": { + "description": "Modality of current file, for examples, MRI", + "type": "string" + }, + "sidecar": { + "description": "Sidecar metadata constructed via the inheritance principle", + "type": "object" + }, + "associations": { + "description": "Associated files, indexed by suffix, selected according to the inheritance principle\n", + "type": "object", + "additionalProperties": false, + "properties": { + "events": { + "description": "Events file", + "type": "object", + "required": ["path"], + "additionalProperties": false, + "properties": { + "path": { + "description": "Path to associated events file", + "type": "string" + }, + "onset": { + "description": "Contents of the onset column", + "type": "array", + "items": { "type": "string" } + } + } + }, + "aslcontext": { + "description": "ASL context file", + "type": "object", + "required": ["path", "n_rows"], + "additionalProperties": false, + "properties": { + "path": { + "description": "Path to associated aslcontext file", + "type": "string" + }, + "n_rows": { + "description": "Number of rows in aslcontext.tsv", + "type": "integer" + }, + "volume_type": { + "description": "Contents of the volume_type column", + "type": "array", + "items": { "type": "string" } + } + } + }, + "m0scan": { + "description": "M0 scan file", + "type": "object", + "required": ["path"], + "additionalProperties": false, + "properties": { + "path": { + "description": "Path to associated M0 scan file", + "type": "string" + } + } + }, + "magnitude": { + "description": "Magnitude image file", + "type": "object", + "required": ["path"], + "additionalProperties": false, + "properties": { + "path": { + "description": "Path to associated magnitude file", + "type": "string" + } + } + }, + "magnitude1": { + "description": "Magnitude1 image file", + "type": "object", + "required": ["path"], + "additionalProperties": false, + "properties": { + "path": { + "description": "Path to associated magnitude1 file", + "type": "string" + } + } + }, + "bval": { + "description": "B value file", + "type": "object", + "required": ["path", "n_cols", "n_rows", "values"], + "additionalProperties": false, + "properties": { + "path": { + "description": "Path to associated bval file", + "type": "string" + }, + "n_cols": { + "description": "Number of columns in bval file", + "type": "integer" + }, + "n_rows": { + "description": "Number of rows in bval file", + "type": "integer" + }, + "values": { + "description": "B-values contained in bval file", + "type": "array", + "items": { "type": "number" } + } + } + }, + "bvec": { + "description": "B vector file", + "type": "object", + "required": ["path", "n_cols", "n_rows"], + "additionalProperties": false, + "properties": { + "path": { + "description": "Path to associated bvec file", + "type": "string" + }, + "n_cols": { + "description": "Number of columns in bvec file", + "type": "integer" + }, + "n_rows": { + "description": "Number of rows in bvec file", + "type": "integer" + } + } + }, + "channels": { + "description": "Channels file", + "type": "object", + "required": ["path"], + "additionalProperties": false, + "properties": { + "path": { + "description": "Path to associated channels file", + "type": "string" + }, + "type": { + "description": "Contents of the type column", + "type": "array", + "items": { "type": "string" } + }, + "short_channel": { + "description": "Contents of the short_channel column", + "type": "array", + "items": { "type": "string" } + }, + "sampling_frequency": { + "description": "Contents of the sampling_frequency column", + "type": "array", + "items": { "type": "string" } + } + } + }, + "electrodes": { + "description": "Electrodes file", + "type": "object", + "required": ["path"], + "additionalProperties": false, + "properties": { + "path": { + "description": "Path to associated electrodes.tsv file", + "type": "string" + } + } + }, + "coordsystem": { + "description": "Coordinate system file", + "type": "object", + "required": ["path"], + "additionalProperties": false, + "properties": { + "path": { + "description": "Path to associated coordsystem file", + "type": "string" + } + } + } + } + }, + "columns": { + "description": "TSV columns, indexed by column header, values are arrays with column contents", + "type": "object", + "additionalProperties": { + "type": "array", + "items": { "type": "string" } + } + }, + "json": { + "description": "Contents of the current JSON file", + "type": "object" + }, + "gzip": { + "description": "Parsed contents of gzip header", + "type": "object", + "required": ["timestamp"], + "additionalProperties": false, + "properties": { + "timestamp": { + "description": "Modification time, unix timestamp", + "type": "number" + }, + "filename": { "description": "Filename", "type": "string" }, + "comment": { "description": "Comment", "type": "string" } + } + }, + "nifti_header": { + "name": "NIfTI Header", + "description": "Parsed contents of NIfTI header referenced elsewhere in schema.", + "type": "object", + "required": [ + "dim_info", + "dim", + "pixdim", + "shape", + "voxel_sizes", + "xyzt_units", + "qform_code", + "sform_code", + "axis_codes" + ], + "additionalProperties": false, + "properties": { + "dim_info": { + "name": "Dimension Information", + "description": "Metadata about dimensions data.", + "type": "object", + "required": ["freq", "phase", "slice"], + "additionalProperties": false, + "properties": { + "freq": { + "name": "Frequency", + "description": "These fields encode which spatial dimension (1, 2, or 3).", + "type": "integer" + }, + "phase": { + "name": "Phase", + "description": "Corresponds to which acquisition dimension for MRI data.", + "type": "integer" + }, + "slice": { + "name": "Slice", + "description": "Slice dimensions.", + "type": "integer" + } + } + }, + "dim": { + "name": "Data Dimensions", + "description": "Data seq dimensions.", + "type": "array", + "minItems": 8, + "maxItems": 8, + "items": { "type": "integer" } + }, + "pixdim": { + "name": "Pixel Dimension", + "description": "Grid spacings (unit per dimension).", + "type": "array", + "minItems": 8, + "maxItems": 8, + "items": { "type": "number" } + }, + "shape": { + "name": "Data shape", + "description": "Data array shape, equal to dim[1:dim[0] + 1]", + "type": "array", + "minItems": 0, + "maxItems": 7, + "items": { "type": "integer" } + }, + "voxel_sizes": { + "name": "Voxel sizes", + "description": "Voxel sizes, equal to pixdim[1:dim[0] + 1]", + "type": "array", + "minItems": 0, + "maxItems": 7, + "items": { "type": "number" } + }, + "xyzt_units": { + "name": "XYZT Units", + "description": "Units of pixdim[1..4]", + "type": "object", + "required": ["xyz", "t"], + "additionalProperties": false, + "properties": { + "xyz": { + "name": "XYZ Units", + "description": "String representing the unit of voxel spacing.", + "type": "string", + "enum": ["unknown", "meter", "mm", "um"] + }, + "t": { + "name": "Time Unit", + "description": "String representing the unit of inter-volume intervals.", + "type": "string", + "enum": ["unknown", "sec", "msec", "usec"] + } + } + }, + "qform_code": { + "name": "qform code", + "description": "Use of the quaternion fields.", + "type": "integer" + }, + "sform_code": { + "name": "sform code", + "description": "Use of the affine fields.", + "type": "integer" + }, + "axis_codes": { + "name": "axis codes", + "description": "Orientation labels indicating primary direction of data axes defined with respect to the object of interest.\n", + "type": "array", + "minItems": 3, + "maxItems": 3, + "items": { + "type": "string", + "enum": ["R", "L", "A", "P", "S", "I"] + } + }, + "mrs": { + "name": "NIfTI-MRS extension", + "description": "NIfTI-MRS JSON fields", + "type": "object" + } + } + }, + "ome": { + "name": "Open Microscopy Environment fields", + "description": "Parsed contents of OME-XML header, which may be found in OME-TIFF or OME-ZARR files", + "type": "object", + "additionalProperties": false, + "properties": { + "PhysicalSizeX": { + "name": "PhysicalSizeX", + "description": "Pixels / @PhysicalSizeX", + "type": "number" + }, + "PhysicalSizeY": { + "name": "PhysicalSizeY", + "description": "Pixels / @PhysicalSizeY", + "type": "number" + }, + "PhysicalSizeZ": { + "name": "PhysicalSizeZ", + "description": "Pixels / @PhysicalSizeZ", + "type": "number" + }, + "PhysicalSizeXUnit": { + "name": "PhysicalSizeXUnit", + "description": "Pixels / @PhysicalSizeXUnit", + "type": "string" + }, + "PhysicalSizeYUnit": { + "name": "PhysicalSizeYUnit", + "description": "Pixels / @PhysicalSizeYUnit", + "type": "string" + }, + "PhysicalSizeZUnit": { + "name": "PhysicalSizeZUnit", + "description": "Pixels / @PhysicalSizeZUnit", + "type": "string" + } + } + }, + "tiff": { + "name": "TIFF", + "description": "TIFF file format metadata", + "type": "object", + "required": ["version"], + "additionalProperties": false, + "properties": { + "version": { + "name": "Version", + "description": "TIFF file format version (the second 2-byte block)", + "type": "integer" + } + } + } + } + }, + "expression_tests": [ + { "expression": "sidecar.MissingValue", "result": null }, + { "expression": "null.anything", "result": null }, + { "expression": "(null)", "result": null }, + { "expression": "null[0]", "result": null }, + { "expression": "null && true", "result": null }, + { "expression": "true && null", "result": null }, + { "expression": "false && null", "result": false }, + { "expression": "true || null", "result": true }, + { "expression": "null || true", "result": true }, + { "expression": "false || null", "result": null }, + { "expression": "!null", "result": true }, + { "expression": "intersects([], null)", "result": false }, + { "expression": "intersects(null, [])", "result": false }, + { "expression": "allequal([], null)", "result": false }, + { "expression": "allequal(null, [])", "result": false }, + { "expression": "match(null, 'pattern')", "result": null }, + { "expression": "match('string', null)", "result": false }, + { "expression": "substr(null, 1, 4)", "result": null }, + { "expression": "substr('string', null, 4)", "result": null }, + { "expression": "substr('string', 1, null)", "result": null }, + { "expression": "min(null)", "result": null }, + { "expression": "max(null)", "result": null }, + { "expression": "length(null)", "result": null }, + { "expression": "type(null)", "result": "null" }, + { "expression": "null == false", "result": false }, + { "expression": "null == true", "result": false }, + { "expression": "null != false", "result": true }, + { "expression": "null != true", "result": true }, + { "expression": "null != 1.5", "result": true }, + { "expression": "null == null", "result": true }, + { "expression": "null == 1", "result": false }, + { "expression": "\"VolumeTiming\" in null", "result": null }, + { "expression": "exists(null, \"bids-uri\")", "result": 0 }, + { "expression": "exists([], null)", "result": 0 }, + { "expression": "true || sidecar.MissingValue", "result": true }, + { "expression": "1 + 2", "result": 3 }, + { "expression": "1 - 2", "result": -1 }, + { "expression": "3 * 4", "result": 12 }, + { "expression": "3 / 2", "result": 1.5 }, + { "expression": "3 % 2", "result": 1 }, + { "expression": "\"cat\" + \"dog\"", "result": "catdog" }, + { "expression": "match('string', '.*')", "result": true }, + { "expression": "match('', '.')", "result": false }, + { "expression": "substr('string', 1, 4)", "result": "tri" }, + { "expression": "substr('string', 0, 20)", "result": "string" }, + { "expression": "type(1)", "result": "number" }, + { "expression": "type([])", "result": "array" }, + { "expression": "type({})", "result": "object" }, + { "expression": "type(true)", "result": "boolean" }, + { "expression": "intersects([1], [1, 2])", "result": [1] }, + { "expression": "intersects([1], [])", "result": false }, + { "expression": "length([1, 2, 3])", "result": 3 }, + { "expression": "length([])", "result": 0 }, + { "expression": "count([1, 2, 3], 1)", "result": 1 }, + { "expression": "index([\"i\", \"j\", \"k\"], \"i\")", "result": 0 }, + { "expression": "index([\"i\", \"j\", \"k\"], \"j\")", "result": 1 }, + { "expression": "index([\"i\", \"j\", \"k\"], \"x\")", "result": null }, + { "expression": "sorted([3, 2, 1])", "result": [1, 2, 3] }, + { + "expression": "sorted([1, 2, 5, 10], \"lexical\")", + "result": [1, 10, 2, 5] + }, + { + "expression": "sorted([\"1\", \"2\", \"5\", \"10\"])", + "result": ["1", "10", "2", "5"] + }, + { + "expression": "sorted([\"1\", \"2\", \"5\", \"10\"], \"numeric\")", + "result": ["1", "2", "5", "10"] + }, + { + "expression": "sorted([\"1\", \"2\", \"n/a\"], \"numeric\")", + "result": ["1", "2", "n/a"] + }, + { + "expression": "sorted([\"n/a\", \"2\", \"1\"], \"numeric\")", + "result": ["n/a", "1", "2"] + }, + { + "expression": "allequal(sorted([3, 2, 1]), [1, 2, 3])", + "result": true + }, + { + "expression": "sorted([9, 81, 729, 6561])", + "result": [9, 81, 729, 6561] + }, + { "expression": "min([-1, \"n/a\", 1])", "result": -1 }, + { "expression": "max([-1, \"n/a\", 1])", "result": 1 }, + { "expression": "min(42)", "result": 42 }, + { "expression": "max(42)", "result": 42 }, + { "expression": "[3, 2, 1][0]", "result": 3 }, + { "expression": "\"string\"[0]", "result": "s" } + ], + "versions": [ + "1.10.1", + "1.10.0", + "1.9.0", + "1.8.0", + "1.7.0", + "1.6.0", + "1.5.0", + "1.4.1", + "1.4.0", + "1.3.0", + "1.2.2", + "1.2.1", + "1.2.0", + "1.1.2", + "1.1.1", + "1.1.0", + "1.0.2", + "1.0.1", + "1.0.0" + ] + }, + "objects": { + "columns": { + "HED": { + "name": "HED", + "display_name": "HED", + "description": "Hierarchical Event Descriptor (HED) tags.\nSee the [HED Appendix](SPEC_ROOT/appendices/hed.md) for details.\n", + "type": "string" + }, + "abbreviation": { + "name": "abbreviation", + "display_name": "Abbreviation", + "description": "The unique label abbreviation\n", + "type": "string" + }, + "acq_time__scans": { + "name": "acq_time", + "display_name": "Scan acquisition time", + "description": "Acquisition time refers to when the first data point in each run was acquired.\nFurthermore, if this header is provided, the acquisition times of all files\nfrom the same recording MUST be identical.\nDatetime format and their deidentification are described in\n[Units](SPEC_ROOT/common-principles.md#units).\n", + "type": "string", + "format": "datetime" + }, + "acq_time__sessions": { + "name": "acq_time", + "display_name": "Session acquisition time", + "description": "Acquisition time refers to when the first data point of the first run was acquired.\nDatetime format and their deidentification are described in\n[Units](SPEC_ROOT/common-principles.md#units).\n", + "type": "string", + "format": "datetime" + }, + "age": { + "name": "age", + "display_name": "Subject age", + "description": "Numeric value in years (float or integer value).\n\nFor privacy purposes, participant ages should be capped at 89.\nUsing \"89+\" for ages above 88 is DEPRECATED.\n", + "definition": { + "LongName": "Subject age", + "Description": "Subject age in postnatal years", + "Format": "number", + "Units": "year", + "Maximum": 89 + } + }, + "cardiac": { + "name": "cardiac", + "display_name": "Cardiac measurement", + "description": "continuous pulse measurement\n", + "definition": { + "Description": "continuous pulse measurement", + "Format": "number", + "Units": "mV" + } + }, + "channel": { + "name": "channel", + "display_name": "Channel", + "description": "Channel(s) associated with an event.\nIf multiple channels are specified, they MUST be separated by a delimiter\nspecified in the `\"Delimiter\"` field describing the `channel` column.\nFor example, channels separated with a comma (`,`) require the `events.json`\nfile to contain `\"channel\": {\"Delimiter\": \",\"}`.\nIn the absence of a delimiter, tools MUST interpret any character as being part\nof a channel name.\n", + "type": "string" + }, + "color": { + "name": "color", + "display_name": "Color label", + "description": "Hexadecimal. Label color for visualization.\n", + "type": "string", + "unit": "hexadecimal" + }, + "component": { + "name": "component", + "display_name": "Component", + "description": "Description of the spatial axis or label of quaternion component associated with the channel.\nFor example, `x`,`y`,`z` for position channels,\nor `quat_x`, `quat_y`, `quat_z`, `quat_w` for quaternion orientation channels.\n", + "type": "string", + "enum": ["x", "y", "z", "quat_x", "quat_y", "quat_z", "quat_w"] + }, + "detector__channels": { + "name": "detector", + "display_name": "Detector Name", + "description": "Name of the detector as specified in the `*_optodes.tsv` file.\n`n/a` for channels that do not contain NIRS signals (for example, acceleration).\n", + "type": "string" + }, + "detector_type": { + "name": "detector_type", + "display_name": "Detector Type", + "description": "The type of detector. Only to be used if the field `DetectorType` in `*_nirs.json` is set to `mixed`.\n", + "type": "string" + }, + "derived_from": { + "name": "derived_from", + "display_name": "Derived from", + "description": "`sample-