diff --git a/.github/workflows/validate_datasets.yml b/.github/workflows/validate_datasets.yml index c71ecf609..3b3edb2cb 100644 --- a/.github/workflows/validate_datasets.yml +++ b/.github/workflows/validate_datasets.yml @@ -38,7 +38,7 @@ jobs: - name: Set up Node.js if: matrix.bids-validator == 'legacy' - uses: actions/setup-node@v6 + uses: actions/setup-node@v5 with: node-version: 22 @@ -107,7 +107,7 @@ jobs: fi - name: Skip legacy validation for post-legacy datasets - run: for DS in mrs_* dwi_deriv pet006 pheno004 volume_timing; do touch $DS/.SKIP_VALIDATION; done + run: for DS in mrs_* dwi_deriv pet006 pheno004 volume_timing provenance_*; do touch $DS/.SKIP_VALIDATION; done if: matrix.bids-validator == 'legacy' - name: Skip stable validation for datasets with unreleased validator features @@ -119,7 +119,7 @@ jobs: - name: Skip main validation for datasets with unreleased spec features # Replace ${EMPTY} with dataset patterns, when this is needed # Reset to "for DS in ${EMPTY}; ..." after a spec release - run: for DS in dwi_deriv pheno004; do touch $DS/.SKIP_VALIDATION; done + run: for DS in dwi_deriv pheno004 provenance_*; do touch $DS/.SKIP_VALIDATION; done if: matrix.bids-validator != 'dev' - name: Set BIDS_SCHEMA variable for dev version @@ -128,7 +128,7 @@ jobs: # Update this URL to the schema.json from PRs to the spec, when needed. 
# If this variable is unset, dev will generally track the latest development # release of https://jsr.io/@bids/schema - run: echo BIDS_SCHEMA=https://bids-specification.readthedocs.io/en/latest/schema.json >> $GITHUB_ENV + run: echo BIDS_SCHEMA=https://bids-specification--2099.org.readthedocs.build/en/2099/schema.json >> $GITHUB_ENV - name: Validate all BIDS datasets using bids-validator run: | diff --git a/provenance_manual/.bidsignore b/provenance_manual/.bidsignore new file mode 100644 index 000000000..e69de29bb diff --git a/provenance_manual/README.md b/provenance_manual/README.md new file mode 100644 index 000000000..a9b17c855 --- /dev/null +++ b/provenance_manual/README.md @@ -0,0 +1,47 @@ +# Provenance of manual brain segmentations + +## Goal + +This example aims at showing provenance metadata for a study dataset in which several experts performed manual brain segmentations from the same T1w file. + +## Directory tree + +> [!NOTE] +> Note that the `docs/` directory contains explanatory data (see [Provenance as a RDF graph](#provenance-as-a-rdf-graph)) that is not required to encode provenance. + +``` +. 
+├── dataset_description.json +├── derivatives +│ └── seg +│ ├── dataset_description.json +│ ├── descriptions.tsv +│ ├── docs +│ │ ├── prov-seg.jsonld +│ │ └── prov-seg.png +│ ├── prov +│ │ ├── provenance.tsv +│ │ ├── prov-seg_desc-exp1_act.json +│ │ ├── prov-seg_desc-exp2_act.json +│ │ └── prov-seg_ent.json +│ └── sub-001 +│ └── anat +│ ├── sub-001_space-orig_desc-exp1_dseg.json +│ ├── sub-001_space-orig_desc-exp1_dseg.nii.gz +│ ├── sub-001_space-orig_desc-exp2_dseg.json +│ └── sub-001_space-orig_desc-exp2_dseg.nii.gz +├── README.md +└── sourcedata + └── raw + ├── dataset_description.json + └── sub-001 + └── anat + ├── sub-001_T1w.json + └── sub-001_T1w.nii.gz +``` + +## Provenance as a RDF graph + +Provenance metadata can be aggregated as a JSON-LD RDF graph, which is available in [`derivatives/seg/docs/prov-seg.jsonld`](derivatives/seg/docs/prov-seg.jsonld). This is a rendered version of the graph, also available in [`derivatives/seg/docs/prov-seg.png`](derivatives/seg/docs/prov-seg.png). 
+ +![Rendered version of the RDF graph](derivatives/seg/docs/prov-seg.png) diff --git a/provenance_manual/dataset_description.json b/provenance_manual/dataset_description.json new file mode 100644 index 000000000..901b44576 --- /dev/null +++ b/provenance_manual/dataset_description.json @@ -0,0 +1,9 @@ +{ + "Name": "Brain segmentations from 2 experts", + "BIDSVersion": "1.10.0", + "DatasetType": "study", + "License": "CC0", + "Authors": [ + "Boris Clénet" + ] +} \ No newline at end of file diff --git a/provenance_manual/derivatives/seg/dataset_description.json b/provenance_manual/derivatives/seg/dataset_description.json new file mode 100644 index 000000000..dc5e4cb16 --- /dev/null +++ b/provenance_manual/derivatives/seg/dataset_description.json @@ -0,0 +1,12 @@ +{ + "Name": "Manual brain segmentations", + "BIDSVersion": "1.10.0", + "DatasetType": "derivative", + "License": "CC0", + "Authors": [ + "Boris Clénet" + ], + "DatasetLinks": { + "raw": "../../sourcedata/raw" + } +} \ No newline at end of file diff --git a/provenance_manual/derivatives/seg/descriptions.tsv b/provenance_manual/derivatives/seg/descriptions.tsv new file mode 100644 index 000000000..371d9ef6d --- /dev/null +++ b/provenance_manual/derivatives/seg/descriptions.tsv @@ -0,0 +1,3 @@ +desc_id description +desc-exp1 Files generated by expert #1 +desc-exp2 Files generated by expert #2 diff --git a/provenance_manual/derivatives/seg/docs/prov-seg.jsonld b/provenance_manual/derivatives/seg/docs/prov-seg.jsonld new file mode 100644 index 000000000..cb6fd3550 --- /dev/null +++ b/provenance_manual/derivatives/seg/docs/prov-seg.jsonld @@ -0,0 +1,45 @@ +{ + "BIDSProvVersion": "0.0.1", + "@context": "https://raw.githubusercontent.com/bclenet/bids-specification/refs/heads/BEP028_spec/src/provenance-context.json", + "Records": { + "Software": [], + "Activities": [ + { + "Id": "bids::prov#segmentation-nO5RGsrb", + "Label": "Manual brain segmentation", + "Command": null, + "Used": [ + 
"bids:raw:sub-001/anat/sub-001_T1w.nii.gz" + ] + }, + { + "Id": "bids::prov#segmentation-mOOypIYB", + "Label": "Manual brain segmentation", + "Command": null, + "Used": [ + "bids:raw:sub-001/anat/sub-001_T1w.nii.gz" + ] + } + ], + "ProvEntities": [ + { + "Id": "bids:raw:sub-001/anat/sub-001_T1w.nii.gz", + "Label": "sub-001/anat/sub-001_T1w.nii.gz", + "AtLocation": "../../sourcedata/raw/sub-001/anat/sub-001_T1w.nii.gz" + }, + { + "Id": "bids::sub-001/anat/sub-001_space-orig_desc-exp1_dseg.nii.gz", + "Label": "sub-001_space-orig_desc-exp1_dseg.nii.gz", + "AtLocation": "sub-001/anat/sub-001_space-orig_desc-exp1_dseg.nii.gz", + "GeneratedBy": "bids::prov#segmentation-nO5RGsrb" + }, + { + "Id": "bids::sub-001/anat/sub-001_space-orig_desc-exp2_dseg.nii.gz", + "Label": "sub-001_space-orig_desc-exp2_dseg.nii.gz", + "AtLocation": "sub-001/anat/sub-001_space-orig_desc-exp2_dseg.nii.gz", + "GeneratedBy": "bids::prov#segmentation-mOOypIYB" + } + ], + "Environments": [] + } +} \ No newline at end of file diff --git a/provenance_manual/derivatives/seg/docs/prov-seg.png b/provenance_manual/derivatives/seg/docs/prov-seg.png new file mode 100644 index 000000000..5f112fafb Binary files /dev/null and b/provenance_manual/derivatives/seg/docs/prov-seg.png differ diff --git a/provenance_manual/derivatives/seg/prov/prov-seg_desc-exp1_act.json b/provenance_manual/derivatives/seg/prov/prov-seg_desc-exp1_act.json new file mode 100644 index 000000000..907b7e45a --- /dev/null +++ b/provenance_manual/derivatives/seg/prov/prov-seg_desc-exp1_act.json @@ -0,0 +1,12 @@ +{ + "Activities": [ + { + "Id": "bids::prov#segmentation-nO5RGsrb", + "Label": "Manual brain segmentation", + "Command": null, + "Used": [ + "bids:raw:sub-001/anat/sub-001_T1w.nii.gz" + ] + } + ] +} diff --git a/provenance_manual/derivatives/seg/prov/prov-seg_desc-exp2_act.json b/provenance_manual/derivatives/seg/prov/prov-seg_desc-exp2_act.json new file mode 100644 index 000000000..109b177ed --- /dev/null +++ 
b/provenance_manual/derivatives/seg/prov/prov-seg_desc-exp2_act.json @@ -0,0 +1,12 @@ +{ + "Activities": [ + { + "Id": "bids::prov#segmentation-mOOypIYB", + "Label": "Manual brain segmentation", + "Command": null, + "Used": [ + "bids:raw:sub-001/anat/sub-001_T1w.nii.gz" + ] + } + ] +} diff --git a/provenance_manual/derivatives/seg/prov/prov-seg_ent.json b/provenance_manual/derivatives/seg/prov/prov-seg_ent.json new file mode 100644 index 000000000..d24590fb2 --- /dev/null +++ b/provenance_manual/derivatives/seg/prov/prov-seg_ent.json @@ -0,0 +1,9 @@ +{ + "ProvEntities": [ + { + "Id": "bids:raw:sub-001/anat/sub-001_T1w.nii.gz", + "Label": "sub-001/anat/sub-001_T1w.nii.gz", + "AtLocation": "../../sourcedata/raw/sub-001/anat/sub-001_T1w.nii.gz" + } + ] +} \ No newline at end of file diff --git a/provenance_manual/derivatives/seg/prov/provenance.tsv b/provenance_manual/derivatives/seg/prov/provenance.tsv new file mode 100644 index 000000000..a4fdd2bbf --- /dev/null +++ b/provenance_manual/derivatives/seg/prov/provenance.tsv @@ -0,0 +1,2 @@ +provenance_label description +prov-seg Manual brain segmentation performed by two experts diff --git a/provenance_manual/derivatives/seg/sub-001/anat/sub-001_space-orig_desc-exp1_dseg.json b/provenance_manual/derivatives/seg/sub-001/anat/sub-001_space-orig_desc-exp1_dseg.json new file mode 100644 index 000000000..ed7ebc6a4 --- /dev/null +++ b/provenance_manual/derivatives/seg/sub-001/anat/sub-001_space-orig_desc-exp1_dseg.json @@ -0,0 +1,3 @@ +{ + "GeneratedBy": "bids::prov#segmentation-nO5RGsrb" +} \ No newline at end of file diff --git a/provenance_manual/derivatives/seg/sub-001/anat/sub-001_space-orig_desc-exp1_dseg.nii.gz b/provenance_manual/derivatives/seg/sub-001/anat/sub-001_space-orig_desc-exp1_dseg.nii.gz new file mode 100644 index 000000000..e69de29bb diff --git a/provenance_manual/derivatives/seg/sub-001/anat/sub-001_space-orig_desc-exp2_dseg.json 
b/provenance_manual/derivatives/seg/sub-001/anat/sub-001_space-orig_desc-exp2_dseg.json new file mode 100644 index 000000000..28fed1304 --- /dev/null +++ b/provenance_manual/derivatives/seg/sub-001/anat/sub-001_space-orig_desc-exp2_dseg.json @@ -0,0 +1,3 @@ +{ + "GeneratedBy": "bids::prov#segmentation-mOOypIYB" +} \ No newline at end of file diff --git a/provenance_manual/derivatives/seg/sub-001/anat/sub-001_space-orig_desc-exp2_dseg.nii.gz b/provenance_manual/derivatives/seg/sub-001/anat/sub-001_space-orig_desc-exp2_dseg.nii.gz new file mode 100644 index 000000000..e69de29bb diff --git a/provenance_manual/sourcedata/raw/dataset_description.json b/provenance_manual/sourcedata/raw/dataset_description.json new file mode 100644 index 000000000..c6ba04644 --- /dev/null +++ b/provenance_manual/sourcedata/raw/dataset_description.json @@ -0,0 +1,9 @@ +{ + "Name": "T1w image of one participant", + "BIDSVersion": "1.10.0", + "DatasetType": "raw", + "License": "CC0", + "Authors": [ + "Boris Clénet" + ] +} \ No newline at end of file diff --git a/provenance_manual/sourcedata/raw/sub-001/anat/sub-001_T1w.json b/provenance_manual/sourcedata/raw/sub-001/anat/sub-001_T1w.json new file mode 100644 index 000000000..3c6ebf8fb --- /dev/null +++ b/provenance_manual/sourcedata/raw/sub-001/anat/sub-001_T1w.json @@ -0,0 +1,8 @@ +{ + "RepetitionTime": 2.53, + "EchoTime": 0.00299, + "FlipAngle": 7, + "Manufacturer": "SIEMENS", + "ManufacturersModelName": "Prisma", + "MagneticFieldStrength": 3 +} \ No newline at end of file diff --git a/provenance_manual/sourcedata/raw/sub-001/anat/sub-001_T1w.nii.gz b/provenance_manual/sourcedata/raw/sub-001/anat/sub-001_T1w.nii.gz new file mode 100644 index 000000000..e69de29bb