|
| 1 | +#!/bin/bash |
| 2 | +#emacs: -*- mode: shell-script; c-basic-offset: 4; tab-width: 4; indent-tabs-mode: nil -*- |
| 3 | +#ex: set sts=4 ts=4 sw=4 et: |
| 4 | +# |
| 5 | +# This script is intended to demonstrate a sample workflow on a BIDS |
| 6 | +# dataset using mriqc, fmriprep, and custom analysis pipeline, mimicking the |
| 7 | +# steps presented in an fmriprep paper currently under review but using |
| 8 | +# DataLad, ReproNim/containers, and ReproNim. |
| 9 | +# |
| 10 | +# COPYRIGHT: Yaroslav Halchenko 2019 |
| 11 | +# |
| 12 | +# LICENSE: MIT |
| 13 | +# |
| 14 | +# Permission is hereby granted, free of charge, to any person obtaining a copy |
| 15 | +# of this software and associated documentation files (the "Software"), to deal |
| 16 | +# in the Software without restriction, including without limitation the rights |
| 17 | +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
| 18 | +# copies of the Software, and to permit persons to whom the Software is |
| 19 | +# furnished to do so, subject to the following conditions: |
| 20 | +# |
| 21 | +# The above copyright notice and this permission notice shall be included in |
| 22 | +# all copies or substantial portions of the Software. |
| 23 | +# |
| 24 | +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
| 25 | +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
| 26 | +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
| 27 | +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
| 28 | +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
| 29 | +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN |
| 30 | +# THE SOFTWARE. |
| 31 | +# |
| 32 | +# Description |
| 33 | +# |
| 34 | +# Environment variables |
| 35 | +# - RUNNER - datalad or reproman |
| 36 | +# - CONTAINERS_REPO - an alternative (could be local) location for containers |
| 37 | +# repository |
| 38 | +# - INPUT_DATASET_REPO - an alternative (could be local) location for input |
| 39 | +# BIDS dataset |
| 40 | +# |
| 41 | +# Sample invocations |
| 42 | +# - Pointing to the existing local clones of input repositories for faster |
| 43 | +# "get" |
| 44 | +# RUNNER=datalad \ |
| 45 | +# FS_LICENSE=~/.freesurfer-license \ |
| 46 | +# CONTAINERS_REPO=~/proj/repronim/containers \ |
| 47 | +# INPUT_DATASET_REPO=$PWD/bids-fmriprep-workflow-NP/ds000003-demo \ |
| 48 | +# ./bids-fmriprep-workflow-NP.sh bids-fmriprep-workflow-NP/out2 |
| 49 | + |
# Abort on the first failing command and on any use of an unset variable.
set -eu
# set -x

# $STUDY is a variable used in a paper this workflow mimics.  Here it is the
# path of the study dataset to be created, taken from the first (required)
# command line argument.
if [ "$#" -lt 1 ] || [ -z "$1" ]; then
    # Without this check "set -u" would only report "$1: unbound variable",
    # and an empty argument would slip through to "datalad create".
    echo "Usage: $0 <path-of-study-dataset-to-create>" >&2
    exit 1
fi
STUDY="$1"
| 55 | + |
# Define common parameters for the reproman run

# ReproMan orchestrator to be used - determines how data/results are
# transferred and how the execution is recorded.
# Use "reproman run --list orchestrators" to get an updated list
RM_ORC=datalad-pair-run  # ,plain,datalad-pair,datalad-local-run

# Which batch processing system supported by ReproMan will be used.
# Use "reproman run --list submitters" to get an updated list
# RM_SUB=condor,pbs,local

# Which resource to use.
# A resource where the execution will happen has to be configured (if that
# was not done before).  Unless RM_RESOURCE/RM_SUB are provided in the
# environment, "local" is used for both (see the defaults below).
# TODO: provide pointers to doc ( ;-) )
# RM_RESOURCE=

#RM_RESOURCE=discovery
#RM_SUB=PBS
#
# Necessary modules to be loaded in that session:
#  - singularity/2.4.2
# Necessary installations/upgrades to be done (TODO: contact John)
#  - datalad (0.11.6, TODO: release first)
#  - datalad-container

# Defaults apply only if the variable is unset or empty.  The expansions are
# quoted so that a value coming from the environment is never word-split or
# glob-expanded by the ":" no-op command.
: "${RM_RESOURCE:=local}"
: "${RM_SUB:=local}"

# TODO: at reproman level allow to specify ORC and SUB for a resource, so there would
# be no need to specify for each invocation.  Could be a new (meta) resource such as
# "smaug-condor" which would link smaug physical resource with those parameters
# TODO: point to the issue in ReproMan

# Which tool drives the execution: reproman (default) or datalad
: "${RUNNER:=reproman}"
| 91 | + |
# Complain on stderr about an unsupported value of $RUNNER and terminate
# the script with exit status 1.
unknown_runner () {
    printf '%s\n' "ERROR: Unknown runner $RUNNER. Known reproman and datalad" 1>&2
    exit 1
}
| 96 | + |
# Shared way to invoke ReproMan: "reproman run --follow" with the resource,
# submitter and orchestrator taken from RM_RESOURCE, RM_SUB and RM_ORC.
# All arguments given to this function are appended to that command line.
# TODO: just make it configurable per project/env?
reproman_run () {
    local -a common_opts
    common_opts=( --follow -r "${RM_RESOURCE}" --sub "${RM_SUB}" --orc "${RM_ORC}" )
    reproman run "${common_opts[@]}" "$@"
}
| 102 | + |
| 103 | + |
# TODO: see where such functionality could be provided within reproman, so could
# be easily reused
#
# Print the participant ids listed in the first participants.tsv found.
#
# Arguments: directories to look into, in the given order; the current
#            directory is always tried last.
# Outputs:   on stdout, the ids of all lines starting with "sub-", with that
#            prefix removed, comma-separated and without a trailing newline
#            (e.g. "01,02").  Nothing is printed if no file is found.
get_participant_ids () {
    local dir tsv
    for dir in "$@" .; do
        tsv="$dir/participants.tsv"
        if [ -e "$tsv" ]; then
            # The id is the first tab-separated field.  awk is used instead
            # of sed so that no GNU-only "\t" escape is needed and so that
            # files with just a single column (no tab at all) work too.
            awk -F '\t' '
                /^sub-/ {
                    id = $1
                    sub(/^sub-/, "", id)
                    sub(/\r$/, "", id)   # tolerate CRLF line endings
                    printf "%s%s", sep, id
                    sep = ","
                }
            ' < "$tsv"
            # only the first participants.tsv found is used
            break
        fi
    done
}
| 119 | + |
# Run one containerized BIDS App on data/bids, storing results in data/<app>.
#
# Globals:   RUNNER (read) - "reproman" or "datalad"
# Arguments: $1   - app name; container "containers/bids-<app>" is used and
#                   the output dataset is "data/<app>"
#            $2   - also run the group level: 1|yes or 0|no
#            $3.. - extra options for the app ("-w work" is always appended)
# Exits with status 1 on an unknown group flag (checked before anything is
# run) and, via unknown_runner, on an unknown $RUNNER.
run_bids_app () {
    local app="$1"; shift
    local do_group="$1"; shift
    local -a app_args=( "$@" -w work )

    local outds="data/$app"
    local container="containers/bids-$app"
    local -a app_runner_args=( --input 'data/bids' --output "$outds" )

    # Validate the group flag up front, identically for every runner, so a
    # typo is reported before any (potentially long) computation starts.
    local run_group=0
    case "$do_group" in
        1|yes) run_group=1 ;;
        0|no) ;;
        *)
            echo "Unknown value APP_GROUP=$do_group" >&2
            exit 1
            ;;
    esac

    mkdir -p work
    # Ignore the scratch directory; quiet check which also copes with a
    # missing .gitignore.  Only .gitignore itself is saved so that unrelated
    # modifications do not end up in this commit.
    if ! grep -q -e '^work$' .gitignore 2>/dev/null; then
        echo "work" >> .gitignore
        datalad save -m "Ignore work directory" .gitignore
    fi

    set -x
    # Create target output dataset
    # TODO: per app specific configuration?  some might have too heavy xml etc
    #       files
    [ -e "$outds" ] || datalad create -d . -c text2git "$outds"

    case "$RUNNER" in
        reproman)
            # Serial run
            # reproman_run --jp container=containers/bids-mriqc "${RUNNER_ARGS[@]}" "${MRIQC_ARGS[@]}"
            # Parallel requires two runs -- parallel across participants:
            reproman_run --jp "container=$container" "${app_runner_args[@]}" \
                --bp "pl=$(get_participant_ids data/bids)" \
                '{inputs}' '{outputs}' participant --participant_label '{p[pl]}' "${app_args[@]}"
            if [ "$run_group" -eq 1 ]; then
                # serial for the group
                reproman_run --jp "container=$container" "${app_runner_args[@]}" \
                    '{inputs}' '{outputs}' group "${app_args[@]}"
            fi
            ;;
        datalad)
            # Single run: "group" is passed as an additional analysis level
            # right after "participant"
            if [ "$run_group" -eq 1 ]; then
                app_args=( group "${app_args[@]}" )
            fi
            datalad containers-run -n "$container" "${app_runner_args[@]}" \
                '{inputs}' '{outputs}' participant "${app_args[@]}"
            ;;
        *)
            unknown_runner
            ;;
    esac
    set +x
}
| 174 | + |
#
# Check asap for licenses since fmriprep needs one for FreeSurfer
#

if [ -z "${FS_LICENSE:-}" ]; then
    # No license given explicitly: fall back to the license file of a local
    # FreeSurfer installation.  Both file names used by FreeSurfer are tried;
    # /XXXX is just a non-existing placeholder for an unset FREESURFER_HOME.
    for fs_license_candidate in \
        "${FREESURFER_HOME:-/XXXX}/.license" \
        "${FREESURFER_HOME:-/XXXX}/license.txt"; do
        if [ -e "$fs_license_candidate" ]; then
            FS_LICENSE="$fs_license_candidate"
            break
        fi
    done
    unset fs_license_candidate

    if [ -z "${FS_LICENSE:-}" ]; then
        cat >&2 <<EOF
Error: No FreeSurfer license found!
  Either define FREESURFER_HOME environment variable pointing to a directory
  with .license (or license.txt) file for FreeSurfer or define FS_LICENSE
  environment variable which would either point to the license file or
  contain the license (with "\\n" for new lines) to be used for FreeSurfer
EOF
        exit 1
    fi
fi
| 193 | + |
| 194 | + |
# Create study dataset; everything below operates from within it
datalad create -c text2git "$STUDY"
cd "$STUDY"

#
# Install containers dataset for guaranteed/unambiguous containers versioning
# and datalad containers-run
#
# TODO: specific version, TODO - reference datalad issue

# CONTAINERS_REPO env var may point to a local copy, which avoids heavy
# network traffic while testing locally
containers_src="${CONTAINERS_REPO:-///repronim/containers}"
datalad install -d . -s "$containers_src"
| 208 | + |
# Place the FreeSurfer license into the containers dataset and save it.
# TODO: shift that into some helper script in the containers
CONTAINERS_FS_LICENSE=containers/licenses/freesurfer
if [ -e "$FS_LICENSE" ]; then
    # FS_LICENSE is a path to an existing license file
    cp "$FS_LICENSE" "$CONTAINERS_FS_LICENSE"
else
    # FS_LICENSE holds the license text itself, possibly with "\n" standing
    # for new lines (as advertised in the error message of the license
    # check).  bash's "echo -n" would write those two characters verbatim;
    # printf's %b expands them into real newlines.
    printf '%b' "$FS_LICENSE" >| "$CONTAINERS_FS_LICENSE"
fi
datalad save -d . -m "Added licenses/freesurfer (needed for fmriprep)" containers/licenses/
# Tag the license as sensitive in git-annex metadata; "&&" makes sure
# git annex is never run outside of containers/ should the cd fail
( cd containers && git annex metadata licenses/freesurfer -s distribution-restrictions=sensitive; )
| 218 | + |
| 219 | + |
# possibly downgrade versions to match the ones used in the "paper";
# one <container>=<version> entry per pinned container
frozen_versions=(
    poldracklab-ds003-example=0.0.3
    bids-mriqc=0.15.0
    bids-fmriprep=1.4.1
)
containers/scripts/freeze_versions --save-dataset=^ "${frozen_versions[@]}"
| 225 | + |
#
# Install dataset to be analyzed (no data - analysis might run in the cloud or on HPC)
#
# The original paper used the name of the dataset as is and placed it at the
# top level.  To make this demo easier to apply to other studies, and to
# check on other datasets, the input dataset is installed here under a generic
# "data/bids" path.  "data/" will also collect all other derivatives etc
mkdir data

# For now we will work with minimized version with only 2 subjects
# datalad install -d . -s ///openneuro/ds000003 data/bids
input_dataset_src="${INPUT_DATASET_REPO:-https://github.com/ReproNim/ds000003-demo}"
datalad install -d . -s "$input_dataset_src" data/bids
| 238 | + |
#
# Execution.
#
# That is where access to a powerful resource (HPC etc.) would be useful.
# Each of those containerized apps might need custom options to be added.
#
#

# datalad save -d . -m "Due to https://github.com/datalad/datalad/issues/3591" data/mriqc


# 1. mriqc -- second argument "yes" requests the group level run as well
run_bids_app mriqc yes
# 2. fmriprep -- "no" group level; the remaining argument is passed on to the app
# note: not using $CONTAINERS_FS_LICENSE just to make things a bit more explicit
run_bids_app fmriprep no --fs-license-file=containers/licenses/freesurfer

# 3. poldracklab-ds003-example -- analysis

# X. Later? visualization etc - used nilearn


# The script ends here; nothing below this line is executed.
exit 0 # done for now
| 260 | + |
| 261 | + |
# NOTE: unreachable -- the script has already terminated via "exit 0" above.
# Left in place as an example of a raw "reproman run" invocation with the same
# -r/--sub/--orc options as reproman_run, here combined with --bp and --input.
reproman run --follow -r "${RM_RESOURCE}" --sub "${RM_SUB}" --orc "${RM_ORC}" \
    --bp 'thing=thing-*' \
    --input '{p[thing]}' \
    sh -c 'cat {p[thing]} {p[thing]} >doubled-{p[thing]}'
| 266 | + |
| 267 | + |
0 commit comments