Skip to content

Commit cd1f4f0

Browse files
committed
Merge pull request #438 from yarikoptic/doc-usecases
2 parents cdce122 + 5a2f0f3 commit cd1f4f0

File tree

3 files changed

+310
-1
lines changed

3 files changed

+310
-1
lines changed
Lines changed: 267 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,267 @@
1+
#!/bin/bash
2+
#emacs: -*- mode: shell-script; c-basic-offset: 4; tab-width: 4; indent-tabs-mode: nil -*-
3+
#ex: set sts=4 ts=4 sw=4 et:
4+
#
5+
# This script is intended to demonstrate a sample workflow on a BIDS
6+
# dataset using mriqc, fmriprep, and custom analysis pipeline, mimicking the
7+
# steps presented in an fmriprep paper currently under review but using
8+
# DataLad, ReproNim/containers, and ReproNim.
9+
#
10+
# COPYRIGHT: Yaroslav Halchenko 2019
11+
#
12+
# LICENSE: MIT
13+
#
14+
# Permission is hereby granted, free of charge, to any person obtaining a copy
15+
# of this software and associated documentation files (the "Software"), to deal
16+
# in the Software without restriction, including without limitation the rights
17+
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
18+
# copies of the Software, and to permit persons to whom the Software is
19+
# furnished to do so, subject to the following conditions:
20+
#
21+
# The above copyright notice and this permission notice shall be included in
22+
# all copies or substantial portions of the Software.
23+
#
24+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
25+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
26+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
27+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
28+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
29+
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
30+
# THE SOFTWARE.
31+
#
32+
# Description
33+
#
34+
# Environment variables
35+
# - RUNNER - datalad or reproman
36+
# - CONTAINERS_REPO - an alternative (could be local) location for containers
37+
# repository
38+
# - INPUT_DATASET_REPO - an alternative (could be local) location for input
39+
# BIDS dataset
40+
#
41+
# Sample invocations
42+
# - Pointing to the existing local clones of input repositories for faster
43+
# "get"
44+
# RUNNER=datalad \
45+
# FS_LICENSE=~/.freesurfer-license \
46+
# CONTAINERS_REPO=~/proj/repronim/containers \
47+
# INPUT_DATASET_REPO=$PWD/bids-fmriprep-workflow-NP/ds000003-demo \
48+
# ./bids-fmriprep-workflow-NP.sh bids-fmriprep-workflow-NP/out2
49+
50+
set -eu
51+
# set -x
52+
53+
# $STUDY is a variable used in a paper this workflow mimics
54+
STUDY="$1"
55+
56+
# Define common parameters for the reproman run
57+
58+
# ReproMan orchestrator to be used - determines how data/results would be
59+
# transferred and how the execution would be recorded
60+
# Use reproman run --list orchestrators to get an updated list
61+
RM_ORC=datalad-pair-run # ,plain,datalad-pair,datalad-local-run
62+
63+
# Which batch processing system supported by ReproMan will be used
64+
# Use reproman run --list submitters to get an updated list
65+
# RM_SUB=condor,pbs,local
66+
67+
# Which resource to use
68+
# This requires configuring (if it was not done before)
69+
# a resource where execution will happen. For now will just use smaug below.
70+
# TODO: provide pointers to doc ( ;-) )
71+
# RM_RESOURCE=
72+
73+
#RM_RESOURCE=discovery
74+
#RM_SUB=PBS
75+
#
76+
# Necessary modules to be loaded in that session:
77+
# - singularity/2.4.2
78+
# Necessary installations/upgrades to be done (TODO: contact John)
79+
# - datalad (0.11.6, TODO: release first)
80+
# - datalad-container
81+
82+
# Default both the ReproMan resource and the submitter to "local" unless the
# caller already provided non-empty values in the environment.  The expansions
# are quoted so the resulting value is not word-split or glob-expanded when
# handed to the no-op ":" command.
: "${RM_RESOURCE:=local}"
: "${RM_SUB:=local}"
84+
85+
# TODO: at reproman level allow to specify ORC and SUB for a resource, so there would
86+
# be no need to specify for each invocation. Could be a new (meta) resource such as
87+
# "smaug-condor" which would link smaug physical resource with those parameters
88+
# TODO: point to the issue in ReproMan
89+
90+
: "${RUNNER:=reproman}"
91+
92+
# Complain on stderr about an unsupported $RUNNER value and terminate the
# script with exit status 1.
unknown_runner () {
    printf '%s\n' "ERROR: Unknown runner $RUNNER. Known reproman and datalad" 1>&2
    exit 1
}
96+
97+
# Common invocation of ReproMan
98+
# TODO: just make it configurable per project/env?
99+
# Invoke `reproman run --follow` with the resource, submitter and orchestrator
# taken from $RM_RESOURCE, $RM_SUB and $RM_ORC; every argument given to this
# function is appended verbatim after those options.
reproman_run () {
    local -a rm_opts=(
        --follow
        -r "${RM_RESOURCE}"
        --sub "${RM_SUB}"
        --orc "${RM_ORC}"
    )
    reproman run "${rm_opts[@]}" "$@"
}
102+
103+
104+
# TODO: see where such functionality could be provided within reproman, so could
105+
# be easily reused
106+
# Print the participant ids listed in the first participants.tsv found.
#
# Arguments: zero or more directories to look in; the current directory is
#            always checked last as a fallback.
# Outputs:   the ids from the first column of every line starting with "sub-",
#            with that "sub-" prefix removed, joined by commas and without a
#            trailing newline.  Prints nothing if no participants.tsv exists.
get_participant_ids () {
    local dir tsv
    for dir in "$@" .; do
        tsv="$dir/participants.tsv"
        if [ -e "$tsv" ]; then
            # awk splits on tabs itself, so no GNU-only "\t" escape in sed is
            # needed, and lines that consist of the id column only (no tab at
            # all) are still picked up.
            awk -F '\t' '
                /^sub-/ {
                    id = $1
                    sub(/^sub-/, "", id)
                    sub(/\r$/, "", id)  # tolerate CRLF in single-column files
                    ids = ids (n++ ? "," : "") id
                }
                END { printf "%s", ids }
            ' < "$tsv"
            # Only the first participants.tsv encountered is used.
            break
        fi
    done
}
119+
120+
# Run a containerized BIDS app on data/bids, storing results in data/<app>.
#
# Arguments:
#   $1   - app name; the container used is containers/bids-<app> and the
#          output dataset is data/<app>
#   $2   - whether to also run the "group" analysis level: 1|yes or 0|no
#   $3.. - extra options passed to the app ("-w work" is always appended)
# Globals read: RUNNER (reproman or datalad)
# Calls:        datalad, reproman_run, get_participant_ids, unknown_runner
# Exits with status 1 on an unrecognized group flag.
run_bids_app () {
    local app do_group run_group outds container
    local -a app_args app_runner_args

    app="$1"; shift
    do_group="$1"; shift
    app_args=( "$@" -w work )

    # Validate the group flag before any work is started, so that a typo is
    # reported right away (and with the same message for both runners) instead
    # of only after the participant-level run has finished.
    case "$do_group" in
        1|yes) run_group=1 ;;
        0|no)  run_group=0 ;;
        *)
            echo "Unknown value APP_GROUP=$do_group" >&2
            exit 1
            ;;
    esac

    outds=data/$app
    container=containers/bids-$app
    app_runner_args=( --input 'data/bids' --output "$outds" )

    # Keep the scratch directory out of version control.  grep is silenced:
    # only its exit status matters, and .gitignore may not exist yet.
    mkdir -p work
    if ! grep -q -e '^work$' .gitignore 2>/dev/null; then
        echo "work" >> .gitignore
        datalad save -m "Ignore work directory"
    fi

    set -x
    # Create target output dataset
    # TODO: per app specific configuration? some might have too heavy xml etc
    #       files
    [ -e "$outds" ] || datalad create -d . -c text2git "$outds"

    case "$RUNNER" in
        reproman)
            # Serial run
            # reproman_run --jp container=containers/bids-mriqc "${RUNNER_ARGS[@]}" "${MRIQC_ARGS[@]}"
            # Parallel requires two runs -- parallel across participants:
            reproman_run --jp "container=$container" "${app_runner_args[@]}" \
                --bp "pl=$(get_participant_ids data/bids)" \
                '{inputs}' '{outputs}' participant --participant_label '{p[pl]}' "${app_args[@]}"
            if [ "$run_group" = 1 ]; then
                # serial for the group
                reproman_run --jp "container=$container" "${app_runner_args[@]}" \
                    '{inputs}' '{outputs}' group "${app_args[@]}"
            fi
            ;;
        datalad)
            # A single invocation: "group" is placed right after "participant"
            # on the same command line when the group level was requested.
            if [ "$run_group" = 1 ]; then
                app_args=( group "${app_args[@]}" )
            fi
            datalad containers-run -n "$container" "${app_runner_args[@]}" \
                '{inputs}' '{outputs}' participant "${app_args[@]}"
            ;;
        *) unknown_runner;;
    esac
    set +x
}
174+
175+
#
176+
# Check asap for licenses since fmriprep needs one for FreeSurfer
177+
#
178+
179+
if [ -z "${FS_LICENSE:-}" ]; then
180+
if [ -e "${FREESURFER_HOME:-/XXXX}/.license" ]; then
181+
FS_LICENSE="${FREESURFER_HOME}/.license"
182+
else
183+
cat >&2 <<EOF
184+
Error: No FreeSurfer license found!
185+
Either define FREESURFER_HOME environment variable pointing to a directory
186+
with .license file for FreeSurfer or define FS_LICENSE environment variable
187+
which would either point to the license file or contain the license
188+
(with "\\n" for new lines) to be used for FreeSurfer
189+
EOF
190+
exit 1
191+
fi
192+
fi
193+
194+
195+
# Create study dataset
196+
datalad create -c text2git "$STUDY"
197+
cd "$STUDY"
198+
199+
#
200+
# Install containers dataset for guaranteed/unambiguous containers versioning
201+
# and datalad containers-run
202+
#
203+
# TODO: specific version, TODO - reference datalad issue
204+
205+
# Local copy to avoid heavy network traffic while testing locally could be
206+
# referenced in CONTAINERS_REPO env var
207+
datalad install -d . -s "${CONTAINERS_REPO:-///repronim/containers}"
208+
209+
# TODO: shift that into some helper script in the containers
210+
CONTAINERS_FS_LICENSE=containers/licenses/freesurfer
211+
if [ -e "$FS_LICENSE" ]; then
212+
cp "$FS_LICENSE" "$CONTAINERS_FS_LICENSE"
213+
else
214+
echo -n "$FS_LICENSE" >| "$CONTAINERS_FS_LICENSE"
215+
fi
216+
datalad save -d . -m "Added licenses/freesurfer (needed for fmriprep)" containers/licenses/
217+
( cd containers; git annex metadata licenses/freesurfer -s distribution-restrictions=sensitive; )
218+
219+
220+
# possibly downgrade versions to match the ones used in the "paper"
221+
containers/scripts/freeze_versions --save-dataset=^ \
222+
poldracklab-ds003-example=0.0.3 \
223+
bids-mriqc=0.15.0 \
224+
bids-fmriprep=1.4.1
225+
226+
#
227+
# Install dataset to be analyzed (no data - analysis might run in the cloud or on HPC)
228+
#
229+
# In original paper name for the dataset was used as is, and placed at the
230+
# top level. Here, to make this demo easier to apply to other studies,
231+
# and also check on other datasets, we install input dataset under a generic
232+
# "data/bids" path. "data/" will also collect all other derivatives etc
233+
mkdir data
234+
235+
# For now we will work with minimized version with only 2 subjects
236+
# datalad install -d . -s ///openneuro/ds000003 data/bids
237+
datalad install -d . -s "${INPUT_DATASET_REPO:-https://github.com/ReproNim/ds000003-demo}" data/bids
238+
239+
#
240+
# Execution.
241+
#
242+
# That is where access to the powerful resource (HPC) etc would be useful.
243+
# Each of those containerized apps might need custom options to be added.
244+
#
245+
#
246+
247+
# datalad save -d . -m "Due to https://github.com/datalad/datalad/issues/3591" data/mriqc
248+
249+
250+
run_bids_app mriqc yes
251+
# note: not using $CONTAINERS_FS_LICENSE just to make things a bit more explicit
252+
run_bids_app fmriprep no --fs-license-file=containers/licenses/freesurfer
253+
254+
# 3. poldracklab-ds003-example -- analysis
255+
256+
# X. Later? visualization etc - used nilearn
257+
258+
259+
exit 0 # done for now
260+
261+
262+
reproman run --follow -r "${RM_RESOURCE}" --sub "${RM_SUB}" --orc "${RM_ORC}" \
263+
--bp 'thing=thing-*' \
264+
--input '{p[thing]}' \
265+
sh -c 'cat {p[thing]} {p[thing]} >doubled-{p[thing]}'
266+
267+

docs/usecases/simple_kwyk.sh

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
#!/bin/bash
2+
#emacs: -*- mode: shell-script; c-basic-offset: 4; tab-width: 4; indent-tabs-mode: t -*-
3+
#ex: set sts=4 ts=4 sw=4 noet:
4+
#
5+
#
6+
# COPYRIGHT: Yaroslav Halchenko 2019
7+
#
8+
# LICENSE: MIT
9+
#
10+
# Permission is hereby granted, free of charge, to any person obtaining a copy
11+
# of this software and associated documentation files (the "Software"), to deal
12+
# in the Software without restriction, including without limitation the rights
13+
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
14+
# copies of the Software, and to permit persons to whom the Software is
15+
# furnished to do so, subject to the following conditions:
16+
#
17+
# The above copyright notice and this permission notice shall be included in
18+
# all copies or substantial portions of the Software.
19+
#
20+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
23+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25+
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
26+
# THE SOFTWARE.
27+
#
28+
29+
set -eu
30+
31+
# Work inside a freshly created ds-XXXX directory under the current one.
# The substitution is quoted so a path containing whitespace or glob
# characters is passed to cd as a single, literal argument.
cd "$(mktemp -d --tmpdir=. ds-XXXX)"
32+
pwd
33+
datalad create .
34+
datalad install -d . ///repronim/containers
35+
datalad install -d . -s https://github.com/ReproNim/ds000003-demo data/bids
36+
37+
mkdir data/kwyked
38+
datalad containers-run \
39+
--input data/bids/sub-02/anat/sub-02_T1w.nii.gz \
40+
--output data/kwyked/sub-02_T1w \
41+
-n containers/neuronets-kwyk \
42+
'{inputs}' '{outputs}'

reproman/support/jobs/job_templates/runscript/base.template.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@ then
5353
# least for now, below is a brittle solution where the last job waits until it
5454
# sees that all other jobs have exited and then runs the post-command stuff.
5555
# Print how many files named status.<digits> exist anywhere under $metadir.
nstatus () {
    # "[0-9][0-9]*" rather than "[0-9]+": "+" is not a repetition operator in
    # basic regular expressions, so the longer spelling does not depend on
    # which regex flavour find uses by default.
    find "$metadir" -regex '.*/status\.[0-9][0-9]*' | wc -l
}
5858

5959
# Ugly, but this sleep makes it less likely for the post-command tar to fail

0 commit comments

Comments
 (0)