Merge pull request #39 from neuroscout/datalad

adelavega · web-flow · commit 10eb826fa599 · 2018-08-02T13:30:15.000-05:00
Datalad compatibility
diff --git a/Dockerfile b/Dockerfile
@@ -1,28 +1,34 @@
 # Use an poldracklab/fitlins as a parent image
 FROM poldracklab/fitlins
 
-# Copy the current directory contents into the container at /app (using COPY instead of ADD to keep it lighter)
-COPY [".", "/src/neuroscout"]
-
 # Set user back to root
 USER root
 RUN chown -R root /src /work
 
+# Install neurodebian/datalad
+ARG DEBIAN_FRONTEND=noninteractive
+RUN apt-get update && apt-get install datalad -yq
+
+RUN git config --global user.name "Neuroscout"
+RUN git config --global  user.email "user@example.edu"
+
+
 # Install additional neuroscout + dependencies
 RUN /bin/bash -c "source activate neuro \
       && pip install -q --no-cache-dir -e /src/fitlins[all]" \
     && sync
 
+# Copy the current directory contents into the container at /src/neuroscout (using COPY instead of ADD to keep it lighter)
+COPY [".", "/src/neuroscout"]
+
 RUN /bin/bash -c "source activate neuro \
       && pip install -q --no-cache-dir -e /src/neuroscout/" \
     && sync
 
 RUN /bin/bash -c "source activate neuro \
-      && pip install -q --no-cache-dir -r /src/neuroscout/requirements.txt" \
+      && pip install -q --no-cache-dir --upgrade -r /src/neuroscout/requirements.txt" \
     && sync
 
-RUN /bin/bash -c "source activate neuro \
-     && pip install -q  --no-cache-dir --upgrade git+https://github.com/INCF/pybids.git#egg=pybids"
 
 WORKDIR /work
 
diff --git a/neuroscout_cli/cli.py b/neuroscout_cli/cli.py
@@ -2,7 +2,7 @@
 neuroscout
 
 Usage:
-    neuroscout run <bundle_id> [-dn -w <work_dir> -o <out_dir> -i <install_dir> --n-cpus=<n>]
+    neuroscout run <bundle_id> [-dn -o <out_dir> -i <install_dir> --n-cpus=<n> --work-dir=<dir>]
     neuroscout install <bundle_id> [-dn -i <install_dir>]
     neuroscout ls <bundle_id>
     neuroscout -h | --help
@@ -11,7 +11,7 @@
 Options:
     -i <install_dir>        Directory to download dataset and bundle [default: .]
     -o <out_dir>            Output directory [default: bundle_dir]
-    -w <work_dir>           Working directory
+    --work-dir=<dir>        Working directory
     --n-cpus=<n>            Maximum number of threads across all processes [default: 1]
     -n, --no-download       Dont download dataset (if available locally)
     -h --help               Show this screen
diff --git a/neuroscout_cli/commands/install.py b/neuroscout_cli/commands/install.py
@@ -1,6 +1,6 @@
 from neuroscout_cli.commands.base import Command
 from neuroscout_cli import API_URL
-from datalad.api import install
+from datalad.api import install, get
 from pathlib import Path
 import requests
 import json
@@ -59,31 +59,35 @@ def download_bundle(self):
     def download_data(self):
         self.download_bundle()
 
-        # logging.info("Installing dataset...")
-        # Use datalad to install the raw BIDS dataset
-        # install(source=self.resources['dataset_address'],
-        #         path=(self.dataset_dir).as_posix()).path
+        remote_files = self.resources['func_paths'] + self.resources['mask_paths']
+        remote_path = self.resources['preproc_address']
 
-        # Pre-fetch specific files from the original dataset?
-        logging.info("Fetching remote resources...")
+        deriv_dir = Path(self.dataset_dir) / 'derivatives'
 
-        # Fetch remote preprocessed files
-        remote_path = self.resources['preproc_address']
-        remote_files = self.resources['func_paths'] + self.resources['mask_paths']
+        try:
+            if not (deriv_dir / 'fmriprep').exists():
+                # Use datalad to install the preprocessed (fmriprep) dataset
+                install(source=remote_path,
+                        path=(deriv_dir / 'fmriprep').as_posix())
+
+            preproc_dir = deriv_dir / 'fmriprep' / 'fmriprep'
+            get([(preproc_dir / f).as_posix() for f in remote_files])
+        except Exception as e:
+            message = e.failed[0]['message']
+            if 'Failed to clone data from any candidate source URL' not in message[0]:
+                raise ValueError("Datalad failed. Reason: {}".format(message))
 
-        preproc_dir = Path(self.dataset_dir) / 'derivatives' / 'fmriprep'
-        preproc_dir.mkdir(exist_ok=True, parents=True)
+            logging.info("Attempting HTTP download...")
+            preproc_dir = deriv_dir / 'fmriprep'
+            for i, resource in enumerate(remote_files):
+                filename = preproc_dir / resource
+                logging.info("{}/{}: {}".format(i+1, len(remote_files), resource))
 
-        for i, resource in enumerate(remote_files):
-            logging.info("{}/{}: {}".format(i+1, len(remote_files), resource))
-            filename = preproc_dir / resource
-            if not filename.exists():
-                filename.parents[0].mkdir(exist_ok=True, parents=True)
-                url = remote_path + '/' + resource
-                download_file(url, filename)
+                if not filename.exists():
+                    filename.parents[0].mkdir(exist_ok=True, parents=True)
+                    download_file(remote_path + '/' + resource, filename)
 
-        desc = {'Name': self.dataset_dir.parts[0],
-         'BIDSVersion': '1.0'}
+        desc = {'Name': self.dataset_dir.parts[0], 'BIDSVersion': '1.0'}
 
         with (self.dataset_dir / 'dataset_description.json').open('w') as f:
             json.dump(desc, f)
diff --git a/neuroscout_cli/commands/run.py b/neuroscout_cli/commands/run.py
@@ -29,11 +29,11 @@ def run(self):
             tmp_out,
             'dataset',
             '--model={}'.format((bundle_path / 'model.json').absolute().as_posix()),
-            '--exclude=.*neuroscout/(?!{}).*'.format(bundle_path.parts[-1])
+            '--exclude=(neuroscout/(?!{})|fmriprep.*$(?<=tsv)|/.git)'.format(bundle_path.parts[-1])
         ]
 
         # Fitlins invalid keys
-        invalid = ['--no-download', '--version', '--help', '-i']
+        invalid = ['--no-download', '--version', '--help', '-i', 'run', '<bundle_id>']
         for k in invalid:
             self.options.pop(k, None)
 
@@ -42,8 +42,10 @@ def run(self):
             if name.startswith('--'):
                 if value is True:
                     fitlins_args.append('{}'.format(name))
-            elif name.startswith('-'):
-                if value is not None:
+                if value is not None and value is not False:
+                    fitlins_args.append('{}={}'.format(name, value))
+            else:
+                if value is not False and value is not None:
                     fitlins_args.append('{} {}'.format(name, value))
 
         bids.config.set_options(loop_preproc=True)