Merge pull request #1 from flywheel-apps/v0.1.0

Jen Reiter · web-flow · commit b3a8091191a7 · 2017-10-31T08:40:02.000-05:00
V0.1.0
diff --git a/Dockerfile b/Dockerfile
@@ -0,0 +1,29 @@
+# flywheel/csv-import
+#
+# Image for the csv-import gear: Python 2.7 plus the Flywheel Python SDK,
+# with the gear files copied into /flywheel/v0.
+
+# Start with python 2.7
+FROM python:2.7
+# MAINTAINER is deprecated; record the maintainer as a label instead.
+LABEL maintainer="Flywheel <support@flywheel.io>"
+
+# Install Python SDK (pinned 0.2.0 wheel)
+RUN pip install https://github.com/flywheel-io/sdk/releases/download/0.2.0/flywheel-0.2.0-py2-none-linux_x86_64.whl
+
+# Make directory for flywheel spec (v0)
+ENV FLYWHEEL=/flywheel/v0
+RUN mkdir -p ${FLYWHEEL}
+COPY run ${FLYWHEEL}/run
+COPY script.py ${FLYWHEEL}/script.py
+COPY manifest.json ${FLYWHEEL}/manifest.json
+
+# ENV preservation for Flywheel Engine: dump the build-time environment
+# (minus HOSTNAME and PWD) as `export NAME="value"` lines. The value is taken
+# as everything after the first '=', so values that themselves contain '='
+# are not truncated.
+RUN env -u HOSTNAME -u PWD | \
+  awk -F = '{ print "export " $1 "=\"" substr($0, length($1) + 2) "\"" }' > ${FLYWHEEL}/docker-env.sh
+
+# Set the entrypoint
+ENTRYPOINT ["/flywheel/v0/run"]
diff --git a/manifest.json b/manifest.json
@@ -0,0 +1,36 @@
+{
+	"name": "csv-import",
+	"label": "CSV Import",
+	"description": "Import a CSV of subject metadata into Flywheel. Each row is a subject and each column is a value. The first row must contain the header. Subjects are matched on Subject Code using the column named by Match_Column, which defaults to Subject ID.",
+	"version": "0.1.0",
+	"flywheel": "0",
+	"inputs": {
+		"file": {
+			"base": "file",
+			"description": "Subject Metadata CSV file."
+		},
+		"api_key": {
+      			"base": "api-key"
+    		}
+	},
+	"config": {
+		"Group_Tags_By": {
+			"default": "",
+			"description": "Enter value to group imported tags by, or leave blank to import to subject.info",
+			"type": "string"
+		},
+		"Match_Column": {
+			"default": "Subject ID",
+			"description": "Column header for column containing Subject Code.",
+			"type": "string"
+		}
+	},
+	"author": "Imad Nijim",
+	"maintainer": "Flywheel <support@flywheel.io>",
+	"license": "Other",
+	"source": "https://github.com/flywheel-apps/csv-import",
+	"url": "www.flywheel.io",
+	"custom": {
+    		"docker-image": "flywheel/csv-import:v0.1.0"
+  	}
+}
diff --git a/run b/run
@@ -0,0 +1,12 @@
+#!/usr/bin/env bash
+#
+# Entrypoint for the csv-import gear: runs the import script with python.
+
+# Put /opt/conda/bin ahead of the existing PATH entries.
+# NOTE(review): the Dockerfile added in this commit builds FROM python:2.7 and
+# installs no conda; confirm whether this entry is still needed.
+export PATH=/opt/conda/bin:$PATH
+
+# exec replaces this shell with the python process, so the script receives
+# signals directly and its exit status becomes the container's exit status.
+exec python /flywheel/v0/script.py
diff --git a/script.py b/script.py
@@ -0,0 +1,121 @@
+# Import a CSV of subject metadata into Flywheel.
+# Each row is a subject, each column is a value. Each subject is expected to
+# appear only once in the file (duplicate rows are not checked for).
+# First row must contain the header.
+# Subjects are matched on Subject Code using the column named by the
+# Match_Column config value, which defaults to "Subject ID".
+# To do:
+#   Add field type (string, int, etc.) to make search facets useful (probably
+#   expected in the second row).
+# NOTE(review): the two other original to-do items (stop using a hard-coded
+# API key; support a subject code that is not in the first column) appear to
+# be addressed by the code below -- confirm.
+
+import os
+import sys
+import time
+import csv
+import json
+import re
+
+# Gear basics
+print('Setup')
+flywheel_base = '/flywheel/v0'
+input_folder = os.path.join(flywheel_base, 'input/file')
+output_folder = os.path.join(flywheel_base, 'output')
+config_file = os.path.join(flywheel_base, 'config.json')
+
+# Grab the input file path (the first entry listed in the input folder)
+input_filename = os.listdir(input_folder)[0]
+input_filepath = os.path.join(input_folder, input_filename)
+
+# Grab config from config.json
+print('Reading config')
+with open(config_file) as fp:
+    config = json.load(fp)
+
+# Get config parameters, falling back to the defaults when a key is absent
+MatchColumn = config['config'].get('Match_Column', 'Subject ID').strip()
+GroupTagsBy = config['config'].get('Group_Tags_By', '').strip()
+
+# A non-blank group name must start with an alphanumeric character and may
+# continue with alphanumerics, dashes and underscores. A single character is
+# a valid name.
+if GroupTagsBy and not re.match(r'^[a-zA-Z0-9][a-zA-Z0-9_-]*$', GroupTagsBy):
+    print('Group By Tags is invalid.')
+    print('Valid values are: blank, alphanumeric including dashes and underscores')
+    sys.exit(1)
+
+# Load Flywheel Python SDK
+print('Loading Python SDK')
+from flywheel import Flywheel
+
+# Get temporary API key
+api_key = str(config['inputs']['api_key']['key'])
+fw = Flywheel(api_key)
+
+# Get the id of the container the input file is attached to
+# NOTE(review): this id is passed to get_acquisition, so the file is assumed
+# to live on an acquisition -- confirm behaviour for other container types.
+fileid = config['inputs']['file']['hierarchy']['id']
+# Walk up from the acquisition to its session and project
+acquisition = fw.get_acquisition(fileid)
+session = fw.get_session(acquisition['session'])
+project = fw.get_project(session['project'])
+projectid = project['_id']
+projectname = project['label']
+
+# Read CSV into a list of rows
+print('Reading in CSV')
+with open(input_filepath, 'rbU') as f:
+    rows = list(csv.reader(f))
+
+# An empty file has no header row to inspect
+if not rows:
+    print('CSV file is empty')
+    sys.exit(1)
+
+headers = rows[0]
+
+# Check if matching column exists
+if MatchColumn not in headers:
+    print("Match Column not found in CSV")
+    sys.exit(1)
+
+# Turn each data row into a {header: value} dict
+csv_subjects = []
+for row in rows[1:]:
+    if not row:
+        # csv.reader yields an empty list for a blank line
+        continue
+    if len(row) < len(headers):
+        # Pad short rows so every header has a value instead of raising IndexError
+        row = row + [''] * (len(headers) - len(row))
+    csv_subjects.append(dict(zip(headers, row)))
+
+# Update every session whose subject code matches a CSV row
+print('Updating sessions')
+sessions = fw.get_project_sessions(projectid)
+
+for row in csv_subjects:
+    subjectfound = False
+    # All columns except the match column become the imported metadata
+    val = row.copy()
+    del val[MatchColumn]
+    if GroupTagsBy:
+        myobject = {'subject': {'info': {GroupTagsBy: val}}}
+    else:
+        myobject = {'subject': {'info': val}}
+    for session in sessions:
+        if session['subject']['code'] == row[MatchColumn]:
+            subjectfound = True
+            fw.modify_session(session['_id'], myobject)
+    if not subjectfound:
+        print('Subject Code ' + row[MatchColumn] + ' in ' + input_filename + ' does not match a subject in the ' + projectname + ' project' + '.')
+
+# Add record keeping note to project notes
+field_count = len(headers) - 1
+if not GroupTagsBy:
+    fw.add_project_note(projectid, 'Imported %d fields from %s' % (field_count, input_filename))
+else:
+    fw.add_project_note(projectid, 'Imported %d fields from %s, grouped by %s' % (field_count, input_filename, GroupTagsBy))