Added in a WILL_FAIL status for workflows that will fail. (#85)

jonn-smith · lbergelson · commit a400bd51a139 · 2019-07-31T14:42:26.000-04:00
* Now will add a WILL_FAIL status to the status output when a workflow is running but doomed. * Added new colors to colorized list output. * Fixes #78 * Removed some extra turtles. * Added in resources and testing wdl/json files.
diff --git a/cromshell b/cromshell
@@ -18,11 +18,15 @@ ISINTERACTIVESHELL=true
 shopt -s expand_aliases
 
 ################################################################################
+
 COLOR_NORM='\033[0m'
 COLOR_UNDERLINED='\033[1;4m'
 COLOR_FAILED='\033[1;37;41m'
+COLOR_WILL_FAIL='\033[1;31;47m'
 COLOR_SUCCEEDED='\033[1;30;42m'
 COLOR_RUNNING='\033[0;30;46m'
+COLOR_ABORTED='\033[0;30;43m'
+
 TASK_COLOR_RUNNING='\033[0;34m'
 TASK_COLOR_SUCCEEDED='\033[0;32m'
 TASK_COLOR_FAILING='\033[0;33m'
@@ -732,18 +736,45 @@ function status()
   r=$?
   [[ $r -eq 0 ]] && retVal=1
 
+  # Hold our status string here:
+  local workflowStatus=$( cat $f | jq -r .status ) 
+
   if [[ $retVal -eq 1 ]]; then
     turtleDead
+  elif [[ "${workflowStatus}" == "Running" ]] ; then
+    # OK, status claims this workflow is running fine, but we need to check to see
+    # if there are any failed sub-processes.
+    # To do this, we use the `execution-status-count` logic with some filtering:
+    local tmpExecutionStatusCount=$( makeTemp )
+    local tmpMetadata=$( makeTemp )
+
+    # Get execution status count and filter the metadata down:
+    curl --connect-timeout "$CURL_CONNECT_TIMEOUT" --max-time "$CURL_MAX_TIMEOUT" --compressed -s "${2}/api/workflows/v1/${1}/metadata?$CROMWELL_SLIM_METADATA_PARAMETERS" > ${tmpMetadata}
+    cat ${tmpMetadata} | jq '.calls | map_values(group_by(.executionStatus) | map({(.[0].executionStatus): . | length}) | add)' > ${tmpExecutionStatusCount}
+
+    # Check for failure states:
+    cat ${tmpMetadata} | jq --exit-status '[ ..|.executionStatus? | values | . == "Failed" ] | any' >  /dev/null
+    r=$?
+
+    # Check for failures:
+    if [[ $r -ne 0 ]] ; then
+      # We could not find a 'Failed' executionStatus in our metadata, so our original status is correct.
+      turtle
+    else
+      turtleDead
+      workflowStatus="WILL_FAIL"
+      f=${tmpExecutionStatusCount}
+    fi
   else
     turtle
   fi
 
+  # Display status to user:
   cat $f | jq . 
   checkPipeStatus "Could not read tmp file JSON data." "Could not parse JSON output from cromwell server."
 
   # Update ${CROMWELL_SUBMISSIONS_FILE}:
-  local st=$( cat $f | jq . | grep status | sed -e 's#.*: ##g' | tr -d '",' )
-  sed -i .bak -e "s#\\(.*${1}.*\\.wdl\\)\\t*.*#\\1$(printf '\t')${st}#g" ${CROMWELL_SUBMISSIONS_FILE}
+  sed -i .bak -e "s#\\(.*${1}.*\\.wdl\\)\\t*.*#\\1$(printf '\t')${workflowStatus}#g" ${CROMWELL_SUBMISSIONS_FILE}
 
   return $retVal
 }
@@ -1025,11 +1056,21 @@ function list()
       r=$?
       [ $r -eq 0 ] && echo -e "${COLOR_UNDERLINED}${line}${COLOR_NORM}" && continue
 
+      # Check for jobs that WILL FAIL and color those lines:
+      echo "${line}" | grep -q 'WILL_FAIL' 
+      r=$?
+      [ $r -eq 0 ] && echo -e "${COLOR_WILL_FAIL}${line}${COLOR_NORM}" && continue
+      
       # Check for failed jobs and color those lines:
       echo "${line}" | grep -q 'Failed' 
       r=$?
       [ $r -eq 0 ] && echo -e "${COLOR_FAILED}${line}${COLOR_NORM}" && continue
 
+      # Check for Aborted jobs and color those lines:
+      echo "${line}" | grep -q 'Aborted' 
+      r=$?
+      [ $r -eq 0 ] && echo -e "${COLOR_ABORTED}${line}${COLOR_NORM}" && continue
+
       # Check for successful jobs and color those lines:
       echo "${line}" | grep -q 'Succeeded' 
       r=$?
@@ -1168,7 +1209,7 @@ function list-outputs()
   local id=$1
   local cromwellServer=$2
 
-  local remoteFolder=$( metadata ${id} ${cromwellServer} | grep "\"callRoot\":" | head -n1 | awk '{print $2}' | sed "s#\"\\(.*${id}\\).*#\\1#g" )
+  local remoteFolder=$( metadata ${id} ${cromwellServer} 2>/dev/null | grep "\"callRoot\":" | head -n1 | awk '{print $2}' | sed "s#\"\\(.*${id}\\).*#\\1#g" )
 
   local localServerFolder="${CROMSHELL_CONFIG_DIR}/$( echo "${cromwellServer}" | sed -e 's#ht.*://##g' )/${id}"
 
@@ -1199,7 +1240,7 @@ function fetch-logs()
   local id=$1
   local cromwellServer=$2
 
-  local remoteFolder=$( metadata ${id} ${cromwellServer} | grep "\"callRoot\":" | head -n1 | awk '{print $2}' | sed "s#\"\\(.*${id}\\).*#\\1#g" )
+  local remoteFolder=$( metadata ${id} ${cromwellServer} 2>/dev/null| grep "\"callRoot\":" | head -n1 | awk '{print $2}' | sed "s#\"\\(.*${id}\\).*#\\1#g" )
 
   local localServerFolder="${CROMSHELL_CONFIG_DIR}/$( echo "${cromwellServer}" | sed -e 's#ht.*://##g' )/${id}"
 
@@ -1233,7 +1274,7 @@ function fetch-all()
   local id=$1
   local cromwellServer=$2
 
-  local remoteFolder=$( metadata ${id} ${cromwellServer} | grep "\"callRoot\":" | head -n1 | awk '{print $2}' | sed "s#\"\\(.*${id}\\).*#\\1#g" )
+  local remoteFolder=$( metadata ${id} ${cromwellServer} 2>/dev/null | grep "\"callRoot\":" | head -n1 | awk '{print $2}' | sed "s#\"\\(.*${id}\\).*#\\1#g" )
 
   local localServerFolder="${CROMSHELL_CONFIG_DIR}/$( echo "${cromwellServer}" | sed -e 's#ht.*://##g' )/${id}"
 
diff --git a/resources/options.json b/resources/options.json
@@ -0,0 +1,3 @@
+{
+	"monitoring_script": "gs://broad-dsp-methods-resources/cromwell_monitoring_script.sh"
+}
diff --git a/testing/helloWorld.json b/testing/helloWorld.json
@@ -0,0 +1,3 @@
+{
+  "HelloWorld.docker": "frolvlad/alpine-bash"
+}
diff --git a/testing/helloWorld.wdl b/testing/helloWorld.wdl
@@ -0,0 +1,94 @@
+# Hello World! 
+#
+# Description of inputs:
+#
+#   Required:
+#     String docker                -  Docker image in which to run
+#
+#   Optional:
+#     Int  mem                     -  Amount of memory (in GB) to give to the machine running each task in this workflow.
+#     Int  preemptible_attempts    -  Number of times to allow each task in this workflow to be preempted.
+#     Int  disk_space_gb           -  Amount of storage disk space (in GB) to give to each machine running each task in this workflow.
+#     Int  cpu                     -  Number of CPU cores to give to each machine running each task in this workflow.
+#     Int  boot_disk_size_gb       -  Amount of boot disk space (in GB) to give to each machine running each task in this workflow.
+#
+workflow HelloWorld {
+    String docker
+
+    Int? mem
+    Int? preemptible_attempts
+    Int? disk_space_gb
+    Int? cpu
+    Int? boot_disk_size_gb
+
+        call HelloWorldTask {
+            input:
+                docker               = docker,
+                mem                  = mem,
+                preemptible_attempts = preemptible_attempts,
+                disk_space_gb        = disk_space_gb,
+                cpu                  = cpu,
+                boot_disk_size_gb    = boot_disk_size_gb
+        }
+
+    output {
+    }
+}
+
+task HelloWorldTask {
+
+    # ------------------------------------------------
+    # Input args:
+
+    # Required:
+
+     # Runtime Options:
+     String docker
+     Int? mem
+     Int? preemptible_attempts
+     Int? disk_space_gb
+     Int? cpu
+     Int? boot_disk_size_gb
+
+    # ------------------------------------------------
+    # Process input args:
+
+    # ------------------------------------------------
+    # Get machine settings:
+     Boolean use_ssd = false
+
+    # You may have to change the following two parameter values depending on the task requirements
+    Int default_ram_mb = 3 * 1024
+    # WARNING: In the workflow, you should calculate the disk space as an input to this task (disk_space_gb).  Please see [TODO: Link from Jose] for examples.
+    Int default_disk_space_gb = 100
+
+    Int default_boot_disk_size_gb = 15
+
+    # Mem is in units of GB but our command and memory runtime values are in MB
+    Int machine_mem = if defined(mem) then mem * 1024 else default_ram_mb
+    Int command_mem = machine_mem - 1024
+
+    # ------------------------------------------------
+    # Run our command:
+     command <<<
+         set -e
+				 echo 'Hello World!'				 
+     >>>
+
+    # ------------------------------------------------
+    # Runtime settings:
+     runtime {
+         docker: docker
+         memory: machine_mem + " MB"
+         disks: "local-disk " + select_first([disk_space_gb, default_disk_space_gb]) + if use_ssd then " SSD" else " HDD"
+         bootDiskSizeGb: select_first([boot_disk_size_gb, default_boot_disk_size_gb])
+         preemptible: 0 
+         cpu: select_first([cpu, 1])
+     }
+
+    # ------------------------------------------------
+    # Outputs:
+     output {
+     }
+ }
+
diff --git a/testing/will_fail.json b/testing/will_fail.json
@@ -0,0 +1,3 @@
+{
+  "WillFailTester.docker": "frolvlad/alpine-bash"
+}
diff --git a/testing/will_fail.wdl b/testing/will_fail.wdl
@@ -0,0 +1,167 @@
+# Will fail tester
+#   results in a workflow that "will fail" after 5 minutes.
+#   used to test the "WILL_FAIL" status.
+#
+# Description of inputs:
+#
+#   Required:
+#     String docker                -  Docker image in which to run
+#
+#   Optional:
+#     Int  mem                     -  Amount of memory (in GB) to give to the machine running each task in this workflow.
+#     Int  preemptible_attempts    -  Number of times to allow each task in this workflow to be preempted.
+#     Int  disk_space_gb           -  Amount of storage disk space (in GB) to give to each machine running each task in this workflow.
+#     Int  cpu                     -  Number of CPU cores to give to each machine running each task in this workflow.
+#     Int  boot_disk_size_gb       -  Amount of boot disk space (in GB) to give to each machine running each task in this workflow.
+#
+workflow WillFailTester {
+    String docker
+
+    Int? mem
+    Int? preemptible_attempts
+    Int? disk_space_gb
+    Int? cpu
+    Int? boot_disk_size_gb
+
+        call FailFastTask {
+            input:
+                docker                    = docker,
+                mem                       = mem,
+                preemptible_attempts      = preemptible_attempts,
+                disk_space_gb             = disk_space_gb,
+                cpu                       = cpu,
+                boot_disk_size_gb         = boot_disk_size_gb
+        }
+
+        call PassRunsLong {
+            input:
+                docker                    = docker,
+                mem                       = mem,
+                preemptible_attempts      = preemptible_attempts,
+                disk_space_gb             = disk_space_gb,
+                cpu                       = cpu,
+                boot_disk_size_gb         = boot_disk_size_gb
+        }
+
+    output {
+    }
+}
+
+task FailFastTask {
+
+    # ------------------------------------------------
+    # Input args:
+
+    # Required:
+
+     # Runtime Options:
+     String docker
+     Int? mem
+     Int? preemptible_attempts
+     Int? disk_space_gb
+     Int? cpu
+     Int? boot_disk_size_gb
+
+    # ------------------------------------------------
+    # Process input args:
+
+    # ------------------------------------------------
+    # Get machine settings:
+     Boolean use_ssd = false
+
+    # You may have to change the following two parameter values depending on the task requirements
+    Int default_ram_mb = 3 * 1024
+    # WARNING: In the workflow, you should calculate the disk space as an input to this task (disk_space_gb).  Please see [TODO: Link from Jose] for examples.
+    Int default_disk_space_gb = 100
+
+    Int default_boot_disk_size_gb = 15
+
+    # Mem is in units of GB but our command and memory runtime values are in MB
+    Int machine_mem = if defined(mem) then mem * 1024 else default_ram_mb
+    Int command_mem = machine_mem - 1024
+
+    # ------------------------------------------------
+    # Run our command:
+     command <<<
+         set -e
+
+				# Nonsense here so we will fail fast:
+				aojewfajefaiefiapwghaghiogewi;gsaklagdhkashghhkl
+     >>>
+
+    # ------------------------------------------------
+    # Runtime settings:
+     runtime {
+         docker: docker
+         memory: machine_mem + " MB"
+         disks: "local-disk " + select_first([disk_space_gb, default_disk_space_gb]) + if use_ssd then " SSD" else " HDD"
+         bootDiskSizeGb: select_first([boot_disk_size_gb, default_boot_disk_size_gb])
+         preemptible: 0 
+         cpu: select_first([cpu, 1])
+     }
+
+    # ------------------------------------------------
+    # Outputs:
+     output {
+     }
+ }
+
+task PassRunsLong {
+
+    # ------------------------------------------------
+    # Input args:
+
+    # Required:
+
+     # Runtime Options:
+     String docker
+     Int? mem
+     Int? preemptible_attempts
+     Int? disk_space_gb
+     Int? cpu
+     Int? boot_disk_size_gb
+
+    # ------------------------------------------------
+    # Process input args:
+
+    # ------------------------------------------------
+    # Get machine settings:
+     Boolean use_ssd = false
+
+    # You may have to change the following two parameter values depending on the task requirements
+    Int default_ram_mb = 3 * 1024
+    # WARNING: In the workflow, you should calculate the disk space as an input to this task (disk_space_gb).  Please see [TODO: Link from Jose] for examples.
+    Int default_disk_space_gb = 100
+
+    Int default_boot_disk_size_gb = 15
+
+    # Mem is in units of GB but our command and memory runtime values are in MB
+    Int machine_mem = if defined(mem) then mem * 1024 else default_ram_mb
+    Int command_mem = machine_mem - 1024
+
+    # ------------------------------------------------
+    # Run our command:
+     command <<<
+         set -e
+
+				# Wait for 5 minutes:
+				sleep 300
+     >>>
+
+    # ------------------------------------------------
+    # Runtime settings:
+     runtime {
+         docker: docker
+         memory: machine_mem + " MB"
+         disks: "local-disk " + select_first([disk_space_gb, default_disk_space_gb]) + if use_ssd then " SSD" else " HDD"
+         bootDiskSizeGb: select_first([boot_disk_size_gb, default_boot_disk_size_gb])
+         preemptible: 0 
+         cpu: select_first([cpu, 1])
+     }
+
+    # ------------------------------------------------
+    # Outputs:
+     output {
+     }
+}
+

Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,3 @@`
	`1`	`+{`
	`2`	`+ "monitoring_script": "gs://broad-dsp-methods-resources/cromwell_monitoring_script.sh"`
	`3`	`+}`
Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,3 @@`
	`1`	`+{`
	`2`	`+ "HelloWorld.docker": "frolvlad/alpine-bash"`
	`3`	`+}`
Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,3 @@`
	`1`	`+{`
	`2`	`+ "WillFailTester.docker": "frolvlad/alpine-bash"`
	`3`	`+}`