Skip to content

Commit a400bd5

Browse files
jonn-smith authored and lbergelson committed
Added in a WILL_FAIL status for workflows that will fail. (#85)
* Adds a WILL_FAIL status to the status output when a workflow is still running but is doomed to fail. * Added new colors to the colorized list output. * Fixes #78. * Removed some extra turtles. * Added resources and testing WDL/JSON files.
1 parent 7d466ea commit a400bd5

File tree

6 files changed

+316
-5
lines changed

6 files changed

+316
-5
lines changed

cromshell

+46-5
Original file line numberDiff line numberDiff line change
@@ -18,11 +18,15 @@ ISINTERACTIVESHELL=true
1818
shopt -s expand_aliases
1919

2020
################################################################################
21+
2122
COLOR_NORM='\033[0m'
2223
COLOR_UNDERLINED='\033[1;4m'
2324
COLOR_FAILED='\033[1;37;41m'
25+
COLOR_WILL_FAIL='\033[1;31;47m'
2426
COLOR_SUCCEEDED='\033[1;30;42m'
2527
COLOR_RUNNING='\033[0;30;46m'
28+
COLOR_ABORTED='\033[0;30;43m'
29+
2630
TASK_COLOR_RUNNING='\033[0;34m'
2731
TASK_COLOR_SUCCEEDED='\033[0;32m'
2832
TASK_COLOR_FAILING='\033[0;33m'
@@ -732,18 +736,45 @@ function status()
732736
r=$?
733737
[[ $r -eq 0 ]] && retVal=1
734738

739+
# Hold our status string here:
740+
local workflowStatus=$( cat $f | jq -r .status )
741+
735742
if [[ $retVal -eq 1 ]]; then
736743
turtleDead
744+
elif [[ "${workflowStatus}" == "Running" ]] ; then
745+
# OK, status claims this workflow is running fine, but we need to check to see
746+
# if there are any failed sub-processes.
747+
# To do this, we use the `execution-status-count` logic with some filtering:
748+
local tmpExecutionStatusCount=$( makeTemp )
749+
local tmpMetadata=$( makeTemp )
750+
751+
# Get execution status count and filter the metadata down:
752+
curl --connect-timeout "$CURL_CONNECT_TIMEOUT" --max-time "$CURL_MAX_TIMEOUT" --compressed -s "${2}/api/workflows/v1/${1}/metadata?$CROMWELL_SLIM_METADATA_PARAMETERS" > ${tmpMetadata}
753+
cat ${tmpMetadata} | jq '.calls | map_values(group_by(.executionStatus) | map({(.[0].executionStatus): . | length}) | add)' > ${tmpExecutionStatusCount}
754+
755+
# Check for failure states:
756+
cat ${tmpMetadata} | jq --exit-status '[ ..|.executionStatus? | values | . == "Failed" ] | any' > /dev/null
757+
r=$?
758+
759+
# Check for failures:
760+
if [[ $r -ne 0 ]] ; then
761+
# No call in the metadata has an executionStatus of "Failed", so the reported "Running" status is correct.
762+
turtle
763+
else
764+
turtleDead
765+
workflowStatus="WILL_FAIL"
766+
f=${tmpExecutionStatusCount}
767+
fi
737768
else
738769
turtle
739770
fi
740771

772+
# Display status to user:
741773
cat $f | jq .
742774
checkPipeStatus "Could not read tmp file JSON data." "Could not parse JSON output from cromwell server."
743775

744776
# Update ${CROMWELL_SUBMISSIONS_FILE}:
745-
local st=$( cat $f | jq . | grep status | sed -e 's#.*: ##g' | tr -d '",' )
746-
sed -i .bak -e "s#\\(.*${1}.*\\.wdl\\)\\t*.*#\\1$(printf '\t')${st}#g" ${CROMWELL_SUBMISSIONS_FILE}
777+
sed -i .bak -e "s#\\(.*${1}.*\\.wdl\\)\\t*.*#\\1$(printf '\t')${workflowStatus}#g" ${CROMWELL_SUBMISSIONS_FILE}
747778

748779
return $retVal
749780
}
@@ -1025,11 +1056,21 @@ function list()
10251056
r=$?
10261057
[ $r -eq 0 ] && echo -e "${COLOR_UNDERLINED}${line}${COLOR_NORM}" && continue
10271058

1059+
# Check for jobs that WILL FAIL and color those lines:
1060+
echo "${line}" | grep -q 'WILL_FAIL'
1061+
r=$?
1062+
[ $r -eq 0 ] && echo -e "${COLOR_WILL_FAIL}${line}${COLOR_NORM}" && continue
1063+
10281064
# Check for failed jobs and color those lines:
10291065
echo "${line}" | grep -q 'Failed'
10301066
r=$?
10311067
[ $r -eq 0 ] && echo -e "${COLOR_FAILED}${line}${COLOR_NORM}" && continue
10321068

1069+
# Check for Aborted jobs and color those lines:
1070+
echo "${line}" | grep -q 'Aborted'
1071+
r=$?
1072+
[ $r -eq 0 ] && echo -e "${COLOR_ABORTED}${line}${COLOR_NORM}" && continue
1073+
10331074
# Check for successful jobs and color those lines:
10341075
echo "${line}" | grep -q 'Succeeded'
10351076
r=$?
@@ -1168,7 +1209,7 @@ function list-outputs()
11681209
local id=$1
11691210
local cromwellServer=$2
11701211

1171-
local remoteFolder=$( metadata ${id} ${cromwellServer} | grep "\"callRoot\":" | head -n1 | awk '{print $2}' | sed "s#\"\\(.*${id}\\).*#\\1#g" )
1212+
local remoteFolder=$( metadata ${id} ${cromwellServer} 2>/dev/null | grep "\"callRoot\":" | head -n1 | awk '{print $2}' | sed "s#\"\\(.*${id}\\).*#\\1#g" )
11721213

11731214
local localServerFolder="${CROMSHELL_CONFIG_DIR}/$( echo "${cromwellServer}" | sed -e 's#ht.*://##g' )/${id}"
11741215

@@ -1199,7 +1240,7 @@ function fetch-logs()
11991240
local id=$1
12001241
local cromwellServer=$2
12011242

1202-
local remoteFolder=$( metadata ${id} ${cromwellServer} | grep "\"callRoot\":" | head -n1 | awk '{print $2}' | sed "s#\"\\(.*${id}\\).*#\\1#g" )
1243+
local remoteFolder=$( metadata ${id} ${cromwellServer} 2>/dev/null| grep "\"callRoot\":" | head -n1 | awk '{print $2}' | sed "s#\"\\(.*${id}\\).*#\\1#g" )
12031244

12041245
local localServerFolder="${CROMSHELL_CONFIG_DIR}/$( echo "${cromwellServer}" | sed -e 's#ht.*://##g' )/${id}"
12051246

@@ -1233,7 +1274,7 @@ function fetch-all()
12331274
local id=$1
12341275
local cromwellServer=$2
12351276

1236-
local remoteFolder=$( metadata ${id} ${cromwellServer} | grep "\"callRoot\":" | head -n1 | awk '{print $2}' | sed "s#\"\\(.*${id}\\).*#\\1#g" )
1277+
local remoteFolder=$( metadata ${id} ${cromwellServer} 2>/dev/null | grep "\"callRoot\":" | head -n1 | awk '{print $2}' | sed "s#\"\\(.*${id}\\).*#\\1#g" )
12371278

12381279
local localServerFolder="${CROMSHELL_CONFIG_DIR}/$( echo "${cromwellServer}" | sed -e 's#ht.*://##g' )/${id}"
12391280

resources/options.json

+3
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
{
2+
"monitoring_script": "gs://broad-dsp-methods-resources/cromwell_monitoring_script.sh"
3+
}

testing/helloWorld.json

+3
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
{
2+
"HelloWorld.docker": "frolvlad/alpine-bash"
3+
}

testing/helloWorld.wdl

+94
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,94 @@
1+
# Hello World!
2+
#
3+
# Description of inputs:
4+
#
5+
# Required:
6+
# String docker - Docker image in which to run
7+
#
8+
# Optional:
9+
# Int mem - Amount of memory to give to the machine running each task in this workflow.
10+
# Int preemptible_attempts - Number of times to allow each task in this workflow to be preempted.
11+
# Int disk_space_gb - Amount of storage disk space (in Gb) to give to each machine running each task in this workflow.
12+
# Int cpu - Number of CPU cores to give to each machine running each task in this workflow.
13+
# Int boot_disk_size_gb - Amount of boot disk space (in Gb) to give to each machine running each task in this workflow.
14+
#
15+
workflow HelloWorld {
16+
String docker
17+
18+
Int? mem
19+
Int? preemptible_attempts
20+
Int? disk_space_gb
21+
Int? cpu
22+
Int? boot_disk_size_gb
23+
24+
call HelloWorldTask {
25+
input:
26+
docker = docker,
27+
mem = mem,
28+
preemptible_attempts = preemptible_attempts,
29+
disk_space_gb = disk_space_gb,
30+
cpu = cpu,
31+
boot_disk_size_gb = boot_disk_size_gb
32+
}
33+
34+
output {
35+
}
36+
}
37+
38+
task HelloWorldTask {
39+
40+
# ------------------------------------------------
41+
# Input args:
42+
43+
# Required:
44+
45+
# Runtime Options:
46+
String docker
47+
Int? mem
48+
Int? preemptible_attempts
49+
Int? disk_space_gb
50+
Int? cpu
51+
Int? boot_disk_size_gb
52+
53+
# ------------------------------------------------
54+
# Process input args:
55+
56+
# ------------------------------------------------
57+
# Get machine settings:
58+
Boolean use_ssd = false
59+
60+
# You may have to change the following two parameter values depending on the task requirements
61+
Int default_ram_mb = 3 * 1024
62+
# WARNING: In the workflow, you should calculate the disk space as an input to this task (disk_space_gb). Please see [TODO: Link from Jose] for examples.
63+
Int default_disk_space_gb = 100
64+
65+
Int default_boot_disk_size_gb = 15
66+
67+
# Mem is in units of GB but our command and memory runtime values are in MB
68+
Int machine_mem = if defined(mem) then mem * 1024 else default_ram_mb
69+
Int command_mem = machine_mem - 1024
70+
71+
# ------------------------------------------------
72+
# Run our command:
73+
command <<<
74+
set -e
75+
echo 'Hello World!'
76+
>>>
77+
78+
# ------------------------------------------------
79+
# Runtime settings:
80+
runtime {
81+
docker: docker
82+
memory: machine_mem + " MB"
83+
disks: "local-disk " + select_first([disk_space_gb, default_disk_space_gb]) + if use_ssd then " SSD" else " HDD"
84+
bootDiskSizeGb: select_first([boot_disk_size_gb, default_boot_disk_size_gb])
85+
preemptible: 0
86+
cpu: select_first([cpu, 1])
87+
}
88+
89+
# ------------------------------------------------
90+
# Outputs:
91+
output {
92+
}
93+
}
94+

testing/will_fail.json

+3
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
{
2+
"WillFailTester.docker": "frolvlad/alpine-bash"
3+
}

testing/will_fail.wdl

+167
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,167 @@
1+
# Will fail tester
2+
# Results in a workflow that "will fail" after 5 minutes.
3+
# used to test the "WILL_FAIL" status.
4+
#
5+
# Description of inputs:
6+
#
7+
# Required:
8+
# String docker - Docker image in which to run
9+
#
10+
# Optional:
11+
# Int mem - Amount of memory to give to the machine running each task in this workflow.
12+
# Int preemptible_attempts - Number of times to allow each task in this workflow to be preempted.
13+
# Int disk_space_gb - Amount of storage disk space (in Gb) to give to each machine running each task in this workflow.
14+
# Int cpu - Number of CPU cores to give to each machine running each task in this workflow.
15+
# Int boot_disk_size_gb - Amount of boot disk space (in Gb) to give to each machine running each task in this workflow.
16+
#
17+
workflow WillFailTester {
18+
String docker
19+
20+
Int? mem
21+
Int? preemptible_attempts
22+
Int? disk_space_gb
23+
Int? cpu
24+
Int? boot_disk_size_gb
25+
26+
call FailFastTask {
27+
input:
28+
docker = docker,
29+
mem = mem,
30+
preemptible_attempts = preemptible_attempts,
31+
disk_space_gb = disk_space_gb,
32+
cpu = cpu,
33+
boot_disk_size_gb = boot_disk_size_gb
34+
}
35+
36+
call PassRunsLong {
37+
input:
38+
docker = docker,
39+
mem = mem,
40+
preemptible_attempts = preemptible_attempts,
41+
disk_space_gb = disk_space_gb,
42+
cpu = cpu,
43+
boot_disk_size_gb = boot_disk_size_gb
44+
}
45+
46+
output {
47+
}
48+
}
49+
50+
task FailFastTask {
51+
52+
# ------------------------------------------------
53+
# Input args:
54+
55+
# Required:
56+
57+
# Runtime Options:
58+
String docker
59+
Int? mem
60+
Int? preemptible_attempts
61+
Int? disk_space_gb
62+
Int? cpu
63+
Int? boot_disk_size_gb
64+
65+
# ------------------------------------------------
66+
# Process input args:
67+
68+
# ------------------------------------------------
69+
# Get machine settings:
70+
Boolean use_ssd = false
71+
72+
# You may have to change the following two parameter values depending on the task requirements
73+
Int default_ram_mb = 3 * 1024
74+
# WARNING: In the workflow, you should calculate the disk space as an input to this task (disk_space_gb). Please see [TODO: Link from Jose] for examples.
75+
Int default_disk_space_gb = 100
76+
77+
Int default_boot_disk_size_gb = 15
78+
79+
# Mem is in units of GB but our command and memory runtime values are in MB
80+
Int machine_mem = if defined(mem) then mem * 1024 else default_ram_mb
81+
Int command_mem = machine_mem - 1024
82+
83+
# ------------------------------------------------
84+
# Run our command:
85+
command <<<
86+
set -e
87+
88+
# Nonsense here so we will fail fast:
89+
aojewfajefaiefiapwghaghiogewi;gsaklagdhkashghhkl
90+
>>>
91+
92+
# ------------------------------------------------
93+
# Runtime settings:
94+
runtime {
95+
docker: docker
96+
memory: machine_mem + " MB"
97+
disks: "local-disk " + select_first([disk_space_gb, default_disk_space_gb]) + if use_ssd then " SSD" else " HDD"
98+
bootDiskSizeGb: select_first([boot_disk_size_gb, default_boot_disk_size_gb])
99+
preemptible: 0
100+
cpu: select_first([cpu, 1])
101+
}
102+
103+
# ------------------------------------------------
104+
# Outputs:
105+
output {
106+
}
107+
}
108+
109+
task PassRunsLong {
110+
111+
# ------------------------------------------------
112+
# Input args:
113+
114+
# Required:
115+
116+
# Runtime Options:
117+
String docker
118+
Int? mem
119+
Int? preemptible_attempts
120+
Int? disk_space_gb
121+
Int? cpu
122+
Int? boot_disk_size_gb
123+
124+
# ------------------------------------------------
125+
# Process input args:
126+
127+
# ------------------------------------------------
128+
# Get machine settings:
129+
Boolean use_ssd = false
130+
131+
# You may have to change the following two parameter values depending on the task requirements
132+
Int default_ram_mb = 3 * 1024
133+
# WARNING: In the workflow, you should calculate the disk space as an input to this task (disk_space_gb). Please see [TODO: Link from Jose] for examples.
134+
Int default_disk_space_gb = 100
135+
136+
Int default_boot_disk_size_gb = 15
137+
138+
# Mem is in units of GB but our command and memory runtime values are in MB
139+
Int machine_mem = if defined(mem) then mem * 1024 else default_ram_mb
140+
Int command_mem = machine_mem - 1024
141+
142+
# ------------------------------------------------
143+
# Run our command:
144+
command <<<
145+
set -e
146+
147+
# Wait for 5 minutes:
148+
sleep 300
149+
>>>
150+
151+
# ------------------------------------------------
152+
# Runtime settings:
153+
runtime {
154+
docker: docker
155+
memory: machine_mem + " MB"
156+
disks: "local-disk " + select_first([disk_space_gb, default_disk_space_gb]) + if use_ssd then " SSD" else " HDD"
157+
bootDiskSizeGb: select_first([boot_disk_size_gb, default_boot_disk_size_gb])
158+
preemptible: 0
159+
cpu: select_first([cpu, 1])
160+
}
161+
162+
# ------------------------------------------------
163+
# Outputs:
164+
output {
165+
}
166+
}
167+

0 commit comments

Comments
 (0)