Skip to content

Commit 2e92239

Browse files
committed
requester pay stuff
1 parent eb87d1f commit 2e92239

File tree

2 files changed

+34
-19
lines changed

2 files changed

+34
-19
lines changed

wdl/tasks/Utility/BAMutils.wdl

Lines changed: 5 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1041,7 +1041,6 @@ task SubsetBamToLocusLocal {
10411041
interval_list_file: "a Picard-style interval list file to subset reads with"
10421042
interval_id: "an ID string for representing the intervals in the interval list file"
10431043
prefix: "prefix for output bam and bai file names"
1044-
bam: {localization_optional: true}
10451044
}
10461045

10471046
input {
@@ -1059,13 +1058,11 @@ task SubsetBamToLocusLocal {
10591058

10601059
String subset_prefix = prefix + "." + interval_id
10611060

1062-
String local_bam = "/cromwell_root/~{basename(bam)}"
1063-
10641061
command <<<
1065-
set -euxo pipefail
1062+
set -euxo pipefail
10661063
1067-
time gcloud storage cp ~{bam} ~{local_bam}
1068-
mv ~{bai} "~{local_bam}.bai" && touch "~{local_bam}.bai"
1064+
# guard against cases where the bai doesn't follow the naming convention of ".bam.bai"
1065+
if [[ -f "~{bam}.bai" ]]; then mv ~{bai} "~{bam}.bai"; fi
10691066
10701067
# see man page for what '-M' means
10711068
samtools view \
@@ -1074,7 +1071,7 @@ task SubsetBamToLocusLocal {
10741071
-@ 1 \
10751072
--write-index \
10761073
-o "~{subset_prefix}.bam##idx##~{subset_prefix}.bam.bai" \
1077-
~{local_bam} "~{local_bam}.bai" \
1074+
~{bam} "~{bam}.bai" \
10781075
~{sep=" " intervals}
10791076
>>>
10801077

@@ -1092,7 +1089,7 @@ task SubsetBamToLocusLocal {
10921089
disk_gb: disk_size,
10931090
preemptible_tries: 0,
10941091
max_retries: 0,
1095-
docker: "us.gcr.io/broad-dsp-lrma/lr-gcloud-samtools:0.1.3"
1092+
docker: "us.gcr.io/broad-dsp-lrma/lr-gcloud-samtools:0.1.23"
10961093
}
10971094
RuntimeAttr runtime_attr = select_first([runtime_attr_override, default_attr])
10981095
runtime {

wdl/tasks/Utility/Utils.wdl

Lines changed: 29 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1526,42 +1526,60 @@ task SubsetBam {
15261526
RuntimeAttr? runtime_attr_override
15271527
}
15281528

1529+
String subset_prefix = prefix + "." + locus
1530+
15291531
command <<<
1532+
# the way this works is the following:
1533+
# 0) rely on the re-auth.sh script to export the credentials
1534+
# 1) perform the remote sam-view subsetting in the background
1535+
# 2) listen to the PID of the background process, while re-authenticating every 1200 seconds
1536+
source /opt/re-auth.sh
15301537
set -euxo pipefail
15311538
1532-
export GCS_OAUTH_TOKEN=$(gcloud auth application-default print-access-token)
1533-
15341539
echo "false" > "samtools.failed.txt"
15351540
1541+
# see man page for what '-M' means
15361542
samtools view \
15371543
-bhX \
15381544
-M \
15391545
-@ 1 \
15401546
--verbosity=8 \
15411547
--write-index \
1542-
-o "~{prefix}.bam##idx##~{prefix}.bam.bai" \
1548+
-o "~{subset_prefix}.bam##idx##~{subset_prefix}.bam.bai" \
15431549
~{bam} ~{bai} \
1544-
~{locus} \
1545-
|| { echo "samtools seem to have failed"; echo "true" > "samtools.failed.txt"; exit 77; }
1550+
~{locus} && exit 0 || { echo "samtools seem to have failed"; echo "true" > "samtools.failed.txt"; exit 77; } &
1551+
pid=$!
1552+
1553+
set +e
1554+
count=0
1555+
while true; do
1556+
sleep 1200 && date && source /opt/re-auth.sh
1557+
count=$(( count+1 ))
1558+
if [[ ${count} -gt 6 ]]; then echo "true" > "samtools.failed.txt" && exit 0; fi # way too many attempts, get out
1559+
if ! pgrep -x -P $pid; then exit 0; fi
1560+
done
15461561
>>>
15471562

15481563
output {
1549-
File subset_bam = "~{prefix}.bam"
1550-
File subset_bai = "~{prefix}.bam.bai"
1564+
File subset_bam = "~{subset_prefix}.bam"
1565+
File subset_bai = "~{subset_prefix}.bam.bai"
15511566
Boolean is_samtools_failed = read_boolean("samtools.failed.txt")
15521567
}
15531568

15541569
#########################
1555-
Int disk_size = if (0==disk_offset) then 4*ceil(size([bam, bai], "GB")) else disk_offset + ceil(size([bam, bai], "GB"))
1570+
# Int disk_size = if (0==disk_offset) then 4*ceil(size([bam, bai], "GB")) else disk_offset + ceil(size([bam, bai], "GB"))
1571+
Int min_disk = 10
1572+
Int proposal_disk = ceil(0.2 * size([bam, bai], "GiB"))
1573+
Int disk_size = if (proposal_disk<min_disk) then min_disk else proposal_disk # here we make one assumption that we aren't getting more than 10% of the whole genome
15561574
15571575
RuntimeAttr default_attr = object {
1558-
cpu_cores: 1,
1576+
cpu_cores: 2,
15591577
mem_gb: 10,
15601578
disk_gb: disk_size,
15611579
boot_disk_gb: 10,
15621580
preemptible_tries: 2,
15631581
max_retries: 1,
1564-
docker: "us.gcr.io/broad-dsp-lrma/lr-basic:0.1.1"
1582+
docker: "us.gcr.io/broad-dsp-lrma/lr-gcloud-samtools:0.1.23"
15651583
}
15661584
RuntimeAttr runtime_attr = select_first([runtime_attr_override, default_attr])
15671585
runtime {
@@ -1779,7 +1797,7 @@ task ResilientSubsetBam {
17791797
boot_disk_gb: 10,
17801798
preemptible_tries: 2,
17811799
max_retries: 1,
1782-
docker: "us.gcr.io/broad-dsp-lrma/lr-basic:0.1.1"
1800+
docker: "us.gcr.io/broad-dsp-lrma/lr-gcloud-samtools:0.1.23"
17831801
}
17841802
RuntimeAttr runtime_attr = select_first([runtime_attr_override, default_attr])
17851803
runtime {

0 commit comments

Comments
 (0)