@@ -1526,42 +1526,60 @@ task SubsetBam {
15261526 RuntimeAttr ? runtime_attr_override
15271527 }
15281528
1529+ String subset_prefix = prefix + "." + locus
1530+
15291531 command <<<
1532+ # How this works:
1533+ # 0) rely on the /opt/re-auth.sh script to export the credentials
1534+ # 1) run the remote samtools-view subsetting in the background
1535+ # 2) watch the PID of the background process, re-authenticating every 1200 seconds
1536+ source /opt/re-auth.sh
15301537 set -euxo pipefail
15311538
1532- export GCS_OAUTH_TOKEN = $(gcloud auth application-default print-access-token )
1533-
15341539 echo "false" > "samtools.failed.txt"
15351540
1541+ # '-M' makes samtools use the index with its multi-region iterator; see the samtools-view man page for details
15361542 samtools view \
15371543 -bhX \
15381544 -M \
15391545 -@ 1 \
15401546 --verbosity =8 \
15411547 --write-index \
1542- -o "~{prefix }.bam##idx##~{prefix }.bam.bai" \
1548+ -o "~{subset_prefix }.bam##idx##~{subset_prefix }.bam.bai" \
15431549 ~{bam } ~{bai } \
1544- ~{locus } \
1545- || { echo "samtools seem to have failed" ; echo "true" > "samtools.failed.txt" ; exit 77 ; }
1550+ ~{locus } && exit 0 || { echo "samtools seem to have failed" ; echo "true" > "samtools.failed.txt" ; exit 77 ; } &
1551+ pid = $!
1552+
1553+ set +e
1554+ count = 0
1555+ while true ; do
1556+ sleep 1200 && date && source /opt/re-auth.sh
1557+ count = $(( count + 1 ))
1558+ if [[ ${count } -gt 6 ]]; then echo "true" > "samtools.failed.txt" && exit 0 ; fi # way too many attempts, get out
1559+ if ! pgrep -x -P $pid ; then exit 0 ; fi
1560+ done
15461561 >>>
15471562
15481563 output {
1549- File subset_bam = "~{prefix }.bam"
1550- File subset_bai = "~{prefix }.bam.bai"
1564+ File subset_bam = "~{subset_prefix }.bam"
1565+ File subset_bai = "~{subset_prefix }.bam.bai"
15511566 Boolean is_samtools_failed = read_boolean ("samtools.failed.txt" )
15521567 }
15531568
15541569 #########################
1555- Int disk_size = if (0 ==disk_offset ) then 4 *ceil (size ([bam , bai ], "GB" )) else disk_offset + ceil (size ([bam , bai ], "GB" ))
1570+ # Int disk_size = if (0==disk_offset) then 4*ceil(size([bam, bai], "GB")) else disk_offset + ceil(size([bam, bai], "GB"))
1571+ Int min_disk = 10
1572+ Int proposal_disk = ceil (0.2 * size ([bam , bai ], "GiB" ))
1573+ Int disk_size = if (proposal_disk <min_disk ) then min_disk else proposal_disk # assumes the subset is no more than 10% of the whole genome; NOTE(review): proposal_disk uses a 0.2 factor, presumably as headroom over that 10% -- confirm
15561574
15571575 RuntimeAttr default_attr = object {
1558- cpu_cores : 1 ,
1576+ cpu_cores : 2 ,
15591577 mem_gb : 10 ,
15601578 disk_gb : disk_size ,
15611579 boot_disk_gb : 10 ,
15621580 preemptible_tries : 2 ,
15631581 max_retries : 1 ,
1564- docker : "us.gcr.io/broad-dsp-lrma/lr-basic :0.1.1 "
1582+ docker : "us.gcr.io/broad-dsp-lrma/lr-gcloud-samtools :0.1.23 "
15651583 }
15661584 RuntimeAttr runtime_attr = select_first ([runtime_attr_override , default_attr ])
15671585 runtime {
@@ -1779,7 +1797,7 @@ task ResilientSubsetBam {
17791797 boot_disk_gb : 10 ,
17801798 preemptible_tries : 2 ,
17811799 max_retries : 1 ,
1782- docker : "us.gcr.io/broad-dsp-lrma/lr-basic :0.1.1 "
1800+ docker : "us.gcr.io/broad-dsp-lrma/lr-gcloud-samtools :0.1.23 "
17831801 }
17841802 RuntimeAttr runtime_attr = select_first ([runtime_attr_override , default_attr ])
17851803 runtime {
0 commit comments