@@ -5,22 +5,22 @@ workflow InputQC {
55 String pipeline_version = "0.0.1"
66
77 input {
8-
98 Array [String ] contigs
109
11- # this is the path the a directory that contains sites vcf, sites tabke , and reference chunks file. should end with a "/"
10+ # this is the directory that contains the sites VCF, the sites table, and the reference chunks file. It should end with a "/"
1211 String reference_panel_prefix
1312
14- # service currently does not accept VCFs as input
1513 Array [File ]? crams
1614 Array [File ]? cram_indices
1715 Array [String ]? sample_ids
1816 File ? cram_manifest
19- File fasta
20- File fasta_index
2117 String output_basename
2218
19+ File fasta
20+ File fasta_index
2321 File ref_dict
22+
23+ String billing_project_for_rp ?
2424 }
2525
2626 # validate that either crams, or cram manifest is provided
@@ -47,8 +47,8 @@ workflow InputQC {
4747
4848 # validations for array crams input
4949 if (do_cram_qc ) {
50- Array [File ] cram_array = select_first ([crams , ConvertCramManifestToCramArrays .crams ])
51- Array [File ] cram_index_array = select_first ([cram_indices , ConvertCramManifestToCramArrays .cram_indices , []])
50+ Array [String ] cram_array = select_first ([crams , ConvertCramManifestToCramArrays .crams ])
51+ Array [String ] cram_index_array = select_first ([cram_indices , ConvertCramManifestToCramArrays .cram_indices , []])
5252 Array [String ] sample_id_array = select_first ([sample_ids , ConvertCramManifestToCramArrays .sample_ids , []])
5353
5454 if (!defined (cram_index_array ) || !defined (sample_id_array )) {
@@ -60,7 +60,8 @@ workflow InputQC {
6060 input :
6161 crams = cram_array ,
6262 cram_indices = cram_index_array ,
63- sample_ids = sample_id_array
63+ sample_ids = sample_id_array ,
64+ billing_project_for_rp = billing_project_for_rp
6465 }
6566 }
6667
@@ -141,6 +142,7 @@ task ValidateCramsAndIndices {
141142 Array [String ] sample_ids
142143
143144 Int max_cram_file_size_gb = 10
145+ String billing_project_for_rp ? # if set, this billing project is used when checking file sizes in requester pays buckets. if not set, file sizes in requester pays buckets cannot be checked, and those files are assumed to be below the max file size
144146
145147 String gatk_docker = "us.gcr.io/broad-gatk/gatk:4.6.1.0"
146148 Int cpu = 1
@@ -152,6 +154,8 @@ task ValidateCramsAndIndices {
152154 Int num_cram_indices = length (cram_indices )
153155 Int num_sample_ids = length (sample_ids )
154156
157+ String gcloud_requester_pays_flag = if defined (billing_project_for_rp ) then "--billing-project ${billing_project_for_rp }" else ""
158+
155159 command <<<
156160 # create empty qc messages file
157161 touch qc_messages.txt
@@ -205,7 +209,7 @@ task ValidateCramsAndIndices {
205209 # ensure that all CRAM files are less than the maximum file size allowed by the service (currently 10GB)
206210 # this also serves as an access check, which should already have been performed by the service
207211 crams_exceeding_max_size = $(cat crams_list.txt | while read cram ; do
208- file_size_bytes = $(gcloud storage ls -L "$cram " | grep "Content-Length:" | awk '{print $2}' )
212+ file_size_bytes = $(gcloud storage ls -L "$cram " ~{ gcloud_requester_pays_flag } | grep "Content-Length:" | awk '{print $2}' )
209213 file_size_gb = $((file_size_bytes / 1024 / 1024 / 1024 ))
210214 if [ $file_size_gb -gt ~{max_cram_file_size_gb } ]; then
211215 echo "$cram (${file_size_gb} GB)"
0 commit comments