Skip to content

Commit 7584509

Browse files
committed
add requester pays input
1 parent 51c902c commit 7584509

File tree

3 files changed

+16
-11
lines changed

3 files changed

+16
-11
lines changed

pipelines/wdl/glimpse/low_pass_imputation/Glimpse2LowPassImputation.wdl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ workflow Glimpse2LowPassImputation {
77
input {
88
Array[String] contigs
99

10-
# this is the path the a directory that contains sites vcf, sites table, and reference chunks file. should end with a "/"
10+
# this is the directory that contains sites vcf, sites table, and reference chunks file. should end with a "/"
1111
String reference_panel_prefix
1212

1313
File? input_vcf

pipelines/wdl/glimpse/low_pass_imputation/input_qc/LowPassImputationQC.wdl

Lines changed: 13 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -5,22 +5,22 @@ workflow InputQC {
55
String pipeline_version = "0.0.1"
66

77
input {
8-
98
Array[String] contigs
109

11-
# this is the path the a directory that contains sites vcf, sites tabke, and reference chunks file. should end with a "/"
10+
# this is the directory that contains sites vcf, sites table, and reference chunks file. should end with a "/"
1211
String reference_panel_prefix
1312

14-
# service currently does not accept VCFs as input
1513
Array[File]? crams
1614
Array[File]? cram_indices
1715
Array[String]? sample_ids
1816
File? cram_manifest
19-
File fasta
20-
File fasta_index
2117
String output_basename
2218

19+
File fasta
20+
File fasta_index
2321
File ref_dict
22+
23+
String? billing_project_for_rp # optional; passed as billing_project_for_rp to the cram validation call in this workflow (requester pays buckets)
2424
}
2525

2626
# validate that either crams, or cram manifest is provided
@@ -47,8 +47,8 @@ workflow InputQC {
4747
4848
# validations for array crams input
4949
if (do_cram_qc) {
50-
Array[File] cram_array = select_first([crams, ConvertCramManifestToCramArrays.crams])
51-
Array[File] cram_index_array = select_first([cram_indices, ConvertCramManifestToCramArrays.cram_indices, []])
50+
Array[String] cram_array = select_first([crams, ConvertCramManifestToCramArrays.crams])
51+
Array[String] cram_index_array = select_first([cram_indices, ConvertCramManifestToCramArrays.cram_indices, []])
5252
Array[String] sample_id_array = select_first([sample_ids, ConvertCramManifestToCramArrays.sample_ids, []])
5353

5454
if (!defined(cram_index_array) || !defined(sample_id_array)) {
@@ -60,7 +60,8 @@ workflow InputQC {
6060
input:
6161
crams = cram_array,
6262
cram_indices = cram_index_array,
63-
sample_ids = sample_id_array
63+
sample_ids = sample_id_array,
64+
billing_project_for_rp = billing_project_for_rp
6465
}
6566
}
6667
@@ -141,6 +142,7 @@ task ValidateCramsAndIndices {
141142
Array[String] sample_ids
142143

143144
Int max_cram_file_size_gb = 10
145+
String? billing_project_for_rp # optional. if set, it is passed to gcloud as --billing-project so file sizes can be checked for requester pays buckets. if not set, file sizes in requester pays buckets cannot be checked and all such files are assumed to be below the max file size
144146
145147
String gatk_docker = "us.gcr.io/broad-gatk/gatk:4.6.1.0"
146148
Int cpu = 1
@@ -152,6 +154,8 @@ task ValidateCramsAndIndices {
152154
Int num_cram_indices = length(cram_indices)
153155
Int num_sample_ids = length(sample_ids)
154156

157+
String gcloud_requester_pays_flag = if defined(billing_project_for_rp) then "--billing-project ${billing_project_for_rp}" else ""
158+
155159
command <<<
156160
# create empty qc messages file
157161
touch qc_messages.txt
@@ -205,7 +209,7 @@ task ValidateCramsAndIndices {
205209
# ensure that all CRAM files are less than the maximum file size allowed by the service (currently 10GB)
206210
# this also serves as an access check, which should already have been performed by the service
207211
crams_exceeding_max_size=$(cat crams_list.txt | while read cram; do
208-
file_size_bytes=$(gcloud storage ls -L "$cram" | grep "Content-Length:" | awk '{print $2}')
212+
file_size_bytes=$(gcloud storage ls -L "$cram" ~{gcloud_requester_pays_flag} | grep "Content-Length:" | awk '{print $2}')
209213
file_size_gb=$((file_size_bytes / 1024 / 1024 / 1024))
210214
if [ $file_size_gb -gt ~{max_cram_file_size_gb} ]; then
211215
echo "$cram (${file_size_gb}GB)"

pipelines/wdl/glimpse/low_pass_imputation/input_qc/test_inputs/Plumbing/fail_cram_file_size.json

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,5 +7,6 @@
77
"LowPassImputationQC.ref_dict": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.dict",
88
"LowPassImputationQC.crams": ["gs://fc-cddd72b5-323c-495c-9557-5057fff0275a/morgan_test/fakeCram0.cram", "gs://pd-test-storage-public/CramToUnmappedBams/input/scientific/G94982.NA12878.WGS/NA12878.cram"],
99
"LowPassImputationQC.cram_indices": ["gs://fc-cddd72b5-323c-495c-9557-5057fff0275a/morgan_test/fakeCram0.cram.crai", "gs://pd-test-storage-public/CramToUnmappedBams/input/scientific/G94982.NA12878.WGS/NA12878.cram.crai"],
10-
"LowPassImputationQC.sample_ids": ["sample0", "NA12878"]
10+
"LowPassImputationQC.sample_ids": ["sample0", "NA12878"],
11+
"LowPassImputationQC.billing_project_for_rp": "terra-f8e3de20"
1112
}

0 commit comments

Comments
 (0)