22
33set -Eeuo pipefail
44
5+ # ./scramble.sh NA12878.final.cram NA12878.final.cram.crai NA12878.final.cram NA12878.final.cram.crai NA12878.counts.tsv.gz NA12878.manta.vcf.gz test Homo_sapiens_assembly38.fasta Homo_sapiens_assembly38.fasta.fai primary_contigs.list 90 hg38.repeatmasker.mei.with_SVA.pad_50_merged.bed.gz
6+
57bam_or_cram_file=${1}
68bam_or_cram_index=${2}
79original_bam_or_cram_file=${3}
@@ -29,6 +31,9 @@ part2_threads=${15:-7}
2931scramble_vcf_script=${16:- " /opt/sv-pipeline/scripts/make_scramble_vcf.py" }
3032make_scramble_vcf_args=${17:- " " }
3133
34+ # In case it is re-run, the script will wait for a response
35+ # on override the existing file, that may not work in a pipeline.
36+ rm -f test.scramble.tsv.gz
3237
3338# ScramblePart1
3439# -------------
@@ -41,7 +46,7 @@ zcat "${counts_file}" \
4146 | cut -f4 \
4247 | Rscript -e " cat(round(${min_clipped_reads_fraction} *median(data.matrix(read.csv(file(\" stdin\" ))))))" \
4348 > cutoff.txt
44- MIN_CLIPPED_READS=$( cat cutoff.txt)
49+ export MIN_CLIPPED_READS=$( cat cutoff.txt)
4550echo " MIN_CLIPPED_READS: ${MIN_CLIPPED_READS} "
4651
4752# Identify clusters of split reads
@@ -50,31 +55,26 @@ while read region; do
5055 | gzip >> " ${sample_name} " .scramble_clusters.tsv.gz
5156done < " ${regions_list} "
5257
53-
54- clusters_file=" ${sample_name} .scramble_clusters.tsv.gz"
55-
56- # In case it is re-run, the script will wait for a response
57- # on override the existing file, that may not work in a pipeline.
58- rm test.scramble.tsv.gz
58+ export clusters_file=" ${sample_name} .scramble_clusters.tsv.gz"
5959
6060# ScramblePart2
6161# -------------
6262
63- xDir=$PWD
64- clusterFile=$xDir /clusters
65- scrambleDir=" /app/scramble-gatk-sv"
66- meiRef=$scrambleDir /cluster_analysis/resources/MEI_consensus_seqs.fa
63+ export xDir=$PWD
64+ export clusterFile=$xDir /clusters
65+ export scrambleDir=" /app/scramble-gatk-sv"
66+ export meiRef=$scrambleDir /cluster_analysis/resources/MEI_consensus_seqs.fa
6767
6868# create a blast db from the reference
6969cat " ${reference_fasta} " | makeblastdb -in - -parse_seqids -title ref -dbtype nucl -out ref
7070
7171gunzip -c " ${clusters_file} " > $clusterFile
7272
7373# Produce ${clusterFile}_MEIs.txt
74- Rscript --vanilla $ scrambleDir /cluster_analysis/bin/SCRAMble.R --out-name $ clusterFile \
75- --cluster-file $ clusterFile --install-dir $ scrambleDir /cluster_analysis/bin \
76- --mei-refs $ meiRef --ref $ xDir /ref --no-vcf --eval-meis --cores " ${part2_threads} " \
77- --pct-align " ${percent_align_cutoff} " -n $ MIN_CLIPPED_READS --mei-score " ${alignment_score_cutoff} "
74+ Rscript --vanilla " ${ scrambleDir} " /cluster_analysis/bin/SCRAMble.R --out-name " ${ clusterFile} " \
75+ --cluster-file " ${ clusterFile} " --install-dir " ${ scrambleDir} " /cluster_analysis/bin \
76+ --mei-refs " ${ meiRef} " --ref " ${ xDir} " /ref --no-vcf --eval-meis --cores " ${part2_threads} " \
77+ --pct-align " ${percent_align_cutoff} " -n " ${ MIN_CLIPPED_READS} " --mei-score " ${alignment_score_cutoff} "
7878
7979# Save raw outputs
8080mv ${clusterFile} _MEIs.txt " ${sample_name} " .scramble.tsv
@@ -84,7 +84,7 @@ gzip "${sample_name}".scramble.tsv
8484# MakeScrambleVcf
8585# --------------
8686
87- scramble_table=" ${sample_name} .scramble.tsv.gz"
87+ export scramble_table=" ${sample_name} .scramble.tsv.gz"
8888
8989python " ${scramble_vcf_script} " \
9090 --table " ${scramble_table} " \
@@ -94,6 +94,6 @@ python "${scramble_vcf_script}" \
9494 --reference " ${reference_fasta} " \
9595 --mei-bed " ${mei_bed} " \
9696 --out unsorted.vcf.gz \
97- " ${make_scramble_vcf_args} "
97+ ${make_scramble_vcf_args}
9898bcftools sort unsorted.vcf.gz -Oz -o " ${sample_name} " .scramble.vcf.gz
9999tabix " ${sample_name} " .scramble.vcf.gz
0 commit comments