Skip to content
This repository was archived by the owner on Aug 23, 2024. It is now read-only.

Commit f295524

Browse files
committed
Merge branch 'hotfix/1.0.2'
2 parents 08dbc4d + 4d783fd commit f295524

File tree

7 files changed

+66
-23
lines changed

7 files changed

+66
-23
lines changed

CHANGES.md

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,13 @@
1+
### 1.0.2
2+
* Update BRASS to handle data that is _very_ quiet
3+
* Fix up the wrapper timings file so it is captured properly
4+
* Upgraded base image to [dockstore-cgpwxs:2.0.4](https://github.com/cancerit/dockstore-cgpwxs/releases/tag/2.0.4)
5+
* To pick up changes in [cgpPindel:2.2.0](https://github.com/cancerit/cgpPindel/releases/tag/v2.2.0)
6+
* Removed the ability to build `*.bas` files, as it is not possible to declare optional secondary files; [dockstore-cgpmap](https://github.com/cancerit/dockstore-cgpmap) is the expected data source and provides them.
7+
* Moved some processing around to reduce CPU wastage.
8+
19
### 1.0.1
2-
* Update dependencies (cgpPindel/ascatNgs) to reduce reliance on Capture::Tiny, apparent cause of some issues.
10+
* Update dependencies to reduce reliance on Capture::Tiny, apparent cause of some issues
311

412
### 1.0.0
513
* Test data in `examples/analysis_config.local.json` moved to a non-expiring location.

Dockerfile

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
1-
FROM quay.io/wtsicgp/dockstore-cgpwxs:2.0.3
1+
FROM quay.io/wtsicgp/dockstore-cgpwxs:2.0.4
22

33
MAINTAINER keiranmraine@gmail.com
44

55
LABEL uk.ac.sanger.cgp="Cancer Genome Project, Wellcome Trust Sanger Institute" \
6-
version="1.0.1" \
6+
version="1.0.2" \
77
description="The CGP WGS pipeline for dockstore.org"
88

99
USER root

Dockstore.cwl

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ dct:creator:
1919

2020
requirements:
2121
- class: DockerRequirement
22-
dockerPull: "quay.io/wtsicgp/dockstore-cgpwgs:1.0.1"
22+
dockerPull: "quay.io/wtsicgp/dockstore-cgpwgs:1.0.2"
2323

2424
hints:
2525
- class: ResourceRequirement
@@ -144,4 +144,9 @@ outputs:
144144
outputBinding:
145145
glob: WGS_*_vs_*.timings.tar.gz
146146

147+
global_time:
148+
type: File
149+
outputBinding:
150+
glob: WGS_*_vs_*.time
151+
147152
baseCommand: ["/opt/wtsi-cgp/bin/ds-wrapper.pl"]

build/opt-build.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ SRC_ASCAT="https://raw.githubusercontent.com/Crick-CancerGenomics/ascat/6d40e69a
2323
VER_GRASS="v2.1.0"
2424

2525
### BRASS
26-
VER_BRASS="v5.3.1"
26+
VER_BRASS="v5.3.2"
2727
SOURCE_BLAT="http://users.soe.ucsc.edu/~kent/src/blatSrc35.zip"
2828
SRC_FASTA36="https://github.com/wrpearson/fasta36/archive/v36.3.8d_13Apr16.tar.gz"
2929

examples/analysis_config.local.json

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,10 @@
3838
"path": "/tmp/timings_WGS.tar.gz",
3939
"class": "File"
4040
},
41+
"global_time": {
42+
"path": "/tmp/global_WGS.time",
43+
"class": "File"
44+
},
4145
"run_params": {
4246
"path": "/tmp/params_WGS.params",
4347
"class": "File"

scripts/analysisWGS.sh

Lines changed: 21 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,8 @@ ln -fs $BAM_MT $BAM_MT_TMP
114114
ln -fs $BAM_WT $BAM_WT_TMP
115115
ln -fs $BAM_MT.bai $BAM_MT_TMP.bai
116116
ln -fs $BAM_WT.bai $BAM_WT_TMP.bai
117+
ln -fs $BAM_MT.bas $BAM_MT_TMP.bas
118+
ln -fs $BAM_WT.bas $BAM_WT_TMP.bas
117119

118120
## Make fake copynumber so we can run early steps of caveman
119121
perl -alne 'print join(qq{\t},$F[0],0,$F[1],2);' < $REF_BASE/genome.fa.fai | tee $TMP/norm.cn.bed > $TMP/tum.cn.bed
@@ -158,22 +160,6 @@ fi
158160

159161
echo "Setting up Parallel block 1"
160162

161-
if [ ! -f "${BAM_MT}.bas" ]; then
162-
echo -e "\t[Parallel block 1] BAS $NAME_MT added..."
163-
do_parallel[bas_MT]="bam_stats -i $BAM_MT_TMP -o $BAM_MT_TMP.bas"
164-
else
165-
ln -fs $BAM_MT.bas $BAM_MT_TMP.bas
166-
echo '#PRE EXISTING $NAME_MT.bam.bas file found' > $OUTPUT_DIR/timings/${PROTOCOL}_${NAME_MT}_vs_${NAME_WT}.time.bas_MT
167-
fi
168-
169-
if [ ! -f "${BAM_WT}.bas" ]; then
170-
echo -e "\t[Parallel block 1] BAS $NAME_WT added..."
171-
do_parallel[bas_WT]="bam_stats -i $BAM_WT_TMP -o $BAM_WT_TMP.bas"
172-
else
173-
ln -fs $BAM_WT.bas $BAM_WT_TMP.bas
174-
echo '#PRE EXISTING $NAME_WT.bam.bas file found' > $OUTPUT_DIR/timings/${PROTOCOL}_${NAME_MT}_vs_${NAME_WT}.time.bas_WT
175-
fi
176-
177163
echo -e "\t[Parallel block 1] Genotype Check added..."
178164
do_parallel[geno]="nice -n 10 compareBamGenotypes.pl \
179165
-o $OUTPUT_DIR/${PROTOCOL}_${NAME_MT}_vs_${NAME_WT}/genotyped \
@@ -187,6 +173,25 @@ do_parallel[verify_WT]="nice -n 10 verifyBamHomChk.pl -d 25 \
187173
-b $BAM_WT_TMP \
188174
-j $OUTPUT_DIR/${PROTOCOL}_${NAME_WT}/contamination/result.json"
189175

176+
echo -e "\t[Parallel block 1] cgpPindel input added..."
177+
do_parallel[cgpPindel_input]="pindel.pl \
178+
-o $OUTPUT_DIR/${PROTOCOL}_${NAME_MT}_vs_${NAME_WT}/pindel \
179+
-r $REF_BASE/genome.fa \
180+
-t $BAM_MT_TMP \
181+
-n $BAM_WT_TMP \
182+
-s $REF_BASE/pindel/simpleRepeats.bed.gz \
183+
-u $REF_BASE/pindel/pindel_np.gff3.gz \
184+
-f $REF_BASE/pindel/${PROTOCOL}_Rules.lst \
185+
-g $REF_BASE/vagrent/codingexon_regions.indel.bed.gz \
186+
-st $PROTOCOL \
187+
-as $ASSEMBLY \
188+
-sp '$SPECIES' \
189+
-e $PINDEL_EXCLUDE \
190+
-b $REF_BASE/pindel/HiDepth.bed.gz \
191+
-c $CPU \
192+
-sf $REF_BASE/pindel/softRules.lst \
193+
-p input"
194+
190195
echo -e "\t[Parallel block 1] BB alleleCount added..."
191196
if [ ! -z ${SKIPBB+x} ]; then
192197
do_parallel[alleleCount]="echo 'BB allele count disabled by params'"

scripts/ds-wrapper.pl

Lines changed: 23 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,9 @@
6767
die "ERROR: Please define assembly, not found in [CR|B]AM headers.\n";
6868
}
6969

70+
$opts{'mt_sm'} = sample_name_from_xam($opts{'t'});
71+
$opts{'wt_sm'} = sample_name_from_xam($opts{'n'});
72+
7073
printf "Options loaded: \n%s\n",Dumper(\%opts);
7174

7275
## unpack the reference area:
@@ -110,8 +113,7 @@
110113
print $R_FH qq{options(bitmapType='cairo')\n};
111114
close $R_FH;
112115

113-
make_path($ENV{HOME}.'/timings');
114-
my $cmd = "/usr/bin/time -o $ENV{HOME}/timings/analysisWGS.time -v /opt/wtsi-cgp/bin/analysisWGS.sh";
116+
my $cmd = sprintf '/usr/bin/time -o %s/WGS_%s_vs_%s.time -v /opt/wtsi-cgp/bin/analysisWGS.sh', $ENV{HOME}, $opts{'mt_sm'}, $opts{'wt_sm'};
115117
exec($cmd); # I will never return to the perl code
116118

117119
sub add_species_flag_ini {
@@ -130,6 +132,25 @@ sub add_species_flag_ini {
130132
return $ini_out;
131133
}
132134

135+
sub sample_name_from_xam {
136+
my $xam = shift;
137+
my $sm;
138+
open my $SAM, '-|', "samtools view -H $xam" or die $!;
139+
while(my $line = <$SAM>) {
140+
next unless($line =~ m/^\@RG/);
141+
chomp $line;
142+
$line .= "\t"; # simplify matching
143+
if($line =~ m/SM:([^\t]+)\t/) {
144+
my $smtmp = $1;
145+
if(defined $sm && $smtmp ne $sm) {
146+
die "Conflicting SM values found in different RG headers";
147+
}
148+
$sm = $smtmp;
149+
}
150+
}
151+
return $sm;
152+
}
153+
133154
sub species_assembly_from_xam {
134155
my $xam = shift;
135156
my %assembly_set;

0 commit comments

Comments
 (0)