Skip to content

Commit 635846d

Browse files
authored
Merge pull request #2033 from Clinical-Genomics/release/11.2.0
Release/11.2.0
2 parents f168961 + 396b9bc commit 635846d

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

61 files changed

+2883
-110
lines changed

CHANGELOG.md

+12
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,18 @@
33
All notable changes to this project will be documented in this file.
44
This project adheres to [Semantic Versioning](http://semver.org/).
55

6+
## [11.2.0]
7+
8+
- Adds retroseq for mobile element detection
9+
10+
### Databases
11+
12+
- expansionhunter variant catalog: v4.0.2 -> v5.0.0
13+
14+
### Tools
15+
16+
- RetroSeq: 9d4f3b5
17+
618
## [11.1.3]
719

820
- Adds Gens' bed index file to deliverables

containers/retroseq/Dockerfile

+30
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
################## BASE IMAGE ######################
2+
3+
FROM clinicalgenomics/mip_base:2.1
4+
5+
################## METADATA ######################
6+
7+
LABEL base_image="clinicalgenomics/mip_base:2.1"
8+
LABEL version="1"
9+
LABEL software="retroseq"
10+
LABEL software.version="1.5_9d4f3b5"
11+
LABEL extra.binaries="retroseq"
12+
LABEL maintainer="Clinical-Genomics/MIP"
13+
14+
RUN apt-get update --fix-missing && \
15+
apt-get install -y --no-install-recommends \
16+
ca-certificates && \
17+
apt-get clean && \
18+
rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
19+
20+
RUN conda install samtools exonerate bedtools bcftools && \
21+
/opt/conda/bin/conda clean -ya
22+
23+
RUN git clone https://github.com/tk2/RetroSeq.git /opt/conda/share/RetroSeq
24+
25+
WORKDIR /opt/conda/share/RetroSeq
26+
27+
## Remove samtools check
28+
## Make sure we're on the right commit and remove samtools check
29+
RUN git reset --hard 9d4f3b5 && \
30+
sed -i '/RetroSeq::Utilities::checkBinary( q\[samtools\].*/d' ./bin/retroseq.pl

definitions/install_parameters.yaml

+1
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,7 @@ rd_dna:
9393
- picard
9494
- plink
9595
- python
96+
- retroseq
9697
- rhocall
9798
- rtg-tools
9899
- sambamba

definitions/rd_dna_initiation_map.yaml

+7
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,13 @@ CHAIN_ALL:
4949
- sv_rankvariant
5050
- sv_reformat
5151
- vcf2cytosure_ar
52+
- CHAIN_MOBILE_ELEMENTS:
53+
- me_merge_bam
54+
- retroseq
55+
- me_merge_vcfs
56+
- me_annotate
57+
- me_varianteffectpredictor
58+
- me_filter
5259
- CHAIN_MAIN:
5360
# PARALLEL chains, which inherit from MAIN in initiation, but are merged back to CHAIN_MAIN after execution
5461
- PARALLEL:

definitions/rd_dna_parameters.yaml

+138-1
Original file line numberDiff line numberDiff line change
@@ -226,6 +226,11 @@ recipe_core_number:
226226
gzip_fastq: 0
227227
manta: 36
228228
markduplicates: 13
229+
me_annotate: 2
230+
me_filter: 2
231+
me_merge_bam: 5
232+
me_merge_vcfs: 2
233+
me_varianteffectpredictor: 4
229234
mitodel: 1
230235
mt_annotation: 1
231236
multiqc_ar: 1
@@ -234,6 +239,7 @@ recipe_core_number:
234239
picardtools_collectmultiplemetrics: 1
235240
plink: 1
236241
qccollect_ar: 1
242+
retroseq: 2
237243
rhocall_ar: 13
238244
rhocall_viz: 1
239245
rtg_vcfeval: 36
@@ -301,6 +307,7 @@ recipe_memory:
301307
picardtools_collecthsmetrics: 8
302308
picardtools_collectmultiplemetrics: 8
303309
plink: 10
310+
retroseq: 5
304311
rhocall_ar: 2
305312
rhocall_viz: 5
306313
sambamba_depth: 10
@@ -355,6 +362,11 @@ recipe_time:
355362
gzip_fastq: 2
356363
manta: 30
357364
markduplicates: 20
365+
me_annotate: 2
366+
me_filter: 1
367+
me_merge_bam: 5
368+
me_merge_vcfs: 2
369+
me_varianteffectpredictor: 3
358370
mitodel: 2
359371
mt_annotation: 1
360372
multiqc_ar: 5
@@ -365,6 +377,7 @@ recipe_time:
365377
prepareforvariantannotationblock: 5
366378
qccollect_ar: 1
367379
rankvariant: 10
380+
retroseq: 5
368381
rhocall_ar: 5
369382
rhocall_viz: 1
370383
rtg_vcfeval: 1
@@ -405,6 +418,7 @@ picardtools_path:
405418
- markduplicates
406419
- picardtools_collecthsmetrics
407420
- picardtools_collectmultiplemetrics
421+
- retroseq
408422
- sv_reformat
409423
data_type: SCALAR
410424
type: path
@@ -866,7 +880,7 @@ expansionhunter_variant_catalog_file_path:
866880
associated_recipe:
867881
- expansionhunter
868882
data_type: SCALAR
869-
default: grch37_expansionhunter_variant_catalog_-4.0.2-.json
883+
default: grch37_expansionhunter_variant_catalog_-5.0.0-.json
870884
exists_check: file
871885
is_reference: 1
872886
reference: reference_dir
@@ -1250,6 +1264,129 @@ sv_reformat_remove_genes_file:
12501264
mandatory: no
12511265
reference: reference_dir
12521266
type: path
1267+
## Mobile element chain
1268+
me_merge_bam:
1269+
analysis_mode: sample
1270+
associated_recipe:
1271+
- mip
1272+
data_type: SCALAR
1273+
default: 1
1274+
file_tag: "_all"
1275+
program_executables:
1276+
- samtools
1277+
outfile_suffix: ".bam"
1278+
type: recipe
1279+
retroseq:
1280+
analysis_mode: sample
1281+
associated_recipe:
1282+
- mip
1283+
data_type: SCALAR
1284+
default: 1
1285+
file_tag: _me
1286+
outfile_suffix: ".vcf.gz"
1287+
program_executables:
1288+
- bcftools
1289+
- picard
1290+
- retroseq.pl
1291+
- tabix
1292+
type: recipe
1293+
mobile_element_reference:
1294+
associated_recipe:
1295+
- retroseq
1296+
data_type: HASH
1297+
is_reference: 1
1298+
reference: reference_dir
1299+
type: path
1300+
me_merge_vcfs:
1301+
analysis_mode: case
1302+
associated_recipe:
1303+
- mip
1304+
data_type: SCALAR
1305+
default: 1
1306+
file_tag: _me
1307+
outfile_suffix: ".vcf.gz"
1308+
program_executables:
1309+
- bgzip
1310+
- svdb
1311+
- tabix
1312+
type: recipe
1313+
me_merge_vcfs_overlap:
1314+
associated_recipe:
1315+
- me_merge_vcfs
1316+
data_type: SCALAR
1317+
default: 0.5
1318+
type: recipe_argument
1319+
me_merge_vcfs_bnd_distance:
1320+
associated_recipe:
1321+
- me_merge_vcfs
1322+
data_type: SCALAR
1323+
default: 150
1324+
type: recipe_argument
1325+
me_annotate:
1326+
analysis_mode: case
1327+
associated_recipe:
1328+
- mip
1329+
data_type: SCALAR
1330+
default: 1
1331+
file_tag: _ann
1332+
outfile_suffix: ".vcf.gz"
1333+
program_executables:
1334+
- bcftools
1335+
- svdb
1336+
- tabix
1337+
type: recipe
1338+
me_annotate_query_files:
1339+
associated_recipe:
1340+
- me_annotate
1341+
data_type: HASH
1342+
exists_check: file
1343+
is_reference: 1
1344+
reference: reference_dir
1345+
type: path
1346+
me_annotate_query_overlap:
1347+
associated_recipe:
1348+
- me_annotate
1349+
data_type: SCALAR
1350+
default: -1
1351+
type: recipe_argument
1352+
me_annotate_query_bnd_distance:
1353+
associated_recipe:
1354+
- me_annotate
1355+
data_type: SCALAR
1356+
default: 150
1357+
type: recipe_argument
1358+
me_varianteffectpredictor:
1359+
analysis_mode: case
1360+
associated_recipe:
1361+
- mip
1362+
data_type: SCALAR
1363+
default: 1
1364+
file_tag: _vep
1365+
outfile_suffix: ".vcf.gz"
1366+
program_executables:
1367+
- bcftools
1368+
- tabix
1369+
- vep
1370+
type: recipe
1371+
me_filter:
1372+
analysis_mode: case
1373+
associated_recipe:
1374+
- mip
1375+
data_type: SCALAR
1376+
default: 1
1377+
file_tag: _filter
1378+
outfile_suffix: ".vcf.gz"
1379+
program_executables:
1380+
- bcftools
1381+
- mip
1382+
- tabix
1383+
type: recipe
1384+
me_filter_frequency_threshold:
1385+
associated_recipe:
1386+
- me_filter
1387+
data_type: SCALAR
1388+
default: 0.1
1389+
type: recipe_argument
12531390
## GATK CollectReadCounts
12541391
gatk_collectreadcounts:
12551392
analysis_mode: sample

documentation/Setup.md

+2
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,7 @@ You can speed up, for instance, the Readonly module by also installing the compa
6464
- [PicardTools] (version: 2.27.2)
6565
- [PLINK] (version: 1.90b3x35)
6666
- [Preseq] (version: 3.1.2)
67+
- [RetroSeq] (version: 9d4f3b5)
6768
- [Rhocall] (version: 0.5.1)
6869
- [RSeQC] (version: 4.0.0)
6970
- [rtg-tools] (version: 3.12)
@@ -195,6 +196,7 @@ Corresponding MIP references:
195196
[PicardTools]: http://broadinstitute.github.io/picard/
196197
[PLINK]: https://www.cog-genomics.org/plink2
197198
[Preseq]: http://smithlabresearch.org/software/preseq/
199+
[RetroSeq]: https://github.com/tk2/RetroSeq
198200
[Rhocall]: https://github.com/dnil/rhocall
199201
[RSeQC]: http://rseqc.sourceforge.net/
200202
[rtg-tools]: https://github.com/RealTimeGenomics/rtg-tools

lib/MIP/Cli/Mip/Analyse/Rd_dna.pm

+109
Original file line numberDiff line numberDiff line change
@@ -1156,6 +1156,115 @@ q{Default: hgvs, symbol, numbers, sift, polyphen, humdiv, domains, protein, ccds
11561156
)
11571157
);
11581158

1159+
option(
1160+
q{me_merge_bam} => (
1161+
cmd_tags => [q{Analysis recipe switch}],
1162+
documentation => q{Prepare bam files for RetroSeq},
1163+
is => q{rw},
1164+
isa => enum( [ 0, 1, 2 ] ),
1165+
)
1166+
);
1167+
1168+
option(
1169+
q{retroseq} => (
1170+
cmd_tags => [q{Analysis recipe switch}],
1171+
documentation => q{Discover mobile elements using RetroSeq},
1172+
is => q{rw},
1173+
isa => enum( [ 0, 1, 2 ] ),
1174+
)
1175+
);
1176+
1177+
option(
1178+
q{me_merge_vcfs} => (
1179+
cmd_tags => [q{Analysis recipe switch}],
1180+
documentation => q{Merge sample vcfs from RetroSeq},
1181+
is => q{rw},
1182+
isa => enum( [ 0, 1, 2 ] ),
1183+
)
1184+
);
1185+
1186+
option(
1187+
q{me_merge_vcfs_bnd_distance} => (
1188+
cmd_tags => [q{Default: 150}],
1189+
documentation => q{Maximum distance between two similar BNDs},
1190+
is => q{rw},
1191+
isa => Num,
1192+
)
1193+
);
1194+
1195+
option(
1196+
q{me_merge_vcfs_overlap} => (
1197+
cmd_tags => [q{Default: 0.5}],
1198+
documentation => q{Overlap required to merge two events},
1199+
is => q{rw},
1200+
isa => Num,
1201+
)
1202+
);
1203+
1204+
option(
1205+
q{mobile_element_reference} => (
1206+
cmd_tags => [q{file.vcf=TE_type}],
1207+
documentation => q{Database file(s) for mobile element discovery},
1208+
is => q{rw},
1209+
isa => HashRef,
1210+
)
1211+
);
1212+
1213+
option(
1214+
q{me_annotate} => (
1215+
cmd_tags => [q{Analysis recipe switch}],
1216+
documentation => q{Annotate mobile elements},
1217+
is => q{rw},
1218+
isa => enum( [ 0, 1, 2 ] ),
1219+
)
1220+
);
1221+
1222+
option(
1223+
q{me_annotate_query_bnd_distance} => (
1224+
cmd_tags => [q{Default: 150}],
1225+
documentation => q{Maximum distance between two similar BNDs},
1226+
is => q{rw},
1227+
isa => Num,
1228+
)
1229+
);
1230+
1231+
option(
1232+
q{me_annotate_query_overlap} => (
1233+
cmd_tags => [q{Default: -1}],
1234+
documentation => q{Overlap required to annotate},
1235+
is => q{rw},
1236+
isa => Num,
1237+
)
1238+
);
1239+
1240+
option(
1241+
q{me_varianteffectpredictor} => (
1242+
cmd_tags => [q{Analysis recipe switch}],
1243+
documentation => q{Annotate mobile elements with VEP},
1244+
is => q{rw},
1245+
isa => enum( [ 0, 1, 2 ] ),
1246+
)
1247+
);
1248+
1249+
option(
1250+
q{me_filter} => (
1251+
cmd_tags => [q{Analysis recipe switch}],
1252+
documentation => q{Filter mobile elements},
1253+
is => q{rw},
1254+
isa => enum( [ 0, 1, 2 ] ),
1255+
)
1256+
);
1257+
1258+
option(
1259+
q{me_filter_frequency_threshold} => (
1260+
cmd_tags => [q{Default: 0.1}],
1261+
documentation =>
1262+
q{Threshold frequency for variants to be filtered out, set to 0 to disable},
1263+
is => q{rw},
1264+
isa => Num,
1265+
)
1266+
);
1267+
11591268
option(
11601269
q{gatk_haplotypecaller} => (
11611270
cmd_tags => [q{Analysis recipe switch}],

0 commit comments

Comments
 (0)