Skip to content

Commit 225ce35

Browse files
authored
Merge pull request #7787 from wm75/snpeff-dm-more-fixes
More fixes and improvements for the SnpEff data manager
2 parents 2ea756a + 8169ae6 commit 225ce35

File tree

3 files changed

+34
-26
lines changed

3 files changed

+34
-26
lines changed

data_managers/data_manager_snpeff/data_manager/data_manager_snpEff_databases.py

Lines changed: 17 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -2,10 +2,24 @@
22
import json
33
import optparse
44
import os
5+
import re
56
import subprocess
67
import sys
78

89

def getSnpeffVersion():
    """Return the installed SnpEff version as a string such as ``SnpEff4.3``.

    Runs ``snpEff -version`` and parses the leading
    ``SnpEff <major>.<minor>`` token from the captured output.

    Returns:
        ``'SnpEff'`` immediately followed by the ``major.minor`` number, or
        the placeholder ``'SnpEff ?.?'`` when the output cannot be parsed.

    Raises:
        SystemExit: with the command's own return code when ``snpEff`` exits
            non-zero, or with an explanatory message when the executable
            cannot be started at all.
    """
    snpeff_version = 'SnpEff ?.?'
    args = ['snpEff', '-version']
    try:
        version_output = subprocess.check_output(args, shell=False).decode()
    except subprocess.CalledProcessError as e:
        sys.exit(e.returncode)
    except OSError as e:
        # check_output raises OSError (typically FileNotFoundError) when the
        # executable is missing or not runnable; exit cleanly with a message
        # instead of an uncaught traceback.
        sys.exit(f"Unable to run {args[0]!r}: {e}")
    # re.match already anchors at the start of the string. Not requiring the
    # pattern to reach the end of the output keeps the match working when the
    # command prints more than one line.
    m = re.match(r'(SnpEff)\s*(\d+\.\d+)', version_output)
    if m:
        snpeff_version = m.group(1) + m.group(2)
    return snpeff_version
21+
22+
923
def fetch_databases(data_manager_dict, target_directory):
1024
if not os.path.exists(target_directory):
1125
os.makedirs(target_directory)
@@ -18,6 +32,8 @@ def fetch_databases(data_manager_dict, target_directory):
1832
data_manager_dict['data_tables'] = data_manager_dict.get('data_tables', {})
1933
data_manager_dict['data_tables']['snpeffv_databases'] = data_manager_dict['data_tables'].get('snpeffv_databases', [])
2034
data_table_entries = []
35+
snpeff_version = getSnpeffVersion()
36+
2137
with open(databases_path, 'r') as fh:
2238
for line in fh:
2339
fields = line.split('\t')
@@ -29,7 +45,7 @@ def fetch_databases(data_manager_dict, target_directory):
2945
if genome_version == '30c2c903' or fields[1].strip() == 'TestCase' or fields[1].strip().startswith('Test_'):
3046
continue
3147
description = fields[1].strip() + ' : ' + genome_version
32-
data_table_entries.append(dict(value=genome_version, name=description))
48+
data_table_entries.append(dict(key=snpeff_version + '_' + genome_version, version=snpeff_version, value=genome_version, name=description))
3349
data_manager_dict['data_tables']['snpeffv_databases'] = data_table_entries
3450
return data_manager_dict
3551

data_managers/data_manager_snpeff/data_manager/data_manager_snpEff_databases.xml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -16,7 +16,7 @@ python '$__tool_directory__/data_manager_snpEff_databases.py' '$out_file'
1616
<output name="out_file">
1717
<assert_contents>
1818
<!-- Check that a genome was added -->
19-
<has_text text="GRCh38.86" />
19+
<has_text text="ebola_zaire" />
2020
</assert_contents>
2121
</output>
2222
</test>

data_managers/data_manager_snpeff/data_manager/data_manager_snpEff_download.py

Lines changed: 16 additions & 24 deletions
Original file line number | Diff line number | Diff line change
@@ -47,18 +47,14 @@ def getOrganismNames(genomes, organisms):
4747

4848
def getSnpeffVersion():
4949
snpeff_version = 'SnpEff ?.?'
50-
stderr_path = 'snpeff.err'
51-
args = ['snpEff', '-h']
52-
with open(stderr_path, 'w') as stderr_fh:
53-
return_code = subprocess.call(args=args, shell=False, stderr=stderr_fh.fileno())
54-
if return_code != 255:
55-
sys.exit(return_code)
56-
with open(stderr_path) as fh:
57-
for line in fh:
58-
m = re.match(r'^[Ss]npEff version (SnpEff)\s*(\d+\.\d+).*$', line)
59-
if m:
60-
snpeff_version = m.groups()[0] + m.groups()[1]
61-
break
50+
args = ['snpEff', '-version']
51+
try:
52+
version_output = subprocess.check_output(args, shell=False).decode()
53+
except subprocess.CalledProcessError as e:
54+
sys.exit(e.returncode)
55+
m = re.match(r'^(SnpEff)\s*(\d+\.\d+).*$', version_output)
56+
if m:
57+
snpeff_version = m.groups()[0] + m.groups()[1]
6258
return snpeff_version
6359

6460

@@ -124,7 +120,7 @@ def download_database(data_manager_dict, target_directory, genome_version, organ
124120
name=genomedb_name,
125121
path=f"snpEff/{db_version}/data"
126122
)
127-
_add_data_table_entry(data_manager_dict, 'snpeffv_genomedb', data_table_entry)
123+
data_manager_dict['data_tables']['snpeffv_genomedb'].append(data_table_entry)
128124

129125
if regulationdb_name:
130126
data_table_entry = dict(
@@ -134,16 +130,7 @@ def download_database(data_manager_dict, target_directory, genome_version, organ
134130
value=regulationdb_name,
135131
name=regulationdb_name
136132
)
137-
_add_data_table_entry(data_manager_dict, 'snpeffv_regulationdb', data_table_entry)
138-
139-
return data_manager_dict
140-
141-
142-
def _add_data_table_entry(data_manager_dict, data_table, data_table_entry):
143-
data_manager_dict['data_tables'] = data_manager_dict.get('data_tables', {})
144-
data_manager_dict['data_tables'][data_table] = data_manager_dict['data_tables'].get(data_table, [])
145-
data_manager_dict['data_tables'][data_table].append(data_table_entry)
146-
return data_manager_dict
133+
data_manager_dict['data_tables']['snpeffv_regulationdb'].append(data_table_entry)
147134

148135

149136
def main():
@@ -158,7 +145,12 @@ def main():
158145
params = json.load(fh)
159146
target_directory = params['output_data'][0]['extra_files_path']
160147
os.mkdir(target_directory)
161-
data_manager_dict = {}
148+
data_manager_dict = {
149+
'data_tables': {
150+
'snpeffv_genomedb': [],
151+
'snpeffv_regulationdb': []
152+
}
153+
}
162154

163155
# Create SnpEff Reference Data
164156
for genome_version, organism in zip(options.genome_version.split(','), getOrganismNames(options.genome_version, options.organism).split(',')):

0 commit comments

Comments
 (0)