Skip to content

Commit bb150a2

Browse files
committed
vsnp3 version 3.32
1 parent 4d28893 commit bb150a2

29 files changed

+2905
-1434
lines changed

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,7 @@ Benefits of defining SNPs:
7171
## 📦 Installation
7272

7373
```bash
74-
conda create -c conda-forge -c bioconda -n vsnp3 vsnp3=3.31
74+
conda create -c conda-forge -c bioconda -n vsnp3 vsnp3=3.32
7575
conda activate vsnp3
7676
```
7777

bin/vsnp3_alignment_vcf.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
#!/usr/bin/env python3
22

3-
__version__ = "3.31"
3+
__version__ = "3.32"
44

55
import os
66
import subprocess

bin/vsnp3_annotation.py

Lines changed: 160 additions & 61 deletions
Large diffs are not rendered by default.

bin/vsnp3_assembly.py

Lines changed: 15 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
#!/usr/bin/env python3
22

3-
__version__ = "3.31"
3+
__version__ = "3.32"
44

55
import os
66
import sys
@@ -189,15 +189,19 @@ def stats(self, FASTA=None):
189189
self.total_contig_lengths = total_contig_lengths
190190
self.mean_coverage = mean_coverage
191191

192-
print(f'\t Contig count: {bcolors.YELLOW}{self.contig_count:,}{bcolors.ENDC}, \n \
193-
Contig length counts <|301-999bp|>: {bcolors.RED}{self.small_contigs_count:,}{bcolors.ENDC}|{bcolors.BLUE}{self.mid_size:,}{bcolors.ENDC}|{bcolors.GREEN}{self.greater_one_kb_count:,}{bcolors.ENDC}, \n \
194-
Longest contig: {bcolors.GREEN}{self.longest_contig:,}{bcolors.ENDC}, \n \
195-
Total length: {bcolors.WHITE}{self.total_contig_lengths:,}{bcolors.ENDC}, \n \
196-
N50: {bcolors.PURPLE}{self.n50:,}{bcolors.ENDC}, \n \
197-
{self.coverage_title}: {bcolors.YELLOW}{self.mean_coverage:,.1f}X{bcolors.ENDC}\n')
192+
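# Build the report from adjacent f-string literals (implicitly concatenated inside the print() parentheses) instead of backslash line continuations inside a single literal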
193+
print(f'\t Contig count: {bcolors.YELLOW}{self.contig_count:,}{bcolors.ENDC}, \n'
194+
f' Contig length counts <|301-999bp|>: {bcolors.RED}{self.small_contigs_count:,}{bcolors.ENDC}|{bcolors.BLUE}{self.mid_size:,}{bcolors.ENDC}|{bcolors.GREEN}{self.greater_one_kb_count:,}{bcolors.ENDC}, \n'
195+
f' Longest contig: {bcolors.GREEN}{self.longest_contig:,}{bcolors.ENDC}, \n'
196+
f' Total length: {bcolors.WHITE}{self.total_contig_lengths:,}{bcolors.ENDC}, \n'
197+
f' N50: {bcolors.PURPLE}{self.n50:,}{bcolors.ENDC}, \n'
198+
f' {self.coverage_title}: {bcolors.YELLOW}{self.mean_coverage:,.1f}X{bcolors.ENDC}\n')
198199

199200
def latex(self, tex, groups=None):
200201
blast_banner = Banner("Assembly")
202+
# LaTeX row terminator (two backslashes), kept in a variable because f-string replacement fields cannot contain a backslash before Python 3.12
203+
latex_newline = r"\\"
204+
201205
print(r'\begin{table}[ht!]', file=tex)
202206
print(r'\begin{adjustbox}{width=1\textwidth}', file=tex)
203207
print(r'\begin{center}', file=tex)
@@ -206,9 +210,11 @@ def latex(self, tex, groups=None):
206210
print(r'\end{adjustbox}', file=tex)
207211
print(r'\begin{adjustbox}{width=1\textwidth}', file=tex)
208212
print(r'\begin{tabular}{ l | l | l | l | l | l }', file=tex)
209-
print(f'Contig count & Contig length counts $<$ | 301-999bp | $>$ & Longest contig & Total length & N50 & {self.coverage_title} {r"\\"}', file=tex)
213+
# Table header row; the row terminator comes from latex_newline rather than a backslash literal inside the f-string
214+
print(f'Contig count & Contig length counts $<$ | 301-999bp | $>$ & Longest contig & Total length & N50 & {self.coverage_title} {latex_newline}', file=tex)
210215
print(r'\hline', file=tex)
211-
print(f'{self.contig_count:,} & {self.small_contigs_count:,} | {self.mid_size:,} | {self.greater_one_kb_count:,} & {self.longest_contig:,} & {self.total_contig_lengths:,} & {self.n50:,} & {self.mean_coverage:,.1f}X {r"\\"}', file=tex)
216+
# Table data row; the row terminator comes from latex_newline rather than a backslash literal inside the f-string
217+
print(f'{self.contig_count:,} & {self.small_contigs_count:,} | {self.mid_size:,} | {self.greater_one_kb_count:,} & {self.longest_contig:,} & {self.total_contig_lengths:,} & {self.n50:,} & {self.mean_coverage:,.1f}X {latex_newline}', file=tex)
212218
print(r'\hline', file=tex)
213219
print(r'\end{adjustbox}', file=tex)
214220
print(r'\vspace{0.1 mm}', file=tex)

bin/vsnp3_best_reference_sourmash.py

Lines changed: 27 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
#!/usr/bin/env python3
22

3-
__version__ = "3.31"
3+
__version__ = "3.32"
44

55
import os
66
import subprocess
@@ -41,7 +41,7 @@ def run(self,):
4141
sourmash search *_R1*.fastq.gz.sig ../sourmash/ref_db.sbt.zip -o sourmash_findings.csv
4242
'''
4343
all_ref_options = []
44-
ref_options_file = os.path.abspath(f'{self.script_path}/../dependencies/reference_options_paths.txt')
44+
ref_options_file = os.path.abspath(os.path.join(self.script_path, '..', 'dependencies', 'reference_options_paths.txt'))
4545
self.ref_options_file = ref_options_file
4646

4747
# Check if reference options file exists
@@ -56,13 +56,13 @@ def run(self,):
5656
return
5757

5858
# Read reference paths
59-
with open(f'{ref_options_file}', 'r') as dep_paths:
59+
with open(ref_options_file, 'r') as dep_paths:
6060
dependency_paths = [line.strip() for line in dep_paths]
6161

6262
# Collect all reference options from the specified paths
6363
for path in dependency_paths:
6464
if os.path.exists(path):
65-
ref_options = glob.glob(f'{path}/*')
65+
ref_options = glob.glob(os.path.join(path, '*'))
6666
all_ref_options = all_ref_options + ref_options
6767
else:
6868
print(f"Warning: Reference path does not exist: {path}")
@@ -73,7 +73,7 @@ def run(self,):
7373
# Get FASTA files from each reference directory
7474
self.fasta_list = []
7575
for each_path in all_ref_options:
76-
self.fasta_list.extend(glob.glob(f'{each_path}/*.fasta'))
76+
self.fasta_list.extend(glob.glob(os.path.join(each_path, '*.fasta')))
7777

7878
# Create dictionary mapping FASTA headers to file paths
7979
header_dict = {}
@@ -97,7 +97,7 @@ def run(self,):
9797
return
9898

9999
# Check if sourmash database exists
100-
sourmash_db = f'{self.script_path}/../dependencies/ref_db.sbt.zip'
100+
sourmash_db = os.path.join(self.script_path, '..', 'dependencies', 'ref_db.sbt.zip')
101101
if not os.path.exists(sourmash_db):
102102
print(f"Error: Sourmash database not found: {sourmash_db}")
103103
self.top_header_found = "Sourmash Database Not Found"
@@ -108,6 +108,9 @@ def run(self,):
108108
self.sourmash_df = pd.DataFrame()
109109
return
110110

111+
# Create signature file path
112+
fastq_sig_file = f'{self.FASTQ_R1}.sig'
113+
111114
# Run sourmash sketch
112115
try:
113116
sketch_result = subprocess.run(
@@ -128,16 +131,19 @@ def run(self,):
128131
self.sourmash_df = pd.DataFrame()
129132
return
130133

134+
# Create search CSV file path
135+
search_csv_file = f'{self.sample_name}_search.csv'
136+
131137
# Run sourmash search
132138
try:
133139
search_result = subprocess.run(
134140
[
135141
"sourmash",
136142
"search",
137-
f'{self.FASTQ_R1}.sig',
143+
fastq_sig_file,
138144
sourmash_db,
139145
"-o",
140-
f'{self.sample_name}_search.csv',
146+
search_csv_file,
141147
'--threshold=0.001'
142148
],
143149
capture_output=True,
@@ -154,22 +160,22 @@ def run(self,):
154160
self.reference_set = None
155161
self.top_fasta_header = "Sourmash Search Failed"
156162
self.sourmash_df = pd.DataFrame()
157-
if os.path.exists(f'{self.FASTQ_R1}.sig'):
158-
os.remove(f'{self.FASTQ_R1}.sig')
163+
if os.path.exists(fastq_sig_file):
164+
os.remove(fastq_sig_file)
159165
return
160166

161167
# Read search results
162168
try:
163-
if os.path.exists(f'{self.sample_name}_search.csv') and os.path.getsize(f'{self.sample_name}_search.csv') > 0:
164-
self.sourmash_df = pd.read_csv(f'{self.sample_name}_search.csv')
169+
if os.path.exists(search_csv_file) and os.path.getsize(search_csv_file) > 0:
170+
self.sourmash_df = pd.read_csv(search_csv_file)
165171
else:
166172
print("Warning: Sourmash search produced no results or empty file")
167173
self.sourmash_df = pd.DataFrame()
168174
except Exception as e:
169175
print(f"Error reading sourmash search results: {str(e)}")
170176
self.sourmash_df = pd.DataFrame()
171177

172-
#Force a top hit to a specific reference, ie TB lineages to
178+
# Force a top hit to a specific reference, ie TB lineages to
173179
try:
174180
self.top_header_found = self.sourmash_df['name'][0].split()[0] # top hit
175181
except (IndexError, KeyError):
@@ -212,15 +218,15 @@ def run(self,):
212218
self.top_fasta_header = 'Error reading reference file'
213219

214220
# Create sourmash directory and move results
215-
dir = 'sourmash'
216-
if not os.path.exists(dir):
217-
os.makedirs(dir)
221+
sourmash_dir = 'sourmash'
222+
if not os.path.exists(sourmash_dir):
223+
os.makedirs(sourmash_dir)
218224

219-
if os.path.exists(f'{self.sample_name}_search.csv'):
220-
shutil.move(f'{self.sample_name}_search.csv', dir)
225+
if os.path.exists(search_csv_file):
226+
shutil.move(search_csv_file, sourmash_dir)
221227

222-
if os.path.exists(f'{self.FASTQ_R1}.sig'):
223-
os.remove(f'{self.FASTQ_R1}.sig')
228+
if os.path.exists(fastq_sig_file):
229+
os.remove(fastq_sig_file)
224230

225231
print("#############\n")
226232

@@ -246,7 +252,7 @@ def latex(self, tex):
246252
count+=1
247253
if count <= 10:
248254
percentage = f'{row[1]:.1%}'
249-
name = row[4].replace("_", r"\_") if isinstance(row[2], str) else "Invalid Name"
255+
name = row[4].replace("_", r"\_") if isinstance(row[4], str) else "Invalid Name"
250256
print(percentage.replace("%", r"\%") + ' & ' + name + r' \\', file=tex)
251257
print(r'\hline', file=tex)
252258
except Exception as e:

0 commit comments

Comments
 (0)