11#!/usr/bin/env python3
22
3- __version__ = "3.31 "
3+ __version__ = "3.32 "
44
55import os
66import subprocess
@@ -41,7 +41,7 @@ def run(self,):
4141 sourmash search *_R1*.fastq.gz.sig ../sourmash/ref_db.sbt.zip -o sourmash_findings.csv
4242 '''
4343 all_ref_options = []
44- ref_options_file = os .path .abspath (f' { self .script_path } /../ dependencies/ reference_options_paths.txt' )
44+ ref_options_file = os .path .abspath (os . path . join ( self .script_path , '..' , ' dependencies' , ' reference_options_paths.txt') )
4545 self .ref_options_file = ref_options_file
4646
4747 # Check if reference options file exists
@@ -56,13 +56,13 @@ def run(self,):
5656 return
5757
5858 # Read reference paths
59- with open (f' { ref_options_file } ' , 'r' ) as dep_paths :
59+ with open (ref_options_file , 'r' ) as dep_paths :
6060 dependency_paths = [line .strip () for line in dep_paths ]
6161
6262 # Collect all reference options from the specified paths
6363 for path in dependency_paths :
6464 if os .path .exists (path ):
65- ref_options = glob .glob (f' { path } /*' )
65+ ref_options = glob .glob (os . path . join ( path , '*' ) )
6666 all_ref_options = all_ref_options + ref_options
6767 else :
6868 print (f"Warning: Reference path does not exist: { path } " )
@@ -73,7 +73,7 @@ def run(self,):
7373 # Get FASTA files from each reference directory
7474 self .fasta_list = []
7575 for each_path in all_ref_options :
76- self .fasta_list .extend (glob .glob (f' { each_path } / *.fasta' ))
76+ self .fasta_list .extend (glob .glob (os . path . join ( each_path , ' *.fasta') ))
7777
7878 # Create dictionary mapping FASTA headers to file paths
7979 header_dict = {}
@@ -97,7 +97,7 @@ def run(self,):
9797 return
9898
9999 # Check if sourmash database exists
100- sourmash_db = f' { self .script_path } /../ dependencies/ ref_db.sbt.zip'
100+ sourmash_db = os . path . join ( self .script_path , '..' , ' dependencies' , ' ref_db.sbt.zip')
101101 if not os .path .exists (sourmash_db ):
102102 print (f"Error: Sourmash database not found: { sourmash_db } " )
103103 self .top_header_found = "Sourmash Database Not Found"
@@ -108,6 +108,9 @@ def run(self,):
108108 self .sourmash_df = pd .DataFrame ()
109109 return
110110
111+ # Create signature file path
112+ fastq_sig_file = f'{ self .FASTQ_R1 } .sig'
113+
111114 # Run sourmash sketch
112115 try :
113116 sketch_result = subprocess .run (
@@ -128,16 +131,19 @@ def run(self,):
128131 self .sourmash_df = pd .DataFrame ()
129132 return
130133
134+ # Create search CSV file path
135+ search_csv_file = f'{ self .sample_name } _search.csv'
136+
131137 # Run sourmash search
132138 try :
133139 search_result = subprocess .run (
134140 [
135141 "sourmash" ,
136142 "search" ,
137- f' { self . FASTQ_R1 } .sig' ,
143+ fastq_sig_file ,
138144 sourmash_db ,
139145 "-o" ,
140- f' { self . sample_name } _search.csv' ,
146+ search_csv_file ,
141147 '--threshold=0.001'
142148 ],
143149 capture_output = True ,
@@ -154,22 +160,22 @@ def run(self,):
154160 self .reference_set = None
155161 self .top_fasta_header = "Sourmash Search Failed"
156162 self .sourmash_df = pd .DataFrame ()
157- if os .path .exists (f' { self . FASTQ_R1 } .sig' ):
158- os .remove (f' { self . FASTQ_R1 } .sig' )
163+ if os .path .exists (fastq_sig_file ):
164+ os .remove (fastq_sig_file )
159165 return
160166
161167 # Read search results
162168 try :
163- if os .path .exists (f' { self . sample_name } _search.csv' ) and os .path .getsize (f' { self . sample_name } _search.csv' ) > 0 :
164- self .sourmash_df = pd .read_csv (f' { self . sample_name } _search.csv' )
169+ if os .path .exists (search_csv_file ) and os .path .getsize (search_csv_file ) > 0 :
170+ self .sourmash_df = pd .read_csv (search_csv_file )
165171 else :
166172 print ("Warning: Sourmash search produced no results or empty file" )
167173 self .sourmash_df = pd .DataFrame ()
168174 except Exception as e :
169175 print (f"Error reading sourmash search results: { str (e )} " )
170176 self .sourmash_df = pd .DataFrame ()
171177
172- #Force a top hit to a specific reference, ie TB lineages to
178+ # Force the top hit to a specific reference, i.e. map TB lineages to a single reference (TODO confirm: original comment is truncated)
173179 try :
174180 self .top_header_found = self .sourmash_df ['name' ][0 ].split ()[0 ] # top hit
175181 except (IndexError , KeyError ):
@@ -212,15 +218,15 @@ def run(self,):
212218 self .top_fasta_header = 'Error reading reference file'
213219
214220 # Create sourmash directory and move results
215- dir = 'sourmash'
216- if not os .path .exists (dir ):
217- os .makedirs (dir )
221+ sourmash_dir = 'sourmash'
222+ if not os .path .exists (sourmash_dir ):
223+ os .makedirs (sourmash_dir )
218224
219- if os .path .exists (f' { self . sample_name } _search.csv' ):
220- shutil .move (f' { self . sample_name } _search.csv' , dir )
225+ if os .path .exists (search_csv_file ):
226+ shutil .move (search_csv_file , sourmash_dir )
221227
222- if os .path .exists (f' { self . FASTQ_R1 } .sig' ):
223- os .remove (f' { self . FASTQ_R1 } .sig' )
228+ if os .path .exists (fastq_sig_file ):
229+ os .remove (fastq_sig_file )
224230
225231 print ("#############\n " )
226232
@@ -246,7 +252,7 @@ def latex(self, tex):
246252 count += 1
247253 if count <= 10 :
248254 percentage = f'{ row [1 ]:.1%} '
249- name = row [4 ].replace ("_" , r"\_" ) if isinstance (row [2 ], str ) else "Invalid Name"
255+ name = row [4 ].replace ("_" , r"\_" ) if isinstance (row [4 ], str ) else "Invalid Name"
250256 print (percentage .replace ("%" , r"\%" ) + ' & ' + name + r' \\' , file = tex )
251257 print (r'\hline' , file = tex )
252258 except Exception as e :
0 commit comments