Skip to content

Commit 17f9726

Browse files
spades.py: the auto-set K-mer size message is now logged with log.info; the K-mer sizes actually used are reported at the end
1 parent 9cb84d8 commit 17f9726

File tree

3 files changed

+17
-12
lines changed

3 files changed

+17
-12
lines changed

assembler/spades.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -756,7 +756,7 @@ def main(args):
756756
dataset_file.close()
757757
spades_cfg.__dict__["dataset"] = dataset_filename
758758

759-
latest_dir = spades_logic.run_spades(tmp_configs_dir, bin_home, spades_cfg, dataset_data, ext_python_modules_home, log)
759+
used_K = spades_logic.run_spades(tmp_configs_dir, bin_home, spades_cfg, dataset_data, ext_python_modules_home, log)
760760

761761
if os.path.isdir(misc_dir) and not options_storage.continue_mode:
762762
shutil.rmtree(misc_dir)
@@ -768,7 +768,7 @@ def main(args):
768768
if k_str.find(":") != -1:
769769
k_str = k_str[:k_str.find(":")]
770770
support.error("failed to continue from K=%s because this K was not processed in the original run!" % k_str, log)
771-
log.info("\n===== %s finished. \n" % STAGE_NAME)
771+
log.info("\n===== %s finished. Used k-mer sizes: %s \n" % (STAGE_NAME, ', '.join(map(str, used_K))))
772772
if not options_storage.run_completed:
773773
if options_storage.stop_after == 'as' or options_storage.stop_after == 'scc' or (options_storage.stop_after and options_storage.stop_after.startswith('k')):
774774
support.finish_here(log)

assembler/src/spades_pipeline/spades_logic.py

Lines changed: 14 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -88,19 +88,19 @@ def update_k_mers_in_special_cases(cur_k_mers, RL, log, silent=False):
8888
if options_storage.auto_K_allowed():
8989
if RL >= 250:
9090
if not silent:
91-
support.warning("Default k-mer sizes were set to %s because estimated "
92-
"read length (%d) is equal to or greater than 250" % (str(options_storage.K_MERS_250), RL), log)
91+
log.info("Default k-mer sizes were set to %s because estimated "
92+
"read length (%d) is equal to or greater than 250" % (str(options_storage.K_MERS_250), RL))
9393
return options_storage.K_MERS_250
9494
if RL >= 150:
9595
if not silent:
96-
support.warning("Default k-mer sizes were set to %s because estimated "
97-
"read length (%d) is equal to or greater than 150" % (str(options_storage.K_MERS_150), RL), log)
96+
log.info("Default k-mer sizes were set to %s because estimated "
97+
"read length (%d) is equal to or greater than 150" % (str(options_storage.K_MERS_150), RL), log)
9898
return options_storage.K_MERS_150
9999
if RL <= max(cur_k_mers):
100100
new_k_mers = [k for k in cur_k_mers if k < RL]
101101
if not silent:
102-
support.warning("K-mer sizes were set to %s because estimated "
103-
"read length (%d) is less than %d" % (str(new_k_mers), RL, max(cur_k_mers)), log)
102+
log.info("K-mer sizes were set to %s because estimated "
103+
"read length (%d) is less than %d" % (str(new_k_mers), RL, max(cur_k_mers)), log)
104104
return new_k_mers
105105
return cur_k_mers
106106

@@ -201,6 +201,7 @@ def prepare_config_scaffold_correction(filename, cfg, log, saves_dir, K):
201201
#todo
202202
process_cfg.substitute_params(filename, subst_dict, log)
203203

204+
204205
def run_scaffold_correction(configs_dir, execution_home, cfg, log, latest, K):
205206
data_dir = os.path.join(cfg.output_dir, "SCC", "K%d" % K)
206207
saves_dir = os.path.join(data_dir, 'saves')
@@ -232,6 +233,7 @@ def run_spades(configs_dir, execution_home, cfg, dataset_data, ext_python_module
232233
if not isinstance(cfg.iterative_K, list):
233234
cfg.iterative_K = [cfg.iterative_K]
234235
cfg.iterative_K = sorted(cfg.iterative_K)
236+
used_K = []
235237

236238
# checking and removing conflicting K-mer directories
237239
if options_storage.restart_from:
@@ -249,7 +251,7 @@ def run_spades(configs_dir, execution_home, cfg, dataset_data, ext_python_module
249251
k_to_delete = []
250252
for id, k in enumerate(needed_K):
251253
if len(processed_K) == id:
252-
if processed_K[-1] == original_K[-1]: # the last K in the original run was processed in "last_one" mode
254+
if processed_K[-1] == original_K[-1]: # the last K in the original run was processed in "last_one" mode
253255
k_to_delete = [original_K[-1]]
254256
break
255257
if processed_K[id] != k:
@@ -272,8 +274,10 @@ def run_spades(configs_dir, execution_home, cfg, dataset_data, ext_python_module
272274
K = cfg.iterative_K[0]
273275
if len(cfg.iterative_K) == 1:
274276
run_iteration(configs_dir, execution_home, cfg, log, K, None, True)
277+
used_K.append(K)
275278
else:
276279
run_iteration(configs_dir, execution_home, cfg, log, K, None, False)
280+
used_K.append(K)
277281
if options_storage.stop_after == "k%d" % K:
278282
finished_on_stop_after = True
279283
else:
@@ -290,6 +294,7 @@ def run_spades(configs_dir, execution_home, cfg, dataset_data, ext_python_module
290294
"Rerunning for the first value of K (%d) with Repeat Resolving" %
291295
(cfg.iterative_K[1], RL, cfg.iterative_K[0]), log)
292296
run_iteration(configs_dir, execution_home, cfg, log, cfg.iterative_K[0], None, True)
297+
used_K.append(cfg.iterative_K[0])
293298
K = cfg.iterative_K[0]
294299
else:
295300
rest_of_iterative_K = cfg.iterative_K
@@ -299,6 +304,7 @@ def run_spades(configs_dir, execution_home, cfg, dataset_data, ext_python_module
299304
count += 1
300305
last_one = count == len(cfg.iterative_K) or (rest_of_iterative_K[count] + 1 > RL)
301306
run_iteration(configs_dir, execution_home, cfg, log, K, prev_K, last_one)
307+
used_K.append(K)
302308
prev_K = K
303309
if last_one:
304310
break
@@ -354,8 +360,6 @@ def run_spades(configs_dir, execution_home, cfg, dataset_data, ext_python_module
354360
if not os.path.isfile(cfg.result_scaffolds_paths) or not options_storage.continue_mode:
355361
shutil.copyfile(os.path.join(latest, "scaffolds.paths"), cfg.result_scaffolds_paths)
356362

357-
358-
359363
if cfg.developer_mode:
360364
# saves
361365
saves_link = os.path.join(os.path.dirname(cfg.result_contigs), "saves")
@@ -368,4 +372,4 @@ def run_spades(configs_dir, execution_home, cfg, dataset_data, ext_python_module
368372
if os.path.isdir(cfg.tmp_dir):
369373
shutil.rmtree(cfg.tmp_dir)
370374

371-
return latest
375+
return used_K

assembler/src/spades_pipeline/support.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,7 @@ def recreate_dir(dirname):
104104
shutil.rmtree(dirname)
105105
os.makedirs(dirname)
106106

107+
107108
def check_files_duplication(filenames, log):
108109
for filename in filenames:
109110
if filenames.count(filename) != 1:

0 commit comments

Comments
 (0)