From e220688c5c9173668789c8a781280691b35820c7 Mon Sep 17 00:00:00 2001 From: Anton Korobeynikov Date: Tue, 28 Apr 2026 12:54:46 -0700 Subject: [PATCH 1/2] Introduce frugal mode --- src/common/alignment/kmer_mapper.hpp | 1 - src/common/configs/config_struct.cpp | 15 ++++++++++---- src/common/configs/config_struct.hpp | 1 + src/common/pipeline/graph_pack_helpers.cpp | 20 ++++++++++--------- src/common/pipeline/graph_pack_helpers.h | 3 ++- src/common/stages/ss_edge_split.cpp | 2 +- src/projects/hpcspades/pipeline.cpp | 2 +- src/projects/spades/configs/config.info | 3 +++ src/projects/spades/hybrid_aligning.cpp | 2 +- src/projects/spades/pair_info_count.cpp | 2 +- src/projects/spades/pipeline.cpp | 4 +++- .../spades_pipeline/options_parser.py | 19 +++++++++++++++--- .../stages/spades_iteration_stage.py | 1 + .../spades_pipeline/stages/spades_stage.py | 1 + .../spades/restricted_edges_filling.cpp | 2 +- 15 files changed, 54 insertions(+), 24 deletions(-) diff --git a/src/common/alignment/kmer_mapper.hpp b/src/common/alignment/kmer_mapper.hpp index 7121b31e63..fc78f334a0 100644 --- a/src/common/alignment/kmer_mapper.hpp +++ b/src/common/alignment/kmer_mapper.hpp @@ -161,7 +161,6 @@ class KmerMapper : public omnigraph::GraphActionHandler { } Kmer Substitute(const Kmer &kmer) const { - VERIFY(this->IsAttached()); const auto *rawval = mapping_.find(kmer); if (rawval == nullptr) return kmer; diff --git a/src/common/configs/config_struct.cpp b/src/common/configs/config_struct.cpp index 6683ffb90b..4b061a219d 100644 --- a/src/common/configs/config_struct.cpp +++ b/src/common/configs/config_struct.cpp @@ -724,6 +724,7 @@ void load_launch_info(debruijn_config &cfg, boost::property_tree::ptree const &p load(cfg.rr_enable, pt, "rr_enable"); load(cfg.gfa11, pt, "gfa11"); + load(cfg.frugal, pt, "frugal"); load(cfg.temp_bin_reads_dir, pt, "temp_bin_reads_dir"); @@ -880,10 +881,16 @@ void load(debruijn_config &cfg, const std::vector &cfg_fn 
cfg.pe_params.param_set.scaffolder_options.enabled = false; } - cfg.need_mapping = cfg.developer_mode || cfg.correct_mismatches || - cfg.gap_closer_enable || cfg.rr_enable || - cfg.ss_coverage_splitter.enabled; - + // K-mer mapper is always attached in developer mode or in the case when read + // mapping would be needed (unless in frugal mode). + cfg.need_mapping = cfg.developer_mode || + (!cfg.frugal && + (cfg.correct_mismatches || // uses k-mer mapper directly + cfg.gap_closer_enable || // maps reads + cfg.rr_enable || // maps reads + cfg.ss_coverage_splitter.enabled // maps reads + ) + ); cfg.output_dir = cfg.output_base / ("K" + std::to_string(cfg.K)); cfg.output_saves = cfg.output_dir / "saves"; diff --git a/src/common/configs/config_struct.hpp b/src/common/configs/config_struct.hpp index 9129d0f4f9..d2e32ce51d 100644 --- a/src/common/configs/config_struct.hpp +++ b/src/common/configs/config_struct.hpp @@ -517,6 +517,7 @@ struct debruijn_config { time_tracing tt; bool need_mapping; + bool frugal = false; debruijn_config() : use_single_reads(false) { diff --git a/src/common/pipeline/graph_pack_helpers.cpp b/src/common/pipeline/graph_pack_helpers.cpp index 64169487ce..74f6f1761d 100644 --- a/src/common/pipeline/graph_pack_helpers.cpp +++ b/src/common/pipeline/graph_pack_helpers.cpp @@ -5,10 +5,9 @@ //* See file LICENSE for details. //*************************************************************************** +#include "graph_pack_helpers.h" #include "graph_pack.hpp" -#include "genomic_info.hpp" - #include "alignment/edge_index.hpp" #include "alignment/kmer_mapper.hpp" #include "alignment/long_read_storage.hpp" @@ -46,14 +45,17 @@ void EnsureIndex(GraphPack& gp) { index.Attach(); } -void EnsureBasicMapping(GraphPack& gp) { - auto &kmer_mapper = gp.get_mutable>(); - - VERIFY(kmer_mapper.IsAttached()); +void EnsureBasicMapping(GraphPack& gp, bool check_mapper) { EnsureIndex(gp); - INFO("Normalizing k-mer map. 
Total " << kmer_mapper.size() << " kmers to process"); - kmer_mapper.Normalize(); - INFO("Normalizing done"); + + if (check_mapper) { + auto &kmer_mapper = gp.get_mutable>(); + VERIFY(kmer_mapper.IsAttached()); + + INFO("Normalizing k-mer map. Total " << kmer_mapper.size() << " kmers to process"); + kmer_mapper.Normalize(); + INFO("Normalizing done"); + } } void EnsureQuality(GraphPack& gp) { diff --git a/src/common/pipeline/graph_pack_helpers.h b/src/common/pipeline/graph_pack_helpers.h index db164d9f99..1ea07daa2b 100644 --- a/src/common/pipeline/graph_pack_helpers.h +++ b/src/common/pipeline/graph_pack_helpers.h @@ -16,7 +16,8 @@ void FillQuality(graph_pack::GraphPack& gp); void ClearQuality(graph_pack::GraphPack& gp); void EnsureIndex(graph_pack::GraphPack& gp); -void EnsureBasicMapping(graph_pack::GraphPack& gp); +void EnsureBasicMapping(graph_pack::GraphPack& gp, + bool check_mapper = true); void EnsureQuality(graph_pack::GraphPack& gp); void EnsurePos(graph_pack::GraphPack& gp); void EnsureDebugInfo(graph_pack::GraphPack& gp); diff --git a/src/common/stages/ss_edge_split.cpp b/src/common/stages/ss_edge_split.cpp index ca5c3818b6..aed6301b9f 100644 --- a/src/common/stages/ss_edge_split.cpp +++ b/src/common/stages/ss_edge_split.cpp @@ -25,7 +25,7 @@ void SSEdgeSplit::run(graph_pack::GraphPack& gp, const char *) { return; } - EnsureBasicMapping(gp); + EnsureBasicMapping(gp, !cfg::get().frugal); for (size_t i = 0; i < cfg::get().ds.reads.lib_count(); ++i) { auto &lib = cfg::get_writable().ds.reads[i]; diff --git a/src/projects/hpcspades/pipeline.cpp b/src/projects/hpcspades/pipeline.cpp index c2d1d14c29..6ba7e62ba7 100644 --- a/src/projects/hpcspades/pipeline.cpp +++ b/src/projects/hpcspades/pipeline.cpp @@ -180,7 +180,7 @@ static void AddSimplificationStages(spades::StageManager &SPAdes) { SPAdes.add(); - if (cfg::get().correct_mismatches) + if (cfg::get().correct_mismatches && !cfg::get().frugal) SPAdes.add(); if (cfg::get().ss_coverage_splitter.enabled) diff 
--git a/src/projects/spades/configs/config.info b/src/projects/spades/configs/config.info index 150d4ee7d1..5a787b9350 100644 --- a/src/projects/spades/configs/config.info +++ b/src/projects/spades/configs/config.info @@ -51,6 +51,9 @@ min_edge_length_for_is_count 0 ; Whether GFA v1.2 (jump links) or GFA v1.1 (scaffold segments) graphs is written gfa11 false +; Enable / Disable memory frugal mode +frugal false + ;preserve raw paired index after distance estimation preserve_raw_paired_index false diff --git a/src/projects/spades/hybrid_aligning.cpp b/src/projects/spades/hybrid_aligning.cpp index 9f534fd578..85567de96b 100644 --- a/src/projects/spades/hybrid_aligning.cpp +++ b/src/projects/spades/hybrid_aligning.cpp @@ -356,7 +356,7 @@ void HybridLibrariesAligning::run(graph_pack::GraphPack& gp, const char*) { path_storage, gap_storage, cfg::get().max_threads, cfg::get().pb); } else { - EnsureBasicMapping(gp); + EnsureBasicMapping(gp, !cfg::get().frugal); gap_closing::GapTrackingListener mapping_listener(graph, gap_storage); INFO("Processing reads from hybrid library " << lib_id); diff --git a/src/projects/spades/pair_info_count.cpp b/src/projects/spades/pair_info_count.cpp index 2c201a1a73..8631f2dfdf 100644 --- a/src/projects/spades/pair_info_count.cpp +++ b/src/projects/spades/pair_info_count.cpp @@ -153,7 +153,7 @@ void PairInfoCountBase::execute(graph_pack::GraphPack &gp, const char *, const MapLibBase &map_lib_func, size_t num_readers) { InitRRIndices(gp); - EnsureBasicMapping(gp); + EnsureBasicMapping(gp, !cfg::get().frugal); const auto &graph = gp.get(); diff --git a/src/projects/spades/pipeline.cpp b/src/projects/spades/pipeline.cpp index d7654f706b..acff764080 100644 --- a/src/projects/spades/pipeline.cpp +++ b/src/projects/spades/pipeline.cpp @@ -179,7 +179,7 @@ static void AddSimplificationStages(StageManager &SPAdes) { SPAdes.add(); - if (cfg::get().correct_mismatches) + if (cfg::get().correct_mismatches && !cfg::get().frugal) SPAdes.add(); if 
(cfg::get().ss_coverage_splitter.enabled) @@ -255,6 +255,8 @@ void assemble_genome() { if (cfg::get().need_mapping) { INFO("Will need read mapping, kmer mapper will be attached"); conj_gp.get_mutable>().Attach(); + } else if (cfg::get().frugal) { + INFO("Memory frugal mode is enabled, will NOT attach kmer mapper"); } // Build the pipeline diff --git a/src/projects/spades/pipeline/spades_pipeline/options_parser.py b/src/projects/spades/pipeline/spades_pipeline/options_parser.py index fd7f4ae245..c32ebe2f0c 100644 --- a/src/projects/spades/pipeline/spades_pipeline/options_parser.py +++ b/src/projects/spades/pipeline/spades_pipeline/options_parser.py @@ -353,7 +353,7 @@ def add_basic_args(pgroup_basic): help="runs metaplasmidSPAdes pipeline for plasmid detection in metagenomic datasets " "(equivalent for --meta --plasmid)" if not help_hidden else argparse.SUPPRESS, - action="store_true") + action="store_true") pgroup_basic.add_argument("--rnaviral", dest="rnaviral", help="this flag enables virus assembly module from RNA-Seq data" @@ -694,6 +694,18 @@ def add_advanced_args(pgroup_advanced): help=argparse.SUPPRESS, action="store_false") + frugal = pgroup_advanced.add_mutually_exclusive_group() + frugal.add_argument("--frugal", + dest="frugal", + default=None, + help="be memory frugal (at the expense of the possible assembly quality)", + action="store_true") + frugal.add_argument("--frugal:false", + dest="frugal", + default=None, + help=argparse.SUPPRESS, + action="store_false") + def add_hidden_args(pgroup_hidden): show_help_hidden = ("--help-hidden" in sys.argv) @@ -1092,6 +1104,7 @@ def add_to_cfg(cfg, bin_home, spades_home, args): if args.read_buffer_size: cfg["assembly"].__dict__["read_buffer_size"] = args.read_buffer_size cfg["assembly"].__dict__["gfa11"] = args.gfa11 + cfg["assembly"].__dict__["frugal"] = args.frugal # corrector can work only if contigs exist (not only error correction) if (not args.only_error_correction) and args.mismatch_corrector: @@ -1223,7 
+1236,7 @@ def postprocessing(args, dataset_data, spades_home, load_processed_dataset, rest file_operations.get_lib_ids_by_type(dataset_data, "assembly-graph"))) long_read_libs = max(1, len( file_operations.get_lib_ids_by_type(dataset_data, ["pacbio", "nanopore"]))) - + if len(dataset_data) > paired_end_libs + graph_libs + long_read_libs: support.error("you cannot specify any data types except a single paired-end library " "(optionally accompanied by a single library of " @@ -1307,7 +1320,7 @@ def set_default_values(): if options_storage.args.developer_mode is None: options_storage.args.developer_mode = False if options_storage.args.time_tracer is None: - options_storage.args.time_tracer = False + options_storage.args.time_tracer = False if options_storage.args.qvoffset == "auto": options_storage.args.qvoffset = None if options_storage.args.cov_cutoff is None: diff --git a/src/projects/spades/pipeline/spades_pipeline/stages/spades_iteration_stage.py b/src/projects/spades/pipeline/spades_pipeline/stages/spades_iteration_stage.py index 81dc68819a..4569ba2069 100644 --- a/src/projects/spades/pipeline/spades_pipeline/stages/spades_iteration_stage.py +++ b/src/projects/spades/pipeline/spades_pipeline/stages/spades_iteration_stage.py @@ -70,6 +70,7 @@ def prepare_config_spades(filename, cfg, additional_contigs_fname, K, stage, sav subst_dict["gap_closer_enable"] = bool_to_str(last_one or K >= options_storage.GAP_CLOSER_ENABLE_MIN_K) subst_dict["rr_enable"] = bool_to_str(last_one and cfg.rr_enable) subst_dict["gfa11"] = bool_to_str(cfg.gfa11) + subst_dict["frugal"] = bool_to_str(cfg.frugal) # subst_dict["topology_simplif_enabled"] = bool_to_str(last_one) subst_dict["max_threads"] = cfg.max_threads subst_dict["max_memory"] = cfg.max_memory diff --git a/src/projects/spades/pipeline/spades_pipeline/stages/spades_stage.py b/src/projects/spades/pipeline/spades_pipeline/stages/spades_stage.py index 3d4e50f03f..1b180b6d4b 100644 --- 
a/src/projects/spades/pipeline/spades_pipeline/stages/spades_stage.py +++ b/src/projects/spades/pipeline/spades_pipeline/stages/spades_stage.py @@ -367,6 +367,7 @@ def generate_cfg(self, cfg, output_files): self.cfg.__dict__["rr_enable"] = True self.cfg.__dict__["gfa11"] = self.cfg.gfa11 + self.cfg.__dict__["frugal"] = self.cfg.frugal dataset_filename = os.path.join(self.cfg.output_dir, "dataset.info") self.cfg.__dict__["dataset"] = dataset_filename diff --git a/src/projects/spades/restricted_edges_filling.cpp b/src/projects/spades/restricted_edges_filling.cpp index e630f1c9bf..9819c44561 100644 --- a/src/projects/spades/restricted_edges_filling.cpp +++ b/src/projects/spades/restricted_edges_filling.cpp @@ -43,7 +43,7 @@ static void MapRestrictedEdgesFromTrustedContigs(graph_pack::GraphPack &gp) { if (!gp.get_mutable>().IsAttached()) gp.get_mutable>().Attach(); - EnsureBasicMapping(gp); + EnsureBasicMapping(gp, !cfg::get().frugal); std::vector trusted_contigs; for (size_t lib_id = 0; lib_id < cfg::get().ds.reads.lib_count(); ++lib_id) { if (cfg::get().ds.reads[lib_id].type() == io::LibraryType::TrustedContigs) From 122b5a98acce2f59f6b72d84b182707b97a75d33 Mon Sep 17 00:00:00 2001 From: Anton Korobeynikov Date: Thu, 7 May 2026 15:40:11 -0700 Subject: [PATCH 2/2] Enable frugal mode for hpcSPAdes by default --- src/projects/spades/pipeline/spades_pipeline/options_parser.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/projects/spades/pipeline/spades_pipeline/options_parser.py b/src/projects/spades/pipeline/spades_pipeline/options_parser.py index c32ebe2f0c..4bd1463f05 100644 --- a/src/projects/spades/pipeline/spades_pipeline/options_parser.py +++ b/src/projects/spades/pipeline/spades_pipeline/options_parser.py @@ -1104,7 +1104,8 @@ def add_to_cfg(cfg, bin_home, spades_home, args): if args.read_buffer_size: cfg["assembly"].__dict__["read_buffer_size"] = args.read_buffer_size cfg["assembly"].__dict__["gfa11"] = args.gfa11 - 
cfg["assembly"].__dict__["frugal"] = args.frugal + # Explicit --frugal / --frugal:false wins; otherwise the default depends on grid engine : local / non-local + cfg["assembly"].__dict__["frugal"] = args.frugal if args.frugal is not None else args.grid_engine != "local" # corrector can work only if contigs exist (not only error correction) if (not args.only_error_correction) and args.mismatch_corrector: