@@ -127,89 +127,88 @@ class ApproxKmerIndexer {
127
127
[[nodiscard]] KmerIndexes GetKmerIndexes (const std::vector<Contig> &contigs,
128
128
const kmer_filter::KmerFilter &kmer_filter,
129
129
logging::Logger &logger) const {
130
- KmerIndexes kmer_indexes;
131
- for (auto it = contigs.cbegin (); it!= contigs.cend (); ++it) {
132
- const Contig &contig{*it};
133
- logger.info () << " Creating index for contig " << contig.id << " \n " ;
134
- kmer_indexes.emplace_back (GetKmerIndex (contig,
135
- kmer_filter,
136
- it - contigs.cbegin (),
137
- logger));
138
- }
139
- return kmer_indexes;
130
+ KmerIndexes kmer_indexes;
131
+ for (auto it = contigs.cbegin (); it != contigs.cend (); ++it) {
132
+ const Contig &contig{*it};
133
+ logger.info () << " Creating index for contig " << contig.id << " \n " ;
134
+ kmer_indexes.emplace_back (GetKmerIndex (contig,
135
+ kmer_filter,
136
+ it - contigs.cbegin (),
137
+ logger));
138
+ }
139
+ return kmer_indexes;
140
140
}
141
141
142
- void BanHighFreqUniqueKmers (const std::vector<Contig> &contigs,
143
- const std::vector<Contig> &readset,
144
- KmerIndexes &kmer_indexes,
145
- logging::Logger &logger) const {
146
-
147
- // ban unique k-mers in assembly that have unusually high coverage
148
- const double coverage
149
- {tools::common::coverage_utils::get_coverage (contigs, readset)};
150
- const uint max_read_freq = std::max (1 .,
151
- ceil (kmer_indexer_params
152
- .careful_upper_bnd_cov_mult
153
- *coverage));
154
-
155
- Counter kmer_cnt;
156
- for (auto it = readset.begin (); it!=readset.end (); ++it) {
157
- logger.trace () << it - readset.begin () << " " << readset.size ()
158
- << " \n " ;
159
- const Contig &contig = *it;
160
- if (contig.size () < hasher.k ) {
161
- continue ;
162
- }
163
- KWH<htype> kwh (hasher, contig.seq , 0 );
164
- while (true ) {
165
- if (!kwh.hasNext ()) {
166
- break ;
167
- }
168
- kwh = kwh.next ();
169
- const htype fhash = kwh.get_fhash ();
170
- const htype rhash = kwh.get_rhash ();
171
- for (const htype hash : std::vector<htype>{fhash, rhash}) {
172
- bool is_unique = false ;
173
- for (const KmerIndex &index : kmer_indexes) {
174
- auto it = index.find (hash);
175
- if (it!=index.end () and it->second .size ()==1 ) {
176
- is_unique = true ;
177
- break ;
178
- }
179
- }
180
- if (is_unique) {
181
- kmer_cnt[hash] += 1 ;
182
- }
183
- }
142
+ void BanHighFreqUniqueKmers (const std::vector<Contig> &contigs,
143
+ const std::vector<Contig> &readset,
144
+ KmerIndexes &kmer_indexes,
145
+ logging::Logger &logger) const {
146
+
147
+ // ban unique k-mers in assembly that have unusually high coverage
148
+ const double coverage{tools::common::coverage_utils::get_coverage (contigs, readset)};
149
+ const uint max_read_freq = std::max (1 .,
150
+ ceil (kmer_indexer_params
151
+ .careful_upper_bnd_cov_mult
152
+ * coverage));
153
+
154
+ Counter kmer_cnt;
155
+ for (auto it = readset.begin (); it != readset.end (); ++it) {
156
+ logger.trace () << it - readset.begin () << " " << readset.size ()
157
+ << " \n " ;
158
+ const Contig &contig = *it;
159
+ if (contig.size () < hasher.k ) {
160
+ continue ;
161
+ }
162
+ KWH<htype> kwh (hasher, contig.seq , 0 );
163
+ while (true ) {
164
+ if (!kwh.hasNext ()) {
165
+ break ;
166
+ }
167
+ kwh = kwh.next ();
168
+ const htype fhash = kwh.get_fhash ();
169
+ const htype rhash = kwh.get_rhash ();
170
+ for (const htype hash : std::vector<htype>{fhash, rhash}) {
171
+ bool is_unique = false ;
172
+ for (const KmerIndex &index : kmer_indexes) {
173
+ auto it = index.find (hash);
174
+ if (it != index.end () and it->second .size () == 1 ) {
175
+ is_unique = true ;
176
+ break ;
184
177
}
178
+ }
179
+ if (is_unique) {
180
+ kmer_cnt[hash] += 1 ;
181
+ }
185
182
}
183
+ }
184
+ }
186
185
187
- uint64_t n{0 };
188
- for (auto &[hash, cnt] : kmer_cnt) {
189
- if (cnt > max_read_freq) {
190
- for (KmerIndex &index : kmer_indexes) {
191
- auto it = index.find (hash);
192
- if (it!=index.end ()) {
193
- index.erase (it);
194
- break ;
195
- }
196
- }
197
- ++n;
198
- }
186
+ uint64_t n{0 };
187
+ for (auto &[hash, cnt] : kmer_cnt) {
188
+ if (cnt > max_read_freq) {
189
+ for (KmerIndex &index : kmer_indexes) {
190
+ auto it = index.find (hash);
191
+ if (it != index.end ()) {
192
+ index.erase (it);
193
+ break ;
194
+ }
199
195
}
200
- logger.info () << " Filtered " << n << " high multiplicity k-mers\n " ;
196
+ ++n;
197
+ }
201
198
}
199
+ logger.info () << " Filtered " << n << " high multiplicity k-mers\n " ;
200
+ }
202
201
203
202
public:
204
- ApproxKmerIndexer (const size_t nthreads,
205
- const RollingHash<htype> &hasher,
206
- const Config::CommonParams &common_params,
207
- const Config::KmerIndexerParams &kmer_indexer_params)
208
- : nthreads{nthreads},
209
- hasher{hasher},
210
- common_params{common_params},
211
- kmer_indexer_params{
212
- kmer_indexer_params} {}
203
+ ApproxKmerIndexer (const size_t nthreads,
204
+ const RollingHash<htype> &hasher,
205
+ const Config::CommonParams &common_params,
206
+ const Config::KmerIndexerParams &kmer_indexer_params)
207
+ : nthreads{nthreads},
208
+ hasher{hasher},
209
+ common_params{common_params},
210
+ kmer_indexer_params{
211
+ kmer_indexer_params} {}
213
212
214
213
ApproxKmerIndexer (const ApproxKmerIndexer &) = delete ;
215
214
ApproxKmerIndexer (ApproxKmerIndexer &&) = delete ;
@@ -219,22 +218,21 @@ class ApproxKmerIndexer {
219
218
[[nodiscard]] KmerIndexes extract (const std::vector<Contig> &contigs,
220
219
const std::optional<std::vector<Contig>> &readset_optional,
221
220
logging::Logger &logger) const {
222
- const kmer_filter::KmerFilterBuilder kmer_filter_builder
223
- {nthreads, hasher, common_params, kmer_indexer_params};
224
- logger.info () << " Creating kmer filter\n " ;
225
- const kmer_filter::KmerFilter
226
- kmer_filter = kmer_filter_builder.GetKmerFilter (contigs, logger);
221
+ const kmer_filter::KmerFilterBuilder kmer_filter_builder{nthreads, hasher, common_params, kmer_indexer_params};
222
+ logger.info () << " Creating kmer filter\n " ;
223
+ const kmer_filter::KmerFilter
224
+ kmer_filter = kmer_filter_builder.GetKmerFilter (contigs, logger);
225
+ logger.info ()
226
+ << " Finished creating kmer filter. Using it to build kmer indexes\n " ;
227
+ KmerIndexes kmer_indexes = GetKmerIndexes (contigs, kmer_filter, logger);
228
+ if (readset_optional.has_value ()) {
229
+ // Careful mode
227
230
logger.info ()
228
- << " Finished creating kmer filter. Using it to build kmer indexes\n " ;
229
- KmerIndexes kmer_indexes = GetKmerIndexes (contigs, kmer_filter, logger);
230
- if (readset_optional.has_value ()) {
231
- // Careful mode
232
- logger.info ()
233
- << " Careful mode requested. Filtering high multiplicity unique k-mers\n " ;
234
- const std::vector<Contig> &readset = readset_optional.value ();
235
- BanHighFreqUniqueKmers (contigs, readset, kmer_indexes, logger);
236
- }
237
- return kmer_indexes;
231
+ << " Careful mode requested. Filtering high multiplicity unique k-mers\n " ;
232
+ const std::vector<Contig> &readset = readset_optional.value ();
233
+ BanHighFreqUniqueKmers (contigs, readset, kmer_indexes, logger);
234
+ }
235
+ return kmer_indexes;
238
236
}
239
237
};
240
238
0 commit comments