Skip to content

Commit b4ef20e

Browse files
committed
Proper formatting enforced
1 parent e7fb7e7 commit b4ef20e

File tree

4 files changed

+183
-187
lines changed

4 files changed

+183
-187
lines changed

veritymap/src/projects/veritymap/kmer_index/approx_kmer_indexer.hpp

Lines changed: 87 additions & 89 deletions
Original file line numberDiff line numberDiff line change
@@ -127,89 +127,88 @@ class ApproxKmerIndexer {
127127
// Builds one k-mer index per contig and returns them in the same order as `contigs`.
// For every contig it logs "Creating index for contig <id>" at info level and appends the
// result of GetKmerIndex(contig, kmer_filter, <position of the contig in `contigs`>, logger).
// The removed ("-") and added ("+") bodies in this listing differ only in whitespace.
[[nodiscard]] KmerIndexes GetKmerIndexes(const std::vector<Contig> &contigs,
128128
const kmer_filter::KmerFilter &kmer_filter,
129129
logging::Logger &logger) const {
130-
KmerIndexes kmer_indexes;
131-
for (auto it = contigs.cbegin(); it!=contigs.cend(); ++it) {
132-
const Contig &contig{*it};
133-
logger.info() << "Creating index for contig " << contig.id << "\n";
134-
kmer_indexes.emplace_back(GetKmerIndex(contig,
135-
kmer_filter,
136-
it - contigs.cbegin(),
137-
logger));
138-
}
139-
return kmer_indexes;
130+
KmerIndexes kmer_indexes;
131+
for (auto it = contigs.cbegin(); it != contigs.cend(); ++it) {
132+
const Contig &contig{*it};
133+
logger.info() << "Creating index for contig " << contig.id << "\n";
134+
kmer_indexes.emplace_back(GetKmerIndex(contig,
135+
kmer_filter,
136+
// iterator distance from cbegin() == position of this contig in `contigs`
it - contigs.cbegin(),
137+
logger));
138+
}
139+
return kmer_indexes;
140140
}
141141

142-
// Careful-mode filter over already-built indexes.
// Phase 1: for every read in `readset` that is at least hasher.k long, walk its k-mer windows
// and, for both the forward and the reverse hash, increment kmer_cnt[hash] when some index
// holds that hash with exactly one entry (it->second.size() == 1).
// Phase 2: every hash counted more than max_read_freq times is erased from the first index
// that contains it, where
//   max_read_freq = max(1, ceil(careful_upper_bnd_cov_mult * coverage))
// and coverage is tools::common::coverage_utils::get_coverage(contigs, readset).
// The number of over-threshold hashes is logged at info level; `kmer_indexes` is modified in place.
// The removed ("-") and added ("+") copies in this listing differ only in whitespace/line wrapping.
void BanHighFreqUniqueKmers(const std::vector<Contig> &contigs,
143-
const std::vector<Contig> &readset,
144-
KmerIndexes &kmer_indexes,
145-
logging::Logger &logger) const {
146-
147-
// ban unique k-mers in assembly that have unusually high coverage
148-
const double coverage
149-
{tools::common::coverage_utils::get_coverage(contigs, readset)};
150-
const uint max_read_freq = std::max(1.,
151-
ceil(kmer_indexer_params
152-
.careful_upper_bnd_cov_mult
153-
*coverage));
154-
155-
Counter kmer_cnt;
156-
for (auto it = readset.begin(); it!=readset.end(); ++it) {
157-
logger.trace() << it - readset.begin() << " " << readset.size()
158-
<< "\n";
159-
const Contig &contig = *it;
160-
if (contig.size() < hasher.k) {
161-
continue;
162-
}
163-
KWH<htype> kwh(hasher, contig.seq, 0);
164-
while (true) {
165-
if (!kwh.hasNext()) {
166-
break;
167-
}
168-
kwh = kwh.next();
169-
const htype fhash = kwh.get_fhash();
170-
const htype rhash = kwh.get_rhash();
171-
for (const htype hash : std::vector<htype>{fhash, rhash}) {
172-
bool is_unique = false;
173-
for (const KmerIndex &index : kmer_indexes) {
174-
auto it = index.find(hash);
175-
if (it!=index.end() and it->second.size()==1) {
176-
is_unique = true;
177-
break;
178-
}
179-
}
180-
if (is_unique) {
181-
kmer_cnt[hash] += 1;
182-
}
183-
}
142+
void BanHighFreqUniqueKmers(const std::vector<Contig> &contigs,
143+
const std::vector<Contig> &readset,
144+
KmerIndexes &kmer_indexes,
145+
logging::Logger &logger) const {
146+
147+
// ban unique k-mers in assembly that have unusually high coverage
148+
const double coverage{tools::common::coverage_utils::get_coverage(contigs, readset)};
149+
const uint max_read_freq = std::max(1.,
150+
ceil(kmer_indexer_params
151+
.careful_upper_bnd_cov_mult
152+
* coverage));
153+
154+
Counter kmer_cnt;
155+
for (auto it = readset.begin(); it != readset.end(); ++it) {
156+
// trace output: index of the current read followed by the total number of reads
logger.trace() << it - readset.begin() << " " << readset.size()
157+
<< "\n";
158+
const Contig &contig = *it;
159+
// reads shorter than k are skipped
if (contig.size() < hasher.k) {
160+
continue;
161+
}
162+
KWH<htype> kwh(hasher, contig.seq, 0);
163+
// NOTE(review): next() is called before any hash is read, so the window constructed at
// position 0 appears never to be counted — confirm whether that is intended.
while (true) {
164+
if (!kwh.hasNext()) {
165+
break;
166+
}
167+
kwh = kwh.next();
168+
const htype fhash = kwh.get_fhash();
169+
const htype rhash = kwh.get_rhash();
170+
// NOTE(review): a temporary std::vector is constructed for every window just to
// iterate over these two values.
for (const htype hash : std::vector<htype>{fhash, rhash}) {
171+
bool is_unique = false;
172+
for (const KmerIndex &index : kmer_indexes) {
173+
// this `it` shadows the read iterator of the outer loop
auto it = index.find(hash);
174+
// "unique" here means: present in this index with exactly one entry
if (it != index.end() and it->second.size() == 1) {
175+
is_unique = true;
176+
break;
184177
}
178+
}
179+
if (is_unique) {
180+
kmer_cnt[hash] += 1;
181+
}
185182
}
183+
}
184+
}
186185

187-
uint64_t n{0};
188-
for (auto &[hash, cnt] : kmer_cnt) {
189-
if (cnt > max_read_freq) {
190-
for (KmerIndex &index : kmer_indexes) {
191-
auto it = index.find(hash);
192-
if (it!=index.end()) {
193-
index.erase(it);
194-
break;
195-
}
196-
}
197-
++n;
198-
}
186+
uint64_t n{0};
187+
for (auto &[hash, cnt] : kmer_cnt) {
188+
if (cnt > max_read_freq) {
189+
// erase the hash from the first index that contains it, then stop searching
for (KmerIndex &index : kmer_indexes) {
190+
auto it = index.find(hash);
191+
if (it != index.end()) {
192+
index.erase(it);
193+
break;
194+
}
199195
}
200-
logger.info() << "Filtered " << n << " high multiplicity k-mers\n";
196+
// counts every over-threshold hash, whether or not an index still contained it
++n;
197+
}
201198
}
199+
logger.info() << "Filtered " << n << " high multiplicity k-mers\n";
200+
}
202201

203202
public:
204-
// Constructor: each argument (thread count, rolling hasher, common parameters, k-mer indexer
// parameters) initializes the member of the same name; the constructor body is empty.
// The removed ("-") and added ("+") copies in this listing differ only in whitespace.
ApproxKmerIndexer(const size_t nthreads,
205-
const RollingHash<htype> &hasher,
206-
const Config::CommonParams &common_params,
207-
const Config::KmerIndexerParams &kmer_indexer_params)
208-
: nthreads{nthreads},
209-
hasher{hasher},
210-
common_params{common_params},
211-
kmer_indexer_params{
212-
kmer_indexer_params} {}
203+
ApproxKmerIndexer(const size_t nthreads,
204+
const RollingHash<htype> &hasher,
205+
const Config::CommonParams &common_params,
206+
const Config::KmerIndexerParams &kmer_indexer_params)
207+
: nthreads{nthreads},
208+
hasher{hasher},
209+
common_params{common_params},
210+
kmer_indexer_params{
211+
kmer_indexer_params} {}
213212

214213
ApproxKmerIndexer(const ApproxKmerIndexer &) = delete;
215214
ApproxKmerIndexer(ApproxKmerIndexer &&) = delete;
@@ -219,22 +218,21 @@ class ApproxKmerIndexer {
219218
// Public entry point. Builds a KmerFilter for `contigs` through a KmerFilterBuilder configured
// with this indexer's nthreads/hasher/params, uses it to build one index per contig via
// GetKmerIndexes, and — only when `readset_optional` holds a value ("careful mode") — runs
// BanHighFreqUniqueKmers on the result before returning it. Progress is logged at info level.
// The removed ("-") and added ("+") lines in this listing differ only in whitespace/line wrapping.
[[nodiscard]] KmerIndexes extract(const std::vector<Contig> &contigs,
220219
const std::optional<std::vector<Contig>> &readset_optional,
221220
logging::Logger &logger) const {
222-
const kmer_filter::KmerFilterBuilder kmer_filter_builder
223-
{nthreads, hasher, common_params, kmer_indexer_params};
224-
logger.info() << "Creating kmer filter\n";
225-
const kmer_filter::KmerFilter
226-
kmer_filter = kmer_filter_builder.GetKmerFilter(contigs, logger);
221+
const kmer_filter::KmerFilterBuilder kmer_filter_builder{nthreads, hasher, common_params, kmer_indexer_params};
222+
logger.info() << "Creating kmer filter\n";
223+
const kmer_filter::KmerFilter
224+
kmer_filter = kmer_filter_builder.GetKmerFilter(contigs, logger);
225+
logger.info()
226+
<< "Finished creating kmer filter. Using it to build kmer indexes\n";
227+
KmerIndexes kmer_indexes = GetKmerIndexes(contigs, kmer_filter, logger);
228+
if (readset_optional.has_value()) {
229+
// Careful mode
227230
logger.info()
228-
<< "Finished creating kmer filter. Using it to build kmer indexes\n";
229-
KmerIndexes kmer_indexes = GetKmerIndexes(contigs, kmer_filter, logger);
230-
if (readset_optional.has_value()) {
231-
// Careful mode
232-
logger.info()
233-
<< "Careful mode requested. Filtering high multiplicity unique k-mers\n";
234-
const std::vector<Contig> &readset = readset_optional.value();
235-
BanHighFreqUniqueKmers(contigs, readset, kmer_indexes, logger);
236-
}
237-
return kmer_indexes;
231+
<< "Careful mode requested. Filtering high multiplicity unique k-mers\n";
232+
const std::vector<Contig> &readset = readset_optional.value();
233+
BanHighFreqUniqueKmers(contigs, readset, kmer_indexes, logger);
234+
}
235+
return kmer_indexes;
238236
}
239237
};
240238

veritymap/src/projects/veritymap/kmer_index/target_indexer.hpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@ get_indexed_targets(const std::optional<std::vector<Contig>> &queries,
6363

6464
IndexedContigs indexed_targets;
6565
for (auto it = kmers_indexes.begin(); it != kmers_indexes.end(); ++it) {
66-
const Contig& target = targets.at(it - kmers_indexes.begin());
66+
const Contig &target = targets.at(it - kmers_indexes.begin());
6767
indexed_targets.emplace_back(target, hasher, kmer_indexer_params.max_rare_cnt_target, std::move(*it));
6868
}
6969

@@ -72,7 +72,7 @@ get_indexed_targets(const std::optional<std::vector<Contig>> &queries,
7272
kmer_indexes_os.close();
7373
logger.info() << "Kmer indexes are exported to " << kmer_indexes_fn << std::endl;
7474

75-
for (const auto& indexed_target : indexed_targets) {
75+
for (const auto &indexed_target : indexed_targets) {
7676
logger.info() << "Target " << indexed_target.get_contig().id
7777
<< ", # Rare kmers = " << indexed_target.get_kmer_index().size() << std::endl;
7878
}

veritymap/src/projects/veritymap/veritymap.cpp

Lines changed: 38 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -47,46 +47,46 @@ int main(int argc, char** argv) {
4747
std::time_t now = std::chrono::system_clock::to_time_t(time_point);
4848
logger << "Launch time: " << std::put_time(std::localtime(&now), "%c %Z") << std::endl;
4949

50-
std::stringstream cmd_ss;
51-
for (size_t i = 0; i < argc; i++) {
52-
cmd_ss << argv[i] << " ";
53-
}
54-
const std::string cmd = cmd_ss.str();
55-
logger << "CMD: " << cmd << std::endl;
56-
57-
const std::filesystem::path target_path =
58-
std::filesystem::canonical(parser.getValue("target"));
59-
60-
auto get_path_w_def = [&parser](const std::string &parameter) {
61-
std::filesystem::path path = parser.getValue(parameter);
62-
if (path!="none") {
63-
path = std::filesystem::canonical(path);
64-
} else {
65-
path = "";
66-
}
67-
return path;
68-
};
69-
const std::filesystem::path queries_path = get_path_w_def("queries");
70-
71-
bool to_compress = parser.getCheck("compress");
72-
bool only_index = parser.getCheck("only-index");
73-
bool careful_mode = parser.getCheck("careful");
74-
if (careful_mode and queries_path=="") {
75-
std::cerr << "Cannot use careful mode if no queries are provided\n";
76-
return 1;
50+
std::stringstream cmd_ss;
51+
for (size_t i = 0; i < argc; i++) {
52+
cmd_ss << argv[i] << " ";
53+
}
54+
const std::string cmd = cmd_ss.str();
55+
logger << "CMD: " << cmd << std::endl;
56+
57+
const std::filesystem::path target_path =
58+
std::filesystem::canonical(parser.getValue("target"));
59+
60+
auto get_path_w_def = [&parser](const std::string& parameter) {
61+
std::filesystem::path path = parser.getValue(parameter);
62+
if (path != "none") {
63+
path = std::filesystem::canonical(path);
64+
} else {
65+
path = "";
7766
}
67+
return path;
68+
};
69+
const std::filesystem::path queries_path = get_path_w_def("queries");
70+
71+
bool to_compress = parser.getCheck("compress");
72+
bool only_index = parser.getCheck("only-index");
73+
bool careful_mode = parser.getCheck("careful");
74+
if (careful_mode and queries_path == "") {
75+
std::cerr << "Cannot use careful mode if no queries are provided\n";
76+
return 1;
77+
}
78+
79+
const std::filesystem::path index_path = get_path_w_def("index");
7880

79-
const std::filesystem::path index_path = get_path_w_def("index");
80-
81-
const std::filesystem::path binary_path = argv[0];
82-
const std::filesystem::path config_fn = [&parser, &logger, &binary_path] {
83-
std::string config = parser.getValue("config");
84-
std::filesystem::path dirpath = binary_path.parent_path();
85-
if (config=="hifi") {
86-
return dirpath/"config/config_tm2_hifi.tsv";
87-
} else if (config=="ont") {
88-
return dirpath/"config/config_tm2_ont.tsv";
89-
}
81+
const std::filesystem::path binary_path = argv[0];
82+
const std::filesystem::path config_fn = [&parser, &logger, &binary_path] {
83+
std::string config = parser.getValue("config");
84+
std::filesystem::path dirpath = binary_path.parent_path();
85+
if (config == "hifi") {
86+
return dirpath / "config/config_tm2_hifi.tsv";
87+
} else if (config == "ont") {
88+
return dirpath / "config/config_tm2_ont.tsv";
89+
}
9090
return static_cast<std::filesystem::path>(config);
9191
}();
9292
veritymap::Config config = veritymap::Config::load_config_file(config_fn);

0 commit comments

Comments
 (0)