Skip to content

Commit

Permalink
Proper formatting enforced
Browse files Browse the repository at this point in the history
  • Loading branch information
seryrzu committed Mar 23, 2022
1 parent e7fb7e7 commit b4ef20e
Show file tree
Hide file tree
Showing 4 changed files with 183 additions and 187 deletions.
176 changes: 87 additions & 89 deletions veritymap/src/projects/veritymap/kmer_index/approx_kmer_indexer.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -127,89 +127,88 @@ class ApproxKmerIndexer {
// Builds one k-mer index per contig, in the order the contigs are given.
// For every contig this logs "Creating index for contig <id>" and appends the
// result of GetKmerIndex(contig, kmer_filter, <position of the contig in
// `contigs`>, logger) to the returned collection, so entry i of the result
// corresponds to contigs[i].
// NOTE(review): this listing shows the body twice -- the pre-reformat lines
// followed by the reformatted lines of the same change; the two copies differ
// only in spacing around `!=`.
[[nodiscard]] KmerIndexes GetKmerIndexes(const std::vector<Contig> &contigs,
const kmer_filter::KmerFilter &kmer_filter,
logging::Logger &logger) const {
KmerIndexes kmer_indexes;
for (auto it = contigs.cbegin(); it!=contigs.cend(); ++it) {
const Contig &contig{*it};
logger.info() << "Creating index for contig " << contig.id << "\n";
kmer_indexes.emplace_back(GetKmerIndex(contig,
kmer_filter,
it - contigs.cbegin(),
logger));
}
return kmer_indexes;
KmerIndexes kmer_indexes;
for (auto it = contigs.cbegin(); it != contigs.cend(); ++it) {
const Contig &contig{*it};
logger.info() << "Creating index for contig " << contig.id << "\n";
// `it - contigs.cbegin()` is the zero-based position of this contig.
kmer_indexes.emplace_back(GetKmerIndex(contig,
kmer_filter,
it - contigs.cbegin(),
logger));
}
return kmer_indexes;
}

// Erases from `kmer_indexes` the k-mer hashes that have exactly one entry in
// some index but are seen in the reads more often than a coverage-derived
// bound.
//   contigs, readset - passed to get_coverage() to obtain the coverage value
//   kmer_indexes     - modified in place; offending hashes are erased
//   logger           - receives per-read trace lines and the final count
// The bound is max(1, ceil(careful_upper_bnd_cov_mult * coverage)).
// NOTE(review): this listing interleaves the pre-reformat and the reformatted
// lines of one change, so the signature and most statements appear twice.
void BanHighFreqUniqueKmers(const std::vector<Contig> &contigs,
const std::vector<Contig> &readset,
KmerIndexes &kmer_indexes,
logging::Logger &logger) const {

// ban unique k-mers in assembly that have unusually high coverage
const double coverage
{tools::common::coverage_utils::get_coverage(contigs, readset)};
const uint max_read_freq = std::max(1.,
ceil(kmer_indexer_params
.careful_upper_bnd_cov_mult
*coverage));

Counter kmer_cnt;
for (auto it = readset.begin(); it!=readset.end(); ++it) {
logger.trace() << it - readset.begin() << " " << readset.size()
<< "\n";
const Contig &contig = *it;
if (contig.size() < hasher.k) {
continue;
}
KWH<htype> kwh(hasher, contig.seq, 0);
while (true) {
if (!kwh.hasNext()) {
break;
}
kwh = kwh.next();
const htype fhash = kwh.get_fhash();
const htype rhash = kwh.get_rhash();
for (const htype hash : std::vector<htype>{fhash, rhash}) {
bool is_unique = false;
for (const KmerIndex &index : kmer_indexes) {
auto it = index.find(hash);
if (it!=index.end() and it->second.size()==1) {
is_unique = true;
break;
}
}
if (is_unique) {
kmer_cnt[hash] += 1;
}
}
void BanHighFreqUniqueKmers(const std::vector<Contig> &contigs,
const std::vector<Contig> &readset,
KmerIndexes &kmer_indexes,
logging::Logger &logger) const {

// ban unique k-mers in assembly that have unusually high coverage
const double coverage{tools::common::coverage_utils::get_coverage(contigs, readset)};
// The bound is never below 1, whatever the coverage value is.
const uint max_read_freq = std::max(1.,
ceil(kmer_indexer_params
.careful_upper_bnd_cov_mult
* coverage));

// Pass 1: count, per hash, how many read windows hit a hash that has exactly
// one entry in at least one index.
Counter kmer_cnt;
for (auto it = readset.begin(); it != readset.end(); ++it) {
logger.trace() << it - readset.begin() << " " << readset.size()
<< "\n";
const Contig &contig = *it;
// Reads shorter than k are skipped.
if (contig.size() < hasher.k) {
continue;
}
KWH<htype> kwh(hasher, contig.seq, 0);
while (true) {
if (!kwh.hasNext()) {
break;
}
// NOTE(review): the window is advanced before its hashes are read, so the
// hashes of the window constructed above are never examined -- confirm
// that skipping it is intended.
kwh = kwh.next();
const htype fhash = kwh.get_fhash();
const htype rhash = kwh.get_rhash();
// Both hashes of the window are checked (presumably forward and reverse
// strand -- confirm against KWH). A temporary vector is built per window.
for (const htype hash : std::vector<htype>{fhash, rhash}) {
bool is_unique = false;
for (const KmerIndex &index : kmer_indexes) {
// This inner `it` shadows the read iterator of the outer loop.
auto it = index.find(hash);
if (it != index.end() and it->second.size() == 1) {
is_unique = true;
break;
}
}
if (is_unique) {
kmer_cnt[hash] += 1;
}
}
}
}

uint64_t n{0};
for (auto &[hash, cnt] : kmer_cnt) {
if (cnt > max_read_freq) {
for (KmerIndex &index : kmer_indexes) {
auto it = index.find(hash);
if (it!=index.end()) {
index.erase(it);
break;
}
}
++n;
}
// Pass 2: erase every hash counted more than max_read_freq times.
uint64_t n{0};
for (auto &[hash, cnt] : kmer_cnt) {
if (cnt > max_read_freq) {
for (KmerIndex &index : kmer_indexes) {
auto it = index.find(hash);
// Only the first index containing the hash is touched.
// NOTE(review): that is not necessarily the index in which the hash had a
// single entry during pass 1 -- confirm this is the intended target.
if (it != index.end()) {
index.erase(it);
break;
}
}
logger.info() << "Filtered " << n << " high multiplicity k-mers\n";
++n;
}
}
logger.info() << "Filtered " << n << " high multiplicity k-mers\n";
}

public:
// Initializes the members nthreads, hasher, common_params and
// kmer_indexer_params from the arguments of the same names.
// Copy and move construction are both deleted (see the two declarations at
// the end of this block).
// NOTE(review): this listing shows the constructor twice -- the pre-reformat
// lines followed by the reformatted lines of the same change; the two copies
// are token-for-token the same.
ApproxKmerIndexer(const size_t nthreads,
const RollingHash<htype> &hasher,
const Config::CommonParams &common_params,
const Config::KmerIndexerParams &kmer_indexer_params)
: nthreads{nthreads},
hasher{hasher},
common_params{common_params},
kmer_indexer_params{
kmer_indexer_params} {}
ApproxKmerIndexer(const size_t nthreads,
const RollingHash<htype> &hasher,
const Config::CommonParams &common_params,
const Config::KmerIndexerParams &kmer_indexer_params)
: nthreads{nthreads},
hasher{hasher},
common_params{common_params},
kmer_indexer_params{
kmer_indexer_params} {}

// Instances can be neither copied nor moved.
ApproxKmerIndexer(const ApproxKmerIndexer &) = delete;
ApproxKmerIndexer(ApproxKmerIndexer &&) = delete;
Expand All @@ -219,22 +218,21 @@ class ApproxKmerIndexer {
// Builds the k-mer indexes for `contigs` and returns them.
//   1. Constructs a KmerFilterBuilder from nthreads, hasher, common_params and
//      kmer_indexer_params and obtains a KmerFilter via
//      GetKmerFilter(contigs, logger).
//   2. Builds the indexes with GetKmerIndexes(contigs, kmer_filter, logger).
//   3. If `readset_optional` holds a value ("careful mode"), passes the reads
//      to BanHighFreqUniqueKmers(contigs, readset, kmer_indexes, logger),
//      which modifies the indexes in place.
// Progress is reported through logger.info() at each step.
// NOTE(review): this listing interleaves the pre-reformat and the reformatted
// lines of one change, so most statements appear twice.
[[nodiscard]] KmerIndexes extract(const std::vector<Contig> &contigs,
const std::optional<std::vector<Contig>> &readset_optional,
logging::Logger &logger) const {
const kmer_filter::KmerFilterBuilder kmer_filter_builder
{nthreads, hasher, common_params, kmer_indexer_params};
logger.info() << "Creating kmer filter\n";
const kmer_filter::KmerFilter
kmer_filter = kmer_filter_builder.GetKmerFilter(contigs, logger);
const kmer_filter::KmerFilterBuilder kmer_filter_builder{nthreads, hasher, common_params, kmer_indexer_params};
logger.info() << "Creating kmer filter\n";
const kmer_filter::KmerFilter
kmer_filter = kmer_filter_builder.GetKmerFilter(contigs, logger);
logger.info()
<< "Finished creating kmer filter. Using it to build kmer indexes\n";
KmerIndexes kmer_indexes = GetKmerIndexes(contigs, kmer_filter, logger);
if (readset_optional.has_value()) {
// Careful mode
logger.info()
<< "Finished creating kmer filter. Using it to build kmer indexes\n";
KmerIndexes kmer_indexes = GetKmerIndexes(contigs, kmer_filter, logger);
if (readset_optional.has_value()) {
// Careful mode
logger.info()
<< "Careful mode requested. Filtering high multiplicity unique k-mers\n";
const std::vector<Contig> &readset = readset_optional.value();
BanHighFreqUniqueKmers(contigs, readset, kmer_indexes, logger);
}
return kmer_indexes;
<< "Careful mode requested. Filtering high multiplicity unique k-mers\n";
// Bound by reference; the reads are not copied here.
const std::vector<Contig> &readset = readset_optional.value();
BanHighFreqUniqueKmers(contigs, readset, kmer_indexes, logger);
}
return kmer_indexes;
}
};

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ get_indexed_targets(const std::optional<std::vector<Contig>> &queries,

IndexedContigs indexed_targets;
for (auto it = kmers_indexes.begin(); it != kmers_indexes.end(); ++it) {
const Contig& target = targets.at(it - kmers_indexes.begin());
const Contig &target = targets.at(it - kmers_indexes.begin());
indexed_targets.emplace_back(target, hasher, kmer_indexer_params.max_rare_cnt_target, std::move(*it));
}

Expand All @@ -72,7 +72,7 @@ get_indexed_targets(const std::optional<std::vector<Contig>> &queries,
kmer_indexes_os.close();
logger.info() << "Kmer indexes are exported to " << kmer_indexes_fn << std::endl;

for (const auto& indexed_target : indexed_targets) {
for (const auto &indexed_target : indexed_targets) {
logger.info() << "Target " << indexed_target.get_contig().id
<< ", # Rare kmers = " << indexed_target.get_kmer_index().size() << std::endl;
}
Expand Down
76 changes: 38 additions & 38 deletions veritymap/src/projects/veritymap/veritymap.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -47,46 +47,46 @@ int main(int argc, char** argv) {
std::time_t now = std::chrono::system_clock::to_time_t(time_point);
logger << "Launch time: " << std::put_time(std::localtime(&now), "%c %Z") << std::endl;

std::stringstream cmd_ss;
for (size_t i = 0; i < argc; i++) {
cmd_ss << argv[i] << " ";
}
const std::string cmd = cmd_ss.str();
logger << "CMD: " << cmd << std::endl;

const std::filesystem::path target_path =
std::filesystem::canonical(parser.getValue("target"));

auto get_path_w_def = [&parser](const std::string &parameter) {
std::filesystem::path path = parser.getValue(parameter);
if (path!="none") {
path = std::filesystem::canonical(path);
} else {
path = "";
}
return path;
};
const std::filesystem::path queries_path = get_path_w_def("queries");

bool to_compress = parser.getCheck("compress");
bool only_index = parser.getCheck("only-index");
bool careful_mode = parser.getCheck("careful");
if (careful_mode and queries_path=="") {
std::cerr << "Cannot use careful mode if no queries are provided\n";
return 1;
std::stringstream cmd_ss;
for (size_t i = 0; i < argc; i++) {
cmd_ss << argv[i] << " ";
}
const std::string cmd = cmd_ss.str();
logger << "CMD: " << cmd << std::endl;

const std::filesystem::path target_path =
std::filesystem::canonical(parser.getValue("target"));

auto get_path_w_def = [&parser](const std::string& parameter) {
std::filesystem::path path = parser.getValue(parameter);
if (path != "none") {
path = std::filesystem::canonical(path);
} else {
path = "";
}
return path;
};
const std::filesystem::path queries_path = get_path_w_def("queries");

bool to_compress = parser.getCheck("compress");
bool only_index = parser.getCheck("only-index");
bool careful_mode = parser.getCheck("careful");
if (careful_mode and queries_path == "") {
std::cerr << "Cannot use careful mode if no queries are provided\n";
return 1;
}

const std::filesystem::path index_path = get_path_w_def("index");

const std::filesystem::path index_path = get_path_w_def("index");

const std::filesystem::path binary_path = argv[0];
const std::filesystem::path config_fn = [&parser, &logger, &binary_path] {
std::string config = parser.getValue("config");
std::filesystem::path dirpath = binary_path.parent_path();
if (config=="hifi") {
return dirpath/"config/config_tm2_hifi.tsv";
} else if (config=="ont") {
return dirpath/"config/config_tm2_ont.tsv";
}
const std::filesystem::path binary_path = argv[0];
const std::filesystem::path config_fn = [&parser, &logger, &binary_path] {
std::string config = parser.getValue("config");
std::filesystem::path dirpath = binary_path.parent_path();
if (config == "hifi") {
return dirpath / "config/config_tm2_hifi.tsv";
} else if (config == "ont") {
return dirpath / "config/config_tm2_ont.tsv";
}
return static_cast<std::filesystem::path>(config);
}();
veritymap::Config config = veritymap::Config::load_config_file(config_fn);
Expand Down
Loading

0 comments on commit b4ef20e

Please sign in to comment.