Skip to content

Commit d54fa3e

Browse files
authored
Merge pull request yatisht#420 from AngieHinrichs/mask_except_nodes
matUtils mask --mask-mutations: support excluding nodes
2 parents 55407f5 + 40c9a99 commit d54fa3e

File tree

1 file changed

+34
-16
lines changed

1 file changed

+34
-16
lines changed

src/matUtils/mask.cpp

Lines changed: 34 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -719,6 +719,30 @@ bool match_mutations(MAT::Mutation* target, MAT::Mutation* query) {
719719
}
720720
return true;
721721
}
722+
723+
// Removes every mutation on `node` that match_mutations() reports as matching
// `mutobj`, then recurses into the children of `node`.
//
// A child whose identifier is present in `exclude_nodes` is not visited (so
// nothing beneath it is touched either); a notice naming the child and the
// position of `mutobj` is written to stderr instead. Note that the exclusion
// check is applied to children only: the node passed in by the caller is
// always processed.
//
// Returns the number of mutations that matched on `node` plus the totals
// returned by the recursive calls on its non-excluded children.
size_t mask_mutation_on_branch(MAT::Node* node, MAT::Mutation* mutobj, std::set<string>& exclude_nodes) {
    // Pass 1: gather copies of the matching mutations. Removal is deferred so
    // that node->mutations is not modified while it is being iterated.
    // Usually nothing matches and this list stays empty.
    std::vector<MAT::Mutation> matched;
    for (auto& candidate : node->mutations) {
        if (match_mutations(mutobj, &candidate)) {
            matched.push_back(candidate);
        }
    }
    size_t masked_count = matched.size();

    // Pass 2: for each gathered mutation, erase the first element of
    // node->mutations that compares equal to it.
    for (const auto& doomed : matched) {
        auto where = std::find(node->mutations.begin(), node->mutations.end(), doomed);
        node->mutations.erase(where);
    }

    // Descend into the children, skipping the explicitly excluded ones.
    for (auto child : node->children) {
        if (exclude_nodes.count(child->identifier) != 0) {
            fprintf(stderr, "Excluding %s for position %d\n", child->identifier.c_str(), mutobj->position);
            continue;
        }
        masked_count += mask_mutation_on_branch(child, mutobj, exclude_nodes);
    }
    return masked_count;
}
745+
722746
void restrictMutationsLocally (std::string mutations_filename, MAT::Tree* T, bool global) {
723747
std::ifstream infile(mutations_filename);
724748
if (!infile) {
@@ -741,36 +765,30 @@ void restrictMutationsLocally (std::string mutations_filename, MAT::Tree* T, boo
741765
MAT::string_split(line, delim, words);
742766
std::string target_node;
743767
std::string target_mutation;
768+
std::set<string> exclude_nodes;
744769
if ((words.size() == 1) || (global)) {
745770
//std::cerr << "Masking mutations globally.\n";
746771
target_mutation = words[0];
747772
target_node = rootid;
748773
} else {
749774
target_mutation = words[0];
750775
target_node = words[1];
776+
if (words.size() > 2) {
777+
// semicolon-separated set of descendent node IDs to exclude from masking
778+
std::vector<std::string> node_ids;
779+
MAT::string_split(words[2], ';', node_ids);
780+
for (auto node_id: node_ids) {
781+
exclude_nodes.insert(node_id);
782+
}
783+
}
751784
}
752785
MAT::Mutation* mutobj = MAT::mutation_from_string(target_mutation);
753-
size_t instances_masked = 0;
754786
MAT::Node* rn = T->get_node(target_node);
755787
if (rn == NULL) {
756788
fprintf(stderr, "ERROR: Internal node %s requested for masking does not exist in the tree. Exiting\n", target_node.c_str());
757789
exit(1);
758790
}
759-
// fprintf(stderr, "Masking mutation %s below node %s\n", ml.first.c_str(), ml.second.c_str());
760-
for (auto n: T->depth_first_expansion(rn)) {
761-
// The expected common case is to not match any mutations and have nothing to remove.
762-
std::vector<MAT::Mutation> muts_to_remove;
763-
for (auto& mut: n->mutations) {
764-
if (match_mutations(mutobj, &mut)) {
765-
instances_masked++;
766-
muts_to_remove.push_back(mut);
767-
}
768-
}
769-
for (auto mut: muts_to_remove) {
770-
auto iter = std::find(n->mutations.begin(), n->mutations.end(), mut);
771-
n->mutations.erase(iter);
772-
}
773-
}
791+
size_t instances_masked = mask_mutation_on_branch(rn, mutobj, exclude_nodes);
774792
total_masked += instances_masked;
775793
}
776794
fprintf(stderr, "Completed in %ld msec \n", timer.Stop());

0 commit comments

Comments
 (0)