Compression: automatically propagate constants and remove unused nodes (#8)

VincentDerk · web-flow · commit 6cc9e998a851 · 2025-02-06T13:11:07.000+01:00
* Added to_dot_file to python interface

* Added circuit.nb_root_nodes()

* Re-enabling remove_unused_nodes

* Added tests for removing unused nodes.

* Added comment
diff --git a/src/circuit.cpp b/src/circuit.cpp
@@ -67,7 +67,7 @@ Node* Circuit::add_node_level(Node* node) {
 }
 
 Node* Circuit::add_node_level_compressed(Node* node) {
-    return add_node_level(node);
+    // return add_node_level(node); // To disable compression.
     if (node->type != NodeType::And && node->type != NodeType::Or)
         return add_node_level(node);
 
@@ -206,6 +206,8 @@ size_t Circuit::max_layer_width() const {
 }
 
 void Circuit::remove_unused_nodes() {
+    // Must be run before the final root layer is added,
+    // because it may reassign the nodes' ix values.
     std::vector<std::vector<bool>> used;
     used.reserve(nb_layers());
     for (const auto& layer : layers)
@@ -238,7 +240,8 @@ void Circuit::remove_unused_nodes() {
         }
     }
 
-    // Clean-up: last layers can be empty (but intermediate ones should not)
+    // Clean-up: trailing layers may be empty; pop those.
+    // Intermediate layers cannot be empty because we use dummy nodes.
     for (std::size_t i = nb_layers()-1; i > 0; --i) {
         if (layers[i].empty()) {
             layers.pop_back();
@@ -257,14 +260,10 @@ void Circuit::remove_unused_nodes() {
         for (auto &node : layers[i])
             node->ix = index++;
     }
-    // Clean-up: last layer has fixed ix order
-    for(size_t i = 0; i < roots.size(); ++i)
-        roots[i]->ix = i;
+
 
 #ifndef NDEBUG
     // print_circuit();
-    // assert, last layer should only contain root nodes.
-    assert(roots.size() == layers[nb_layers()-1].size());
 
     if (layers.size() > 2) {
         // check for each layer, for each node, whether the idx
@@ -363,6 +362,15 @@ void to_dot_file(Circuit& circuit, const std::string& filename) {
             file << "  " << node->hash << " [label=\"" << node->get_label() << "\"]" << std::endl;
         }
     }
+    // Group nodes per layer
+    // using { rank=same; 1; 2; } to group node 1 and 2
+    for (const auto &layer: circuit.layers) {
+          file << "  { rank=same; ";
+          for (const auto *node : layer) {
+              file << node->hash << "; ";
+          }
+          file << "}" << std::endl;
+    }
     file << "}" << std::endl;
 }
 
@@ -402,12 +410,13 @@ void Circuit::add_root_layer() {
 
 
 void cleanup(void* data) noexcept {
-delete[] static_cast<long int*>(data);
+	delete[] static_cast<long int*>(data);
 }
 
 
 std::pair<Arrays, Arrays> Circuit::tensorize() {
-    add_root_layer();
+    remove_unused_nodes();
+	add_root_layer();
     //print_circuit(); // Helpful for debugging small circuits
 
     // per layer, a vector of size the number of children (but children can count twice
@@ -469,10 +478,14 @@ nb::class_<Circuit>(m, "Circuit")
 .def("add_D4_from_file", &Circuit::add_D4_from_file, "filename"_a, "true_lits"_a = std::vector<int>(), "false_lits"_a = std::vector<int>())
 .def("get_indices", &Circuit::get_indices)
 .def("nb_nodes", &Circuit::nb_nodes, "number of nodes in the circuit")
+.def("nb_root_nodes", &Circuit::nb_root_nodes, "number of root nodes in the circuit")
 .def("true_node", &Circuit::true_node, "adds a true node to the circuit, and returns a pointer")
 .def("false_node", &Circuit::false_node, "adds a false node to the circuit, and returns a pointer")
 .def("literal_node", &Circuit::literal_node, "adds a literal node to the circuit ,and returns a pointer")
 .def("or_node", &Circuit::or_node, "children"_a, "adds an or node to the circuit, and returns a pointer")
 .def("and_node", &Circuit::and_node, "children"_a, "adds an and node to the circuit, and returns a pointer")
-.def("set_root", &Circuit::set_root, "root"_a, "marks a node pointer as root");
+.def("set_root", &Circuit::set_root, "root"_a, "marks a node pointer as root")
+.def("remove_unused_nodes", &Circuit::remove_unused_nodes, "Removes unused non-root nodes from the circuit.\nCareful! This invalidates any NodePtr referring to an unused node (i.e., a node not connected to a root node).");
+
+m.def("to_dot_file", &to_dot_file, "circuit"_a, "filename"_a, "Write the given circuit as dot format to a file");
 }
diff --git a/src/circuit.h b/src/circuit.h
@@ -195,6 +195,10 @@ class Circuit {
         return count;
     }
 
+    std::size_t nb_root_nodes() const {
+        return roots.size();
+    }
+
     /**
      * For debugging purposes;
      * prints every node of each layer
diff --git a/src/klay/utils.py b/src/klay/utils.py
@@ -2,6 +2,8 @@
 from time import perf_counter
 import random
 from array import array
+# noinspection PyUnresolvedReferences
+from .nanobind_ext import to_dot_file
 
 import torch
 try:
@@ -248,4 +250,12 @@ def jax_weights(nb_vars, semiring = "log"):
     weights, neg_weights = python_weights(nb_vars, semiring)
     weights = jax.numpy.array(weights)
     neg_weights = jax.numpy.array(neg_weights)
-    return weights, neg_weights
+    return weights, neg_weights
+
+def circuit_to_dot(circuit, filename):
+    """
+    Write the given circuit as dot format to a file.
+    :param circuit: The circuit to write as dot format.
+    :param filename: The filepath to write to.
+    """
+    to_dot_file(circuit, filename)
diff --git a/tests/test_compression.py b/tests/test_compression.py
@@ -0,0 +1,110 @@
+import klay
+
+
+def test_propagate_simple_true():
+    c = klay.Circuit()
+    t = c.true_node()
+    f = c.false_node()
+    l1, l2 = c.literal_node(1), c.literal_node(2)
+
+    # test on input node
+    assert c.nb_nodes() == 4
+    and_node1 = c.and_node([t, l1]) # l1
+    and_node2 = c.and_node([l1, t]) # l1
+    or_node1 = c.or_node([l1, t]) # t
+    or_node2 = c.or_node([t, l1]) # t
+    assert c.nb_nodes() == 4
+
+    # test on intermediate node
+    l1_l2 = c.and_node([l1, l2])
+    assert c.nb_nodes() == 5
+    and_node1 = c.and_node([t, l1_l2]) # l1 & l2
+    and_node2 = c.and_node([l1_l2, t]) # l1 & l2
+    or_node1 = c.or_node([l1_l2, t]) # t
+    or_node2 = c.or_node([t, l1_l2]) # t
+    assert c.nb_nodes() == 5
+
+
+def test_propagate_simple_false():
+    c = klay.Circuit()
+    t = c.true_node()
+    f = c.false_node()
+    l1, l2 = c.literal_node(1), c.literal_node(2)
+
+    # test on input node
+    assert c.nb_nodes() == 4
+    and_node1 = c.and_node([f, l1])  # f
+    and_node2 = c.and_node([l1, f])  # f
+    or_node1 = c.or_node([l1, f])  # l1
+    or_node2 = c.or_node([f, l1])  # l1
+    assert c.nb_nodes() == 4
+
+    # test on intermediate node
+    l1_l2 = c.and_node([l1, l2])
+    assert c.nb_nodes() == 5
+    and_node1 = c.and_node([f, l1_l2])  # f
+    and_node2 = c.and_node([l1_l2, f])  # f
+    or_node1 = c.or_node([l1_l2, f])  # l1 & l2
+    or_node2 = c.or_node([f, l1_l2])  # l1 & l2
+    assert c.nb_nodes() == 5
+
+
+def test_propagate_simple_ternary():
+    """ test ternary nodes """
+    c = klay.Circuit()
+    t = c.true_node()
+    f = c.false_node()
+    l1, l2 = c.literal_node(1), c.literal_node(2)
+
+    # test on true
+    assert c.nb_nodes() == 4
+    and_node1 = c.and_node([t, l1, l2])  # l1 & l2
+    assert c.nb_nodes() == 5
+    and_node2 = c.and_node([l2, t, l1])  # l1 & l2
+    assert c.nb_nodes() == 5
+    or_node1 = c.or_node([l1, t, l2])  # t
+    assert c.nb_nodes() == 5
+    or_node2 = c.or_node([l2, l1, t])  # t
+    assert c.nb_nodes() == 5
+
+    # test on false
+    and_node3 = c.and_node([f, l1, l2])  # f
+    assert c.nb_nodes() == 5
+    and_node4 = c.and_node([l2, f, l1])  # f
+    assert c.nb_nodes() == 5
+    or_node3 = c.or_node([l1, f, l2])  # l1 | l2
+    assert c.nb_nodes() == 8, "Expected 8 nodes instead of 6, because l1 and l2 require dummy nodes for the OR-node."
+    or_node4 = c.or_node([l2, l1, f])  # l1 | l2
+    assert c.nb_nodes() == 8
+
+
+def test_removing_useless_nodes1():
+    c = klay.Circuit()
+    l1, l2, l3 = c.literal_node(1), c.literal_node(2), c.literal_node(3)
+    assert c.nb_nodes() == 3
+    and1 = c.and_node([l1, l2])
+    assert c.nb_nodes() == 4
+    or1 = c.or_node([and1, l3])
+    assert c.nb_nodes() == 6  # or1 + 1 dummy node
+    c.set_root(and1)
+    # and1 is root node; but or1 is in a layer above, unused.
+    assert c.nb_nodes() == 6
+    c.remove_unused_nodes()  # should remove or1 + 1 dummy node
+    assert c.nb_nodes() == 4, f"Expected 4 nodes instead of {c.nb_nodes()}"
+
+
+def test_removing_useless_nodes2():
+    c = klay.Circuit()
+    l1, l2, l3 = c.literal_node(1), c.literal_node(2), c.literal_node(3)
+    assert c.nb_nodes() == 3
+    and1 = c.and_node([l1, l2])
+    assert c.nb_nodes() == 4
+    or1 = c.or_node([and1, l3])
+    assert c.nb_nodes() == 6  # or1 + 1 dummy node
+    and2 = c.and_node([l1, l3])  # useless
+    assert c.nb_nodes() == 7
+    or2 = c.or_node([l1, l2])  # useless
+    assert c.nb_nodes() == 10  # or2 + 2 dummy nodes
+    c.set_root(or1)
+    c.remove_unused_nodes()  # should remove `and2`, `or2`, and 2 dummy nodes
+    assert c.nb_nodes() == 6, f"Expected 6 nodes instead of {c.nb_nodes()}"