|
| 1 | +// This code is part of the project "Theoretically Efficient Parallel Graph |
| 2 | +// Algorithms Can Be Fast and Scalable", presented at Symposium on Parallelism |
| 3 | +// in Algorithms and Architectures, 2018. |
| 4 | +// Copyright (c) 2018 Laxman Dhulipala, Guy Blelloch, and Julian Shun |
| 5 | +// |
| 6 | +// Permission is hereby granted, free of charge, to any person obtaining a copy |
| 7 | +// of this software and associated documentation files (the "Software"), to deal |
| 8 | +// in the Software without restriction, including without limitation the rights |
| 9 | +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
| 10 | +// copies of the Software, and to permit persons to whom the Software is |
| 11 | +// furnished to do so, subject to the following conditions: |
| 12 | +// |
| 13 | +// The above copyright notice and this permission notice shall be included in |
| 14 | +// all copies or substantial portions of the Software. |
| 15 | +// |
| 16 | +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
| 17 | +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
| 18 | +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
| 19 | +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
| 20 | +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
| 21 | +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
| 22 | +// SOFTWARE. |
| 23 | +// |
| 24 | +// This file provides a parallel implementation of (weighted) LabelPropagation. |
| 25 | + |
| 26 | +#pragma once |
| 27 | + |
| 28 | +#include <math.h> |
| 29 | + |
| 30 | +#include "benchmarks/GraphColoring/Hasenplaugh14/GraphColoring.h" |
| 31 | +#include "gbbs/gbbs.h" |
| 32 | + |
| 33 | +namespace gbbs { |
| 34 | + |
// Type used to represent a vertex label (community id).
using label_type = int64_t;

// Sentinel meaning "no label available"; internal::compute_new_color returns
// it for a vertex that has no out-neighbors. Declared `inline` so that every
// translation unit including this header shares a single definition.
inline constexpr label_type kInvalidLabel =
    std::numeric_limits<label_type>::max();
| 37 | + |
| 38 | +namespace internal { |
| 39 | + |
| 40 | +template <class Graph> |
| 41 | +label_type compute_new_color(Graph& G, |
| 42 | + const parlay::sequence<label_type>& cur_labels, |
| 43 | + gbbs::uintE node_id) { |
| 44 | + using Weight = typename Graph::weight_type; |
| 45 | + auto node = G.get_vertex(node_id); |
| 46 | + size_t degree = node.out_degree(); |
| 47 | + |
| 48 | + // Collect the set of neighboring labels, and weights to them. |
| 49 | + using Elt = std::pair<label_type, double>; |
| 50 | + parlay::sequence<Elt> label_and_weight(degree); |
| 51 | + auto map_f = [&](const gbbs::uintE& u, const gbbs::uintE& v, |
| 52 | + const Weight& weight, size_t index) { |
| 53 | + if constexpr (std::is_same_v<Weight, gbbs::empty>) { |
| 54 | + label_and_weight[index] = Elt{cur_labels[v], double{1}}; |
| 55 | + } else { |
| 56 | + label_and_weight[index] = Elt{cur_labels[v], double{weight}}; |
| 57 | + } |
| 58 | + }; |
| 59 | + node.out_neighbors().map_with_index(map_f); |
| 60 | + |
| 61 | + // Sum up the weights. |
| 62 | + parlay::sort_inplace(label_and_weight); |
| 63 | + auto copy_f = [](Elt a, Elt b) { |
| 64 | + if (a.first == b.first) { |
| 65 | + return Elt{a.first, a.second + b.second}; |
| 66 | + } |
| 67 | + return b; |
| 68 | + }; |
| 69 | + Elt identity = {std::numeric_limits<label_type>::max(), 0}; |
| 70 | + auto copy_monoid = parlay::make_monoid(copy_f, identity); |
| 71 | + parlay::scan_inclusive_inplace(label_and_weight, copy_monoid); |
| 72 | + |
| 73 | + // Collect the total weight per-label. |
| 74 | + auto label_ends = |
| 75 | + parlay::pack_index(parlay::delayed_seq<bool>(degree, [&](size_t i) { |
| 76 | + return (i == degree - 1) | |
| 77 | + (label_and_weight[i].first != label_and_weight[i + 1].first); |
| 78 | + })); |
| 79 | + auto weights_and_labels = parlay::map(label_ends, [&](size_t index) { |
| 80 | + return std::make_pair(label_and_weight[index].second, |
| 81 | + label_and_weight[index].first); |
| 82 | + }); |
| 83 | + parlay::sort_inplace(weights_and_labels, std::greater<>{}); |
| 84 | + |
| 85 | + if (degree > 0) { |
| 86 | + return weights_and_labels[0].second; |
| 87 | + } |
| 88 | + return kInvalidLabel; |
| 89 | +} |
| 90 | + |
| 91 | +} // namespace internal |
| 92 | + |
| 93 | +// Expects an undirected (possibly weighted) graph. |
| 94 | +template <class Graph> |
| 95 | +parlay::sequence<label_type> LabelPropagation( |
| 96 | + Graph& G, const parlay::sequence<label_type>& initial_labels, |
| 97 | + size_t max_iters = 100, |
| 98 | + bool use_async = true, |
| 99 | + bool use_graph_coloring = false) { |
| 100 | + const uintE n = G.n; |
| 101 | + using Weight = typename Graph::weight_type; |
| 102 | + |
| 103 | + // Create two copies of the labels. |
| 104 | + parlay::sequence<label_type> cur_labels = |
| 105 | + parlay::sequence<label_type>(initial_labels); |
| 106 | + parlay::sequence<label_type> next_labels = cur_labels; |
| 107 | + |
| 108 | + parlay::sequence<bool> cur_active(n, true); |
| 109 | + parlay::sequence<bool> next_active(n, false); |
| 110 | + |
| 111 | + // Try the graph-coloring variant. |
| 112 | + using color = gbbs::uintE; |
| 113 | + parlay::sequence<color> coloring; |
| 114 | + parlay::sequence<std::pair<color, gbbs::uintE>> color_and_node; |
| 115 | + parlay::sequence<size_t> color_starts; |
| 116 | + if (use_graph_coloring) { |
| 117 | + // Note that if we are using graph coloring, we want to update the same |
| 118 | + // label set, so async should be set to true. |
| 119 | + use_async = true; |
| 120 | + coloring = Coloring(G); |
| 121 | + color_and_node = parlay::tabulate<std::pair<color, gbbs::uintE>>( |
| 122 | + coloring.size(), [&](gbbs::uintE i) { return std::make_pair(coloring[i], i); }); |
| 123 | + parlay::sort_inplace(coloring); |
| 124 | + color_starts = parlay::pack_index( |
| 125 | + parlay::delayed_seq<bool>(color_and_node.size(), [&](size_t i) { |
| 126 | + return (i == 0) | |
| 127 | + (color_and_node[i].first != color_and_node[i - 1].first); |
| 128 | + })); |
| 129 | + } |
| 130 | + |
| 131 | + std::cout << "Starting LabelPropagation. Parameters:" << std::endl; |
| 132 | + std::cout << " - use_async = " << use_async << std::endl; |
| 133 | + std::cout << " - use_graph_coloring = " << use_graph_coloring << std::endl; |
| 134 | + std::cout << " - max_iters = " << max_iters << std::endl; |
| 135 | + |
| 136 | + size_t iter = 0; |
| 137 | + while (iter < max_iters) { |
| 138 | + std::cout << "Running iteration: " << iter << std::endl; |
| 139 | + std::atomic<bool> changed = false; |
| 140 | + |
| 141 | + auto process_node = [&](gbbs::uintE i) { |
| 142 | + // One of our neighbors changed in the last iteration (or this is |
| 143 | + // the first iteration). Recompute this node's label. |
| 144 | + if (cur_active[i]) { |
| 145 | + // Reset our flag for the next iteration. |
| 146 | + cur_active[i] = false; |
| 147 | + |
| 148 | + // Computes the next label based on cur_labels. |
| 149 | + label_type new_label = internal::compute_new_color(G, cur_labels, i); |
| 150 | + |
| 151 | + if (new_label != kInvalidLabel && cur_labels[i] != new_label) { |
| 152 | + // Set our label for the next iteration. |
| 153 | + if (use_async) { |
| 154 | + // If using async, just update the current label set. |
| 155 | + cur_labels[i] = new_label; |
| 156 | + } else { |
| 157 | + next_labels[i] = new_label; |
| 158 | + } |
| 159 | + // Mark our neighbors to be active in the next iteration. |
| 160 | + auto activate_f = [&](const gbbs::uintE& u, const gbbs::uintE& v, |
| 161 | + const Weight& weight) { |
| 162 | + if (!next_active[v]) next_active[v] = true; |
| 163 | + }; |
| 164 | + G.get_vertex(i).out_neighbors().map(activate_f); |
| 165 | + if (!changed) { |
| 166 | + changed = true; |
| 167 | + } |
| 168 | + } |
| 169 | + } |
| 170 | + }; |
| 171 | + |
| 172 | + if (!use_graph_coloring) { |
| 173 | + // Map over all nodes. |
| 174 | + parlay::parallel_for(0, n, [&](size_t i) { process_node(i); }); |
| 175 | + } else { |
| 176 | + // Map over each color set, one after the other. |
| 177 | + for (size_t c = 0; c < color_starts.size(); ++c) { |
| 178 | + size_t start_offset = color_starts[c]; |
| 179 | + size_t end_offset = (c == color_starts.size() - 1) |
| 180 | + ? color_and_node.size() |
| 181 | + : color_starts[c + 1]; |
| 182 | + // Map over all vertices of the same color in parallel. |
| 183 | + parlay::parallel_for(start_offset, end_offset, [&](size_t i) { |
| 184 | + gbbs::uintE node_id = color_and_node[i].second; |
| 185 | + process_node(node_id); |
| 186 | + }); |
| 187 | + } |
| 188 | + } |
| 189 | + |
| 190 | + // Swap the current and next labels/active sets. |
| 191 | + if (!use_async) { |
| 192 | + std::swap(cur_labels, next_labels); |
| 193 | + } |
| 194 | + std::swap(cur_active, next_active); |
| 195 | + |
| 196 | + // Check convergence. If no labels changed in this iteration, quit. |
| 197 | + if (!changed) break; |
| 198 | + |
| 199 | + ++iter; |
| 200 | + } |
| 201 | + // for (size_t i=0; i < n; ++i) { |
| 202 | + // std::cout << cur_labels[i] << std::endl; |
| 203 | + // } |
| 204 | + return cur_labels; |
| 205 | +} |
| 206 | + |
| 207 | +template <class Graph> |
| 208 | +parlay::sequence<label_type> LabelPropagation(Graph& G) { |
| 209 | + auto initial_labels = parlay::tabulate(G.n, [&](label_type i) { return i; }); |
| 210 | + return LabelPropagation(G, initial_labels); |
| 211 | +} |
| 212 | + |
| 213 | +} // namespace gbbs |
0 commit comments