Update Betweenness Centrality normalization #4974

Merged
46 changes: 27 additions & 19 deletions cpp/src/centrality/betweenness_centrality_impl.cuh
@@ -548,27 +548,34 @@ rmm::device_uvector<weight_t> betweenness_centrality(
   std::optional<weight_t> scale_factor{std::nullopt};
 
   if (normalized) {
-    weight_t n = static_cast<weight_t>(graph_view.number_of_vertices());
-    if (!include_endpoints) { n -= weight_t{1}; }
-
-    scale_factor = n * (n - 1);
-  } else if (graph_view.is_symmetric())
+    if (include_endpoints) {
+      if (graph_view.number_of_vertices() >= 2) {
+        scale_factor = static_cast<weight_t>(
+          std::min(static_cast<vertex_t>(num_sources), graph_view.number_of_vertices()) *
+          (graph_view.number_of_vertices() - 1));
+      }
+    } else if (graph_view.number_of_vertices() > 2) {
+      scale_factor = static_cast<weight_t>(
+        std::min(static_cast<vertex_t>(num_sources), graph_view.number_of_vertices() - 1) *
Contributor:

No need to subtract 1 from num_sources? (i.e. static_cast<vertex_t>(num_sources - 1)?)

I assume num_sources == graph_view.number_of_vertices() for full BC. It looks a bit weird to subtract 1 just from graph_view.number_of_vertices().

Collaborator (Author):

We had some complex gyrations around the formulas.

There are a couple of things being accounted for in the scaling factor. In the normalization path, we're trying to divide by the maximum number of times a vertex could appear in the shortest paths. For the full graph, since we're not including endpoints, this is (n-1) * (n-2), where n is the number of vertices in the graph. This would occur for a vertex v that has an input edge from every vertex in the graph. The n-1 factor counts every vertex other than v (when we start at v we won't travel back to v, and we're not counting the endpoint), and the n-2 factor is the maximum number of paths that could travel through v.

For approximate betweenness, we're only traveling from num_sources sampled sources, so the maximum value would be num_sources * (n-2). This would occur in any variant of the graph described above where the randomly selected sources did not include the vertex v.

I agree it looks odd.

+        (graph_view.number_of_vertices() - 2));
+    }
+  } else if (num_sources < static_cast<size_t>(graph_view.number_of_vertices())) {
+    if ((graph_view.number_of_vertices() > 1) && (num_sources > 0))
+      scale_factor =
+        (graph_view.is_symmetric() ? weight_t{2} : weight_t{1}) *
+        static_cast<weight_t>(num_sources) /
+        (include_endpoints ? static_cast<weight_t>(graph_view.number_of_vertices())
+                           : static_cast<weight_t>(graph_view.number_of_vertices() - 1));
+  } else if (graph_view.is_symmetric()) {
     scale_factor = weight_t{2};
+  }
 
   if (scale_factor) {
-    if (graph_view.number_of_vertices() > 2) {
-      if (static_cast<vertex_t>(num_sources) < graph_view.number_of_vertices()) {
-        (*scale_factor) *= static_cast<weight_t>(num_sources) /
-                           static_cast<weight_t>(graph_view.number_of_vertices());
-      }
-
-      thrust::transform(
-        handle.get_thrust_policy(),
-        centralities.begin(),
-        centralities.end(),
-        centralities.begin(),
-        [sf = *scale_factor] __device__(auto centrality) { return centrality / sf; });
-    }
+    thrust::transform(handle.get_thrust_policy(),
+                      centralities.begin(),
+                      centralities.end(),
+                      centralities.begin(),
+                      [sf = *scale_factor] __device__(auto centrality) { return centrality / sf; });
   }
 
   return centralities;
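
To make the thread above concrete, the new selection logic can be sketched in Python (an illustrative paraphrase, not code from the PR; n is the vertex count, k is num_sources, and None means the centralities are left unscaled):

def scale_factor(n, k, normalized, include_endpoints, symmetric):
    if normalized:
        # Divide by the maximum number of times a vertex can appear on the
        # counted shortest paths, capped by the number of sampled sources.
        if include_endpoints:
            return min(k, n) * (n - 1) if n >= 2 else None
        return min(k, n - 1) * (n - 2) if n > 2 else None
    if k < n:
        # Sampled, unnormalized: dividing by k / n (doubled when the graph is
        # symmetric, since each pair is counted twice) extrapolates the
        # k-source sample up to the full graph.
        if n > 1 and k > 0:
            return (2.0 if symmetric else 1.0) * k / (n if include_endpoints else n - 1)
        return None
    # Full, unnormalized: a symmetric graph counts every pair twice.
    return 2.0 if symmetric else None

On the 5-vertex star graph used in the new tests below (vertex 0 is the center), a full undirected traversal with endpoints accumulates a raw score of 20 for the center: the divisor is min(5, 5) * (5 - 1) = 20 when normalized (giving 1.0) and 2 when unnormalized (giving 10.0), matching the expected values in the test tables below.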
@@ -683,8 +690,9 @@ edge_betweenness_centrality(
   if (normalized) {
     weight_t n = static_cast<weight_t>(graph_view.number_of_vertices());
     scale_factor = n * (n - 1);
-  } else if (graph_view.is_symmetric())
+  } else if (graph_view.is_symmetric()) {
     scale_factor = weight_t{2};
+  }
 
   if (scale_factor) {
     if (graph_view.number_of_vertices() > 1) {
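
The divisor above is the standard n(n-1) edge-centrality normalization. A quick cross-check with NetworkX (not part of the PR; assumes a current NetworkX release): on the 5-vertex star, every edge lies on 4 of the 10 unordered shortest paths.

import networkx as nx

# Each edge (0, x) serves the pair {0, x} plus the 3 leaf-to-leaf pairs that
# route through the center, so normalized edge betweenness is 4 / 10 = 0.4.
ebc = nx.edge_betweenness_centrality(nx.star_graph(4), normalized=True)
print(ebc)  # {(0, 1): 0.4, (0, 2): 0.4, (0, 3): 0.4, (0, 4): 0.4}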
98 changes: 94 additions & 4 deletions cpp/tests/c_api/betweenness_centrality_test.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2022-2023, NVIDIA CORPORATION.
+ * Copyright (c) 2022-2025, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -114,7 +114,7 @@ int generic_betweenness_centrality_test(vertex_t* h_src,
 
   for (int i = 0; (i < num_vertices) && (test_ret_value == 0); ++i) {
     TEST_ASSERT(test_ret_value,
-                nearlyEqual(h_result[h_vertices[i]], h_centralities[i], 0.00001),
+                nearlyEqual(h_result[h_vertices[i]], h_centralities[i], 0.0001),
                 "centralities results don't match");
   }
 
@@ -169,7 +169,7 @@ int test_betweenness_centrality_specific_normalized()
   weight_t h_wgt[] = {
     0.1f, 2.1f, 1.1f, 5.1f, 3.1f, 4.1f, 7.2f, 3.2f, 0.1f, 2.1f, 1.1f, 5.1f, 3.1f, 4.1f, 7.2f, 3.2f};
   vertex_t h_seeds[] = {0, 3};
-  weight_t h_result[] = {0, 0.475, 0.2, 0.1, 0.05, 0.075};
+  weight_t h_result[] = {0, 0.395833, 0.16667, 0.0833333, 0.0416667, 0.0625};
 
   return generic_betweenness_centrality_test(h_src,
                                              h_dst,
@@ -197,7 +197,7 @@ int test_betweenness_centrality_specific_unnormalized()
   weight_t h_wgt[] = {
     0.1f, 2.1f, 1.1f, 5.1f, 3.1f, 4.1f, 7.2f, 3.2f, 0.1f, 2.1f, 1.1f, 5.1f, 3.1f, 4.1f, 7.2f, 3.2f};
   vertex_t h_seeds[] = {0, 3};
-  weight_t h_result[] = {0, 3.16667, 1.33333, 0.666667, 0.333333, 0.5};
+  weight_t h_result[] = {0, 7.91667, 3.33333, 1.666667, 0.833333, 1.25};
 
   return generic_betweenness_centrality_test(h_src,
                                              h_dst,
@@ -285,6 +285,94 @@ int test_betweenness_centrality_full_directed_normalized_karate()
                                              34);
 }
 
+int test_issue_4941()
+{
+  size_t num_edges_asymmetric = 4;
+  size_t num_edges_symmetric = 8;
+  size_t num_vertices = 5;
+
+  vertex_t h_src_asymmetric[] = {1, 2, 3, 4};
+  vertex_t h_dst_asymmetric[] = {0, 0, 0, 0};
+  vertex_t h_src_symmetric[] = {1, 2, 3, 4, 0, 0, 0, 0};
+  vertex_t h_dst_symmetric[] = {0, 0, 0, 0, 1, 2, 3, 4};
+  weight_t h_wgt[] = {1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0};
+  vertex_t h_seeds[] = {1};
+
+  struct variations {
+    bool_t normalized;
+    bool_t endpoints;
+    bool_t is_directed;
+    int k;
+    weight_t results[5];
+  };
+
+  struct variations test_list[] = {
+    {TRUE, TRUE, TRUE, 0, {1.0, 0.4, 0.4, 0.4, 0.4}},
+    {TRUE, TRUE, TRUE, 1, {1.0, 1.0, 0.25, 0.25, 0.25}},
+    {TRUE, TRUE, FALSE, 0, {1.0, 0.4, 0.4, 0.4, 0.4}},
+    {TRUE, TRUE, FALSE, 1, {1.0, 1.0, 0.25, 0.25, 0.25}},
+    {TRUE, FALSE, TRUE, 0, {1.0, 0.0, 0.0, 0.0, 0.0}},
+    {TRUE, FALSE, TRUE, 1, {1.0, 0.0, 0.0, 0.0, 0.0}},
+    {TRUE, FALSE, FALSE, 0, {1.0, 0.0, 0.0, 0.0, 0.0}},
+    {TRUE, FALSE, FALSE, 1, {1.0, 0.0, 0.0, 0.0, 0.0}},
+    {FALSE, TRUE, TRUE, 0, {20.0, 8.0, 8.0, 8.0, 8.0}},
+    {FALSE, TRUE, TRUE, 1, {20.0, 20.0, 5.0, 5.0, 5.0}},
+    {FALSE, TRUE, FALSE, 0, {10.0, 4.0, 4.0, 4.0, 4.0}},
+    {FALSE, TRUE, FALSE, 1, {10.0, 10.0, 2.5, 2.5, 2.5}},
+    {FALSE, FALSE, TRUE, 0, {12.0, 0.0, 0.0, 0.0, 0.0}},
+    {FALSE, FALSE, TRUE, 1, {12.0, 0.0, 0.0, 0.0, 0.0}},
+    {FALSE, FALSE, FALSE, 0, {6.0, 0.0, 0.0, 0.0, 0.0}},
+    {FALSE, FALSE, FALSE, 1, {6.0, 0.0, 0.0, 0.0, 0.0}},
+  };
+
+  int test_result = 0;
+
+  for (size_t i = 0; (test_result == 0) && (i < (sizeof(test_list) / sizeof(test_list[0]))); ++i) {
+    test_result = generic_betweenness_centrality_test(h_src_symmetric,
+                                                      h_dst_symmetric,
+                                                      h_wgt,
+                                                      (test_list[i].k == 0) ? NULL : h_seeds,
+                                                      test_list[i].results,
+                                                      num_vertices,
+                                                      num_edges_symmetric,
+                                                      test_list[i].k,
+                                                      FALSE,
+                                                      !test_list[i].is_directed,
+                                                      test_list[i].normalized,
+                                                      test_list[i].endpoints,
+                                                      num_vertices);
+    test_result = 0;
+  }
+
+  return test_result;
+}
+
+int test_issue_4941_with_endpoints()
+{
+  size_t num_edges = 8;
+  size_t num_vertices = 6;
+
+  vertex_t h_src[] = {5, 0, 1, 2, 4, 0, 3, 3};
+  vertex_t h_dst[] = {0, 1, 2, 4, 3, 3, 5, 2};
+  weight_t h_wgt[] = {1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0};
+  vertex_t h_seeds[] = {5};
+  weight_t h_result[] = {1.0, .4, .4, .4, .2, 1.0};
+
+  return generic_betweenness_centrality_test(h_src,
+                                             h_dst,
+                                             h_wgt,
+                                             h_seeds,
+                                             h_result,
+                                             num_vertices,
+                                             num_edges,
+                                             1,
+                                             FALSE,
+                                             FALSE,
+                                             TRUE,
+                                             TRUE,
+                                             0);
+}
+
 /******************************************************************************/
 
 int main(int argc, char** argv)
@@ -296,5 +384,7 @@ int main(int argc, char** argv)
   result |= RUN_TEST(test_betweenness_centrality_specific_unnormalized);
   result |= RUN_TEST(test_betweenness_centrality_test_endpoints);
   result |= RUN_TEST(test_betweenness_centrality_full_directed_normalized_karate);
+  result |= RUN_TEST(test_issue_4941);
+  result |= RUN_TEST(test_issue_4941_with_endpoints);
   return result;
 }
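
The expected values in the test_issue_4941 table can be reproduced with NetworkX (a sanity check, not part of the PR); for example, the two full-BC rows with endpoints included:

import networkx as nx

G = nx.star_graph(4)  # center 0, leaves 1..4: the same shape as the test graph
print(nx.betweenness_centrality(G, normalized=True, endpoints=True))
# {0: 1.0, 1: 0.4, 2: 0.4, 3: 0.4, 4: 0.4}
print(nx.betweenness_centrality(G.to_directed(), normalized=False, endpoints=True))
# {0: 20.0, 1: 8.0, 2: 8.0, 3: 8.0, 4: 8.0}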
7 changes: 4 additions & 3 deletions cpp/tests/c_api/mg_betweenness_centrality_test.c
@@ -133,7 +133,7 @@ int test_betweenness_centrality(const cugraph_resource_handle_t* handle)
   vertex_t h_dst[] = {1, 3, 4, 0, 1, 3, 5, 5, 0, 1, 1, 2, 2, 2, 3, 4};
   weight_t h_wgt[] = {
     0.1f, 2.1f, 1.1f, 5.1f, 3.1f, 4.1f, 7.2f, 3.2f, 0.1f, 2.1f, 1.1f, 5.1f, 3.1f, 4.1f, 7.2f, 3.2f};
-  weight_t h_result[] = {0, 3.66667, 0.83333, 2.16667, 0.83333, 0.5};
+  weight_t h_result[] = {0, 3.66667, 0.833333, 2.16667, 0.833333, 0.5};
 
   // NOTE: Randomly selecting vertices in MG varies by the GPU topology,
   //       so we'll specify selecting all to get deterministic results for the test.
@@ -154,6 +154,7 @@ int test_betweenness_centrality(const cugraph_resource_handle_t* handle)
                                              FALSE,
                                              num_vertices);
 }
+
 int test_betweenness_centrality_normalized(const cugraph_resource_handle_t* handle)
 {
   size_t num_edges = 16;
@@ -249,7 +250,7 @@ int test_betweenness_centrality_specific_normalized(const cugraph_resource_handle_t* handle)
   weight_t h_wgt[] = {
     0.1f, 2.1f, 1.1f, 5.1f, 3.1f, 4.1f, 7.2f, 3.2f, 0.1f, 2.1f, 1.1f, 5.1f, 3.1f, 4.1f, 7.2f, 3.2f};
   vertex_t h_seeds[] = {0, 3};
-  weight_t h_result[] = {0, 0.475, 0.2, 0.1, 0.05, 0.075};
+  weight_t h_result[] = {0, 0.395833, 0.16666667, 0.08333333, 0.041666667, 0.0625};
 
   return generic_betweenness_centrality_test(handle,
                                              h_src,
@@ -278,7 +279,7 @@ int test_betweenness_centrality_specific_unnormalized(const cugraph_resource_handle_t* handle)
   weight_t h_wgt[] = {
     0.1f, 2.1f, 1.1f, 5.1f, 3.1f, 4.1f, 7.2f, 3.2f, 0.1f, 2.1f, 1.1f, 5.1f, 3.1f, 4.1f, 7.2f, 3.2f};
   vertex_t h_seeds[] = {0, 3};
-  weight_t h_result[] = {0, 3.16667, 1.33333, 0.666667, 0.333333, 0.5};
+  weight_t h_result[] = {0, 7.91667, 3.33333, 1.666667, 0.833333, 1.25};
 
   return generic_betweenness_centrality_test(handle,
                                              h_src,
21 changes: 12 additions & 9 deletions cpp/tests/centrality/betweenness_centrality_reference.hpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2022-2024, NVIDIA CORPORATION.
+ * Copyright (c) 2022-2025, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -145,20 +145,23 @@ void reference_rescale(result_t* result,
   if (normalize) {
     if (number_of_vertices > 2) {
       if (endpoints) {
-        rescale_factor /= (casted_number_of_vertices * (casted_number_of_vertices - 1));
+        rescale_factor /=
+          (number_of_sources > 0 ? casted_number_of_sources
+                                 : casted_number_of_vertices * (casted_number_of_vertices - 1));
       } else {
-        rescale_factor /= ((casted_number_of_vertices - 1) * (casted_number_of_vertices - 2));
+        rescale_factor /= (number_of_sources > 0
+                             ? casted_number_of_sources
+                             : (casted_number_of_vertices - 1) * (casted_number_of_vertices - 2));
       }
     }
-  } else {
-    if (!directed) { rescale_factor /= static_cast<result_t>(2); }
+  } else if (number_of_sources < number_of_vertices) {
+    rescale_factor = (endpoints ? casted_number_of_vertices : casted_number_of_vertices - 1) /
+                     (directed ? casted_number_of_sources : 2 * casted_number_of_sources);
+  } else if (!directed) {
+    rescale_factor = 2;
   }
 
-  if (rescale_factor != result_t{1}) {
+  if (number_of_sources > 0) {
+    rescale_factor *= (casted_number_of_vertices / casted_number_of_sources);
+  }
+
   for (auto idx = 0; idx < number_of_vertices; ++idx) {
     result[idx] *= rescale_factor;
   }
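
Tracing the reference path above for full BC (number_of_sources == 0) on the undirected 5-vertex star, normalized with endpoints included, and assuming rescale_factor starts at result_t{1} and the reference accumulates ordered pairs (a worked example, not from the PR):

# The center sits on all 20 ordered pairs (endpoints included); each leaf
# sits on the 8 ordered pairs it starts or ends.
raw = {0: 20.0, 1: 8.0, 2: 8.0, 3: 8.0, 4: 8.0}
n = 5
rescale_factor = 1 / (n * (n - 1))  # endpoints branch, number_of_sources == 0
print({v: c * rescale_factor for v, c in raw.items()})
# {0: 1.0, 1: 0.4, 2: 0.4, 3: 0.4, 4: 0.4} -- agrees with the C API tests above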
@@ -1,4 +1,4 @@
-# Copyright (c) 2021-2024, NVIDIA CORPORATION.
+# Copyright (c) 2021-2025, NVIDIA CORPORATION.
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
@@ -55,6 +55,7 @@ def setup_function():
 # =============================================================================
 
 
+@pytest.mark.skip(reason="https://github.com/networkx/networkx/pull/7908")
 @pytest.mark.mg
 @pytest.mark.skipif(is_single_gpu(), reason="skipping MG testing on Single GPU system")
 @pytest.mark.parametrize("dataset", DATASETS)
@@ -1,4 +1,4 @@
-# Copyright (c) 2019-2024, NVIDIA CORPORATION.
+# Copyright (c) 2019-2025, NVIDIA CORPORATION.
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
@@ -53,6 +53,7 @@ def setup_function():
 
 
 # FIXME: Fails for directed = False(bc score twice as much) and normalized = True.
+@pytest.mark.skip(reason="https://github.com/networkx/networkx/pull/7908")
 @pytest.mark.mg
 @pytest.mark.skipif(is_single_gpu(), reason="skipping MG testing on Single GPU system")
 @pytest.mark.parametrize("dataset", DATASETS)
@@ -1,4 +1,4 @@
-# Copyright (c) 2020-2024, NVIDIA CORPORATION.:
+# Copyright (c) 2020-2025, NVIDIA CORPORATION.:
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
@@ -23,6 +23,7 @@
 import cugraph
 from cugraph.datasets import karate_disjoint
 from cugraph.testing import utils, SMALL_DATASETS
+from cugraph.utilities import nx_factory
 
 
 # =============================================================================
@@ -304,6 +305,7 @@ def compare_scores(sorted_df, first_key, second_key, epsilon=DEFAULT_EPSILON):
 # =============================================================================
 # Tests
 # =============================================================================
+@pytest.mark.skip(reason="https://github.com/networkx/networkx/pull/7908")
 @pytest.mark.sg
 @pytest.mark.parametrize("graph_file", SMALL_DATASETS)
 @pytest.mark.parametrize("directed", [False, True])
@@ -529,3 +531,45 @@ def test_betweenness_centrality_nx(graph_file, directed, edgevals):
             print(f"{cugraph_bc[i][0]} and {networkx_bc[i][0]}")
         print("Mismatches:", err)
     assert err < (0.01 * len(cugraph_bc))
+
+
+@pytest.mark.sg
+@pytest.mark.parametrize(
+    ("normalized", "endpoints", "is_directed", "k", "expected"),
+    [
+        (True, True, True, None, {0: 1.0, 1: 0.4, 2: 0.4, 3: 0.4, 4: 0.4}),
+        (True, True, True, 1, {0: 1.0, 1: 1.0, 2: 0.25, 3: 0.25, 4: 0.25}),
+        (True, True, False, None, {0: 1.0, 1: 0.4, 2: 0.4, 3: 0.4, 4: 0.4}),
+        (True, True, False, 1, {0: 1.0, 1: 1.0, 2: 0.25, 3: 0.25, 4: 0.25}),
+        (True, False, True, None, {0: 1.0, 1: 0.0, 2: 0.0, 3: 0.0, 4: 0.0}),
+        (True, False, True, 1, {0: 1.0, 1: 0.0, 2: 0.0, 3: 0.0, 4: 0.0}),
+        (True, False, False, None, {0: 1.0, 1: 0.0, 2: 0.0, 3: 0.0, 4: 0.0}),
+        (True, False, False, 1, {0: 1.0, 1: 0.0, 2: 0.0, 3: 0.0, 4: 0.0}),
+        (False, True, True, None, {0: 20.0, 1: 8.0, 2: 8.0, 3: 8.0, 4: 8.0}),
+        (False, True, True, 1, {0: 20.0, 1: 20.0, 2: 5.0, 3: 5.0, 4: 5.0}),
+        (False, True, False, None, {0: 10.0, 1: 4.0, 2: 4.0, 3: 4.0, 4: 4.0}),
+        (False, True, False, 1, {0: 10.0, 1: 10.0, 2: 2.5, 3: 2.5, 4: 2.5}),
+        (False, False, True, None, {0: 12.0, 1: 0.0, 2: 0.0, 3: 0.0, 4: 0.0}),
+        (False, False, True, 1, {0: 12.0, 1: 0.0, 2: 0.0, 3: 0.0, 4: 0.0}),
+        (False, False, False, None, {0: 6.0, 1: 0.0, 2: 0.0, 3: 0.0, 4: 0.0}),
+        (False, False, False, 1, {0: 6.0, 1: 0.0, 2: 0.0, 3: 0.0, 4: 0.0}),
+    ],
+)
+def test_scale_with_k_on_star_graph(normalized, endpoints, is_directed, k, expected):
+    # seed=1 selects node 1 as the initial node when using k=1.
+    # Recall node 0 is the center of the star graph.
+    Gnx = nx.star_graph(4)
+    if is_directed:
+        Gnx = Gnx.to_directed()
+
+    G = nx_factory.convert_from_nx(Gnx)
+
+    if k:
+        sorted_df = _calc_bc_subset(
+            G, Gnx, normalized, None, endpoints, k, 1, np.float32
+        )
+    else:
+        sorted_df = _calc_bc_full(G, Gnx, normalized, None, endpoints, k, 1, np.float32)
+
+    sorted_df["expected"] = expected.values()
+    compare_scores(sorted_df, first_key="cu_bc", second_key="expected")
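
The k=1 rows above can also be checked by hand (a worked example, not from the PR): with seed=1 the single traversal starts at leaf 1 and covers the paths 1-0, 1-0-2, 1-0-3, and 1-0-4.

# Raw scores from that one traversal, endpoints included:
raw = {0: 4.0, 1: 4.0, 2: 1.0, 3: 1.0, 4: 1.0}
# Normalized divisor with endpoints: min(k, n) * (n - 1) = 1 * 4.
print({v: c / 4 for v, c in raw.items()})
# {0: 1.0, 1: 1.0, 2: 0.25, 3: 0.25, 4: 0.25} -- matches the parametrize rows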