
Commit 3ea1cdb

Dispatching to_numpy_array (rapidsai#127)
Addresses rapidsai#115

## Proposed Changes

This PR enables dispatching of `to_numpy_array`.

Authors:

- Ralph Liu (https://github.com/nv-rliu)

Approvers:

- Erik Welch (https://github.com/eriknw)

URL: rapidsai#127
1 parent a1a438c commit 3ea1cdb
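For context, a minimal sketch (not part of this commit) of how the newly dispatchable conversion can be exercised from NetworkX. It assumes nx-cugraph is installed and exposed to NetworkX's dispatch machinery under the backend name `cugraph`; the graph and assertions are illustrative only.

```python
import networkx as nx

G = nx.karate_club_graph()

# Upstream NetworkX computes this on the CPU with NumPy.
A_cpu = nx.to_numpy_array(G, weight=None)

# With this PR, the same call can be routed to nx-cugraph, which fills the
# dense matrix on the GPU and returns it as a NumPy array.
A_gpu = nx.to_numpy_array(G, weight=None, backend="cugraph")

assert A_cpu.shape == A_gpu.shape == (34, 34)
```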

File tree: 5 files changed, +171 −37 lines


README.md

Lines changed: 1 addition & 0 deletions

```diff
@@ -298,6 +298,7 @@ Below is the list of algorithms that are currently supported in nx-cugraph.
 <a href="https://networkx.org/documentation/stable/reference/convert.html#module-networkx.convert_matrix">convert_matrix</a>
  ├─ <a href="https://networkx.org/documentation/stable/reference/generated/networkx.convert_matrix.from_pandas_edgelist.html#networkx.convert_matrix.from_pandas_edgelist">from_pandas_edgelist</a>
  ├─ <a href="https://networkx.org/documentation/stable/reference/generated/networkx.convert_matrix.from_scipy_sparse_array.html#networkx.convert_matrix.from_scipy_sparse_array">from_scipy_sparse_array</a>
+ ├─ <a href="https://networkx.org/documentation/stable/reference/generated/networkx.convert_matrix.to_numpy_array.html#networkx.convert_matrix.to_numpy_array">to_numpy_array</a>
  └─ <a href="https://networkx.org/documentation/stable/reference/generated/networkx.convert_matrix.to_scipy_sparse_array.html#networkx.convert_matrix.to_scipy_sparse_array">to_scipy_sparse_array</a>
 <a href="https://networkx.org/documentation/stable/reference/linalg.html">linalg</a>
  └─ <a href="https://networkx.org/documentation/stable/reference/linalg.html#module-networkx.linalg.graphmatrix">graphmatrix</a>
```

_nx_cugraph/__init__.py

Lines changed: 2 additions & 0 deletions

```diff
@@ -159,6 +159,7 @@
         "tadpole_graph",
         "tetrahedral_graph",
         "to_dict_of_lists",
+        "to_numpy_array",
         "to_scipy_sparse_array",
         "tournament_matrix",
         "transitivity",
@@ -212,6 +213,7 @@
         "single_source_bellman_ford": "Negative cycles are not yet supported. ``NotImplementedError`` will be raised if there are negative edge weights. We plan to support negative edge weights soon. Also, callable ``weight`` argument is not supported.",
         "single_source_bellman_ford_path": "Negative cycles are not yet supported. ``NotImplementedError`` will be raised if there are negative edge weights. We plan to support negative edge weights soon. Also, callable ``weight`` argument is not supported.",
         "single_source_bellman_ford_path_length": "Negative cycles are not yet supported. ``NotImplementedError`` will be raised if there are negative edge weights. We plan to support negative edge weights soon. Also, callable ``weight`` argument is not supported.",
+        "to_numpy_array": "MultiGraphs are not yet supported. Only valid CuPy dtypes are supported.",
         "transitivity": "Directed graphs are not yet supported.",
         # END: additional_docs
     },
```

benchmarks/pytest-based/bench_algos.py

Lines changed: 15 additions & 0 deletions

```diff
@@ -15,6 +15,7 @@
 from collections.abc import Mapping
 
 import networkx as nx
+import numpy as np
 import pandas as pd
 import pytest
 from cugraph import datasets
@@ -996,6 +997,20 @@ def bench_forceatlas2(benchmark, graph_obj, backend_wrapper):
     assert type(result) is dict
 
 
+def bench_to_numpy_array(benchmark, graph_obj, backend_wrapper):
+    G = get_graph_obj_for_benchmark(graph_obj, backend_wrapper)
+
+    result = benchmark.pedantic(
+        target=backend_wrapper(nx.to_numpy_array),
+        args=(G,),
+        rounds=rounds,
+        iterations=iterations,
+        warmup_rounds=warmup_rounds,
+    )
+
+    assert isinstance(result, np.ndarray)
+
+
 @pytest.mark.parametrize("nodes", ["default", "shuffle", "subset"])
 def bench_to_scipy_sparse_array(benchmark, graph_obj, backend_wrapper, nodes):
     # Same as bench_adjacency_matrix
```
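As an aside, the new benchmark reuses the file's pytest-benchmark "pedantic" pattern. Below is a hedged, self-contained sketch of that pattern; the graph, the round counts, and the test name are illustrative stand-ins for the module's `graph_obj`/`backend_wrapper` fixtures and its `rounds`/`iterations`/`warmup_rounds` globals.

```python
import networkx as nx
import numpy as np


def test_to_numpy_array_benchmark(benchmark):  # `benchmark` fixture from pytest-benchmark
    G = nx.karate_club_graph()  # stand-in for get_graph_obj_for_benchmark(...)
    result = benchmark.pedantic(
        target=nx.to_numpy_array,
        args=(G,),
        rounds=3,
        iterations=1,
        warmup_rounds=1,
    )
    # pedantic() returns the target's return value, so the result can be checked.
    assert isinstance(result, np.ndarray)
```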

nx_cugraph/classes/graph.py

Lines changed: 40 additions & 0 deletions

```diff
@@ -1262,3 +1262,43 @@ def _dict_to_nodearray(
         if dtype is None:
             return cp.array(list(val_iter))
         return cp.fromiter(val_iter, dtype)
+
+    def _subgraph_indices(
+        self, nodelist: list[NodeKey] | None
+    ) -> tuple[cp.ndarray[IndexValue], cp.ndarray[IndexValue], cp.ndarray[bool] | None]:
+        if nodelist is None:
+            return self.src_indices, self.dst_indices, None
+
+        node_ids = self._nodekeys_to_nodearray(nodelist)
+        # Subgraph
+        if len(node_ids) < self._N:
+            mapper = cp.empty(self._N, dtype=index_dtype)
+            mapper[:] = -1  # Indicate nodes to exclude
+            mapper[node_ids] = cp.arange(node_ids.size, dtype=index_dtype)
+            src_indices = mapper[self.src_indices]
+            dst_indices = mapper[self.dst_indices]
+            mask = (src_indices != -1) & (dst_indices != -1)
+            src_indices = src_indices[mask]
+            dst_indices = dst_indices[mask]
+        else:
+            mapper = cp.empty(self._N, dtype=index_dtype)
+            mapper[node_ids] = cp.arange(node_ids.size, dtype=index_dtype)
+            src_indices = mapper[self.src_indices]
+            dst_indices = mapper[self.dst_indices]
+            mask = None
+
+        return src_indices, dst_indices, mask
+
+    def _subgraph_weights(
+        self, mask: cp.ndarray[bool] | None, weight: AttrKey, default: EdgeValue = 1
+    ):
+        if weight in self.edge_values:
+            edge_array = self.edge_values[weight]
+            if weight in self.edge_masks:
+                edge_array = cp.where(self.edge_masks[weight], edge_array, default)
+        else:
+            edge_array = cp.repeat(cp.array(default), self.src_indices.size)
+        if mask is not None:
+            edge_array = edge_array[mask]
+
+        return edge_array
```
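The `mapper` renumbering trick used by `_subgraph_indices` (the helper addresses the TODO removed from `to_scipy_sparse_array` further down) may not be obvious at a glance. Here is a small CPU-side illustration of the same idea; NumPy stands in for CuPy only so the sketch runs without a GPU, and the toy edge list is not from the commit.

```python
import numpy as np

# Toy COO edge list over 5 nodes: edges (0,1), (1,3), (3,4), (2,0).
src = np.array([0, 1, 3, 2], dtype=np.int32)
dst = np.array([1, 3, 4, 0], dtype=np.int32)
N = 5

# Keep only nodes [3, 0, 1], in that order (a subgraph + reordering request).
node_ids = np.array([3, 0, 1], dtype=np.int32)

# mapper[old_id] -> new_id; -1 marks nodes that are being dropped.
mapper = np.full(N, -1, dtype=np.int32)
mapper[node_ids] = np.arange(node_ids.size, dtype=np.int32)

new_src = mapper[src]
new_dst = mapper[dst]

# Any edge touching a dropped node has -1 on an endpoint; mask those out.
mask = (new_src != -1) & (new_dst != -1)
new_src, new_dst = new_src[mask], new_dst[mask]

print(new_src, new_dst)  # [1 2] [2 0]: surviving edges, renumbered into [3, 0, 1] order
```

The returned `mask` is what `_subgraph_weights` later applies to the edge-weight array so that weights stay aligned with the filtered `src_indices`/`dst_indices`.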

nx_cugraph/convert_matrix.py

Lines changed: 113 additions & 37 deletions

```diff
@@ -10,6 +10,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+
 import cupy as cp
 import networkx as nx
 import numpy as np
@@ -18,12 +19,17 @@
 
 from .convert import _to_graph
 from .generators._utils import _create_using_class
-from .utils import _cp_iscopied_asarray, index_dtype, networkx_algorithm
+from .utils import (
+    _cp_iscopied_asarray,
+    index_dtype,
+    networkx_algorithm,
+)
 
 __all__ = [
     "from_pandas_edgelist",
     "from_scipy_sparse_array",
     "to_scipy_sparse_array",
+    "to_numpy_array",
 ]
 
 
@@ -187,6 +193,7 @@ def to_scipy_sparse_array(G, nodelist=None, dtype=None, weight="weight", format=
         else:
             src_indices = G.src_indices
             dst_indices = G.dst_indices
+            mask = None
     else:
         nlen = len(nodelist)
         if nlen == 0:
@@ -199,47 +206,18 @@ def to_scipy_sparse_array(G, nodelist=None, dtype=None, weight="weight", format=
         if is_empty:
             src_indices = dst_indices = edge_array = ()
         else:
-            node_ids = G._nodekeys_to_nodearray(nodelist)
-
-            # Subgraph
-            if nlen < G._N:
-                # TODO: create utility funcs for renumbering/reordering node_ids.
-                # Using `mapper` like this is a useful trick that may not be obvious.
-                mapper = cp.empty(G._N, dtype=index_dtype)
-                mapper[:] = -1  # Indicate nodes to exclude
-                mapper[node_ids] = cp.arange(node_ids.size, dtype=index_dtype)
-                src_indices = mapper[G.src_indices]
-                dst_indices = mapper[G.dst_indices]
-                mask = (src_indices != -1) & (dst_indices != -1)
-                src_indices = src_indices[mask]
-                if src_indices.size == 0:
-                    is_empty = True
-                    src_indices = dst_indices = edge_array = ()
-                else:
-                    dst_indices = dst_indices[mask]
-
-            # All nodes, reordered
-            else:
-                # TODO: create utility funcs for renumbering/reordering node_ids.
-                # Using `mapper` like this is a useful trick that may not be obvious.
-                mapper = cp.empty(G._N, dtype=index_dtype)
-                mapper[node_ids] = cp.arange(node_ids.size, dtype=index_dtype)
-                src_indices = mapper[G.src_indices]
-                dst_indices = mapper[G.dst_indices]
+            src_indices, dst_indices, mask = G._subgraph_indices(nodelist)
+
+            if src_indices.size == 0:
+                is_empty = True
+                src_indices = dst_indices = edge_array = ()
 
     if not is_empty:
         src_indices = cp.asnumpy(src_indices)
         dst_indices = cp.asnumpy(dst_indices)
 
-        if weight in G.edge_values:
-            edge_array = G.edge_values[weight]
-            if weight in G.edge_masks:
-                edge_array = cp.where(G.edge_masks[weight], edge_array, 1)
-            if nlen < G._N:
-                edge_array = edge_array[mask]
-            edge_array = cp.asnumpy(edge_array)
-        else:
-            edge_array = np.repeat(1, src_indices.size)
+        edge_array = G._subgraph_weights(mask, weight, 1)
+        edge_array = cp.asnumpy(edge_array)
 
     # PERF: convert to desired sparse format on GPU before copying to CPU
     A = sp.sparse.coo_array(
@@ -276,3 +254,101 @@ def from_scipy_sparse_array(
     if inplace:
         return create_using._become(G)
     return G
+
+
+@networkx_algorithm(version_added="25.06")
+def to_numpy_array(
+    G,
+    nodelist=None,
+    dtype=None,
+    order=None,
+    multigraph_weight=sum,
+    weight="weight",
+    nonedge=0.0,
+):
+    """MultiGraphs are not yet supported. Only valid CuPy dtypes are supported."""
+    if dtype is None:
+        dtype = np.float64
+    dtype = np.dtype(dtype)
+
+    G = _to_graph(G, weight, 1, dtype)
+
+    if nodelist is not None:
+        N = len(nodelist)
+        # use set to check for nodes not in the graph or duplicate nodes
+        nodelist_as_set = set(nodelist)
+        if nodelist_as_set - set(G):
+            raise nx.NetworkXError(
+                f"Nodes {nodelist_as_set - set(G)} in nodelist is not in G"
+            )
+        if len(nodelist_as_set) < N:
+            raise nx.NetworkXError(f"Nodelist {nodelist} contains duplicates")
+    else:
+        N = G._N
+
+    use_numpy = dtype.names is not None
+    if not use_numpy:
+        # May run out of GPU memory on large graphs
+        try:
+            A = cp.full((N, N), fill_value=nonedge, dtype=dtype, order=order)
+        except MemoryError:
+            use_numpy = True
+    if use_numpy:
+        # Most likely will also run out of CPU memory on large graphs
+        A = np.full((N, N), fill_value=nonedge, dtype=dtype, order=order)
+
+    # Case: graph with no nodes
+    if N == 0:
+        return cp.asnumpy(A)
+
+    # assume edge_attrs is None unless other weight value is specified
+    edge_attrs = None
+    if A.dtype.names:
+        if weight is None:
+            edge_attrs = dtype.names
+        else:
+            raise ValueError(
+                "Specifying `weight` not supported for structured dtypes\n."
+                "To create adjacency matrices from structured dtypes,"
+                "use `weight=None`."
+            )
+
+    src_indices, dst_indices, mask = G._subgraph_indices(nodelist)
+
+    if edge_attrs:
+        # TODO could convert this logic into a util function if
+        # more np structured arrays need support in the future
+        edge_array = np.empty(src_indices.size, dtype=dtype)
+        for edge_attr in edge_attrs:
+            if edge_attr in G.edge_values:
+                e_array = G.edge_values[edge_attr]
+                if edge_attr in G.edge_masks:
+                    e_array = cp.where(G.edge_masks[edge_attr], e_array, 1)
+            else:
+                e_array = np.ones(G.src_indices.size, dtype=dtype)
+            edge_array[edge_attr] = cp.asnumpy(e_array)
+    else:
+        edge_array = G._subgraph_weights(mask, weight, 1)
+
+    if use_numpy:
+        src_indices = cp.asnumpy(src_indices)
+        dst_indices = cp.asnumpy(dst_indices)
+        edge_array = cp.asnumpy(edge_array)
+
+    A[src_indices, dst_indices] = edge_array
+
+    return cp.asnumpy(A)
+
+
+@to_numpy_array._can_run
+def _(
+    G,
+    nodelist=None,
+    dtype=None,
+    order=None,
+    multigraph_weight=sum,
+    weight="weight",
+    nonedge=0.0,
+):
+    # TODO handle multigraphs
+    return not G.is_multigraph()
```
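A hedged usage sketch of the new conversion (not part of the commit): the implementation is intended to mirror `networkx.to_numpy_array` semantics for `nodelist`, `weight`, and `nonedge`, and the `_can_run` hook above makes MultiGraphs fall back to NetworkX. The expected values follow NetworkX's documented behavior; the example assumes the `cugraph` backend is installed and available.

```python
import networkx as nx
import numpy as np

G = nx.Graph()
G.add_edge("a", "b", weight=2.0)
G.add_edge("b", "c", weight=3.0)

# Full weighted adjacency; non-edges are 0.0 by default.
A = nx.to_numpy_array(G, backend="cugraph")
# Expected (node order a, b, c):
# [[0., 2., 0.],
#  [2., 0., 3.],
#  [0., 3., 0.]]

# Restrict and reorder rows/columns with nodelist; "a" and "c" are not
# adjacent, so with nonedge=np.nan every entry is NaN.
B = nx.to_numpy_array(G, nodelist=["c", "a"], nonedge=np.nan, backend="cugraph")
# Expected:
# [[nan, nan],
#  [nan, nan]]
```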
