Commit 81b287d

Add test that parallel leiden has nearly optimal scaling
1 parent e08349b

1 file changed: 145 additions, 0 deletions
@@ -0,0 +1,145 @@
from .shared_testing_functions import generate_connected_ER, generate_multilayer_intralayer_SBM
from modularitypruning.leiden_utilities import (repeated_leiden_from_gammas, repeated_parallel_leiden_from_gammas,
                                                repeated_leiden_from_gammas_omegas,
                                                repeated_parallel_leiden_from_gammas_omegas)
from multiprocessing import Pool, cpu_count
from random import seed
from time import time, sleep
import functools
import igraph as ig
import numpy as np
import psutil
import unittest
import warnings

# this set of tests ensures that we achieve >= 90% parallel performance
# compared to perfect scaling of single-threaded jobs to multiple cores
PERFORMANCE_TARGET_RELATIVE_TO_PERFECT_SCALING = 0.90


def mock_calculation(_):
    """A mock calculation that provides enough work to make serialization overhead negligible."""
    return sum(range(10 ** 7))


@functools.lru_cache(maxsize=1)
def determine_target_parallelization_speedup(num_calculations=32):
    """
    Calculate the parallelization speedup on mock_calculation to benchmark our implementation against.

    This performs
      * ``num_calculations`` function calls in the single-threaded case, and
      * ``num_calculations * cpu_count()`` calls in the multi-processed case

    Due in part to frequency scaling and simple memory contention, leidenalg over multiple processes (completely
    outside of Python or multiprocessing.Pool) seems to run at around (90% * core count) speedup on modern systems when
    hyper-threading is disabled.
    """
    global PERFORMANCE_TARGET_RELATIVE_TO_PERFECT_SCALING

    sleep(5)  # sleep to increase stability of the CPU utilization check
    cpu_utilization = psutil.cpu_percent()
    if cpu_utilization > 10:
        PERFORMANCE_TARGET_RELATIVE_TO_PERFECT_SCALING = 0.5
        warnings.warn(f"System CPU utilization is non-negligible during parallel performance test! "
                      f"Dropping performance scaling target to 50%.")

    start_time = time()
    _ = [mock_calculation(i) for i in range(num_calculations)]
    base_duration = time() - start_time

    num_pool_calculations = num_calculations * cpu_count()
    with Pool(processes=cpu_count()) as pool:
        pool.map(mock_calculation, range(cpu_count()))  # force pool initialization and basic burn-in

        start_time = time()
        pool.map(mock_calculation, range(num_pool_calculations))
        pool_duration = time() - start_time

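    # the value returned below is the observed throughput speedup: the pool performs cpu_count() times as many
    # calls, so under perfect scaling this ratio would equal cpu_count(); e.g. with 8 cores, a 10 s serial
    # baseline and an 11 s pool run of 8x the work, it returns 8 * 10 / 11 ~= 7.3, i.e. ~91% of perfect scaling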
    return num_pool_calculations / num_calculations * base_duration / pool_duration


class TestParallelLeidenPerformance(unittest.TestCase):
    @staticmethod
    def run_singlelayer_graph_parallelization(G, gammas):
        target_speedup = determine_target_parallelization_speedup()

        start_time = time()
        _ = repeated_leiden_from_gammas(G, gammas)
        duration = time() - start_time

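        # the parallel sweep uses cpu_count() times as many gammas, so each worker gets roughly the same
        # amount of Leiden work as the single-threaded baseline above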
        pool_gammas = np.linspace(min(gammas), max(gammas), len(gammas) * cpu_count())
        start_time = time()
        _ = repeated_parallel_leiden_from_gammas(G, pool_gammas)
        pool_duration = time() - start_time

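        # normalize the observed speedup by the mock-calculation target: a return value of 1.0 means the
        # parallel Leiden implementation scales exactly as well as the baseline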
        speedup = len(pool_gammas) / len(gammas) * duration / pool_duration
        return speedup / target_speedup

    @staticmethod
    def run_multilayer_graph_parallelization(G_intralayer, G_interlayer, layer_membership, gammas, omegas):
        target_speedup = determine_target_parallelization_speedup()

        start_time = time()
        _ = repeated_leiden_from_gammas_omegas(G_intralayer, G_interlayer, layer_membership, gammas, omegas)
        duration = time() - start_time

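        # each parameter axis is scaled by sqrt(cpu_count()) so the (gamma, omega) grid contains roughly
        # cpu_count() times as many points in total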
        pool_gammas = np.linspace(min(gammas), max(gammas), int(len(gammas) * np.sqrt(cpu_count())))
        pool_omegas = np.linspace(min(omegas), max(omegas), int(len(omegas) * np.sqrt(cpu_count())))
        start_time = time()
        _ = repeated_parallel_leiden_from_gammas_omegas(
            G_intralayer, G_interlayer, layer_membership, pool_gammas, pool_omegas
        )
        pool_duration = time() - start_time

        speedup = len(pool_gammas) * len(pool_omegas) / len(gammas) / len(omegas) * duration / pool_duration
        return speedup / target_speedup

    def test_tiny_singlelayer_graph_many_runs(self):
        """Single-threaded equivalent is 25k runs on G(n=34, m=78)."""
        G = ig.Graph.Famous("Zachary")
        gammas = np.linspace(0.0, 4.0, 25000)
        parallelization = self.run_singlelayer_graph_parallelization(G, gammas)
        self.assertGreater(parallelization, PERFORMANCE_TARGET_RELATIVE_TO_PERFECT_SCALING)

    def test_larger_singlelayer_graph_few_runs(self):
        """Single-threaded equivalent is 25 runs on G(n=10000, m=40000)."""
        G = generate_connected_ER(n=10000, m=40000, directed=False)
        gammas = np.linspace(0.0, 2.0, 25)
        parallelization = self.run_singlelayer_graph_parallelization(G, gammas)
        self.assertGreater(parallelization, PERFORMANCE_TARGET_RELATIVE_TO_PERFECT_SCALING)

    def test_tiny_multilayer_graph_many_runs(self):
        """Single-threaded equivalent is 10k runs on G(n=50, m=150)."""
        G_intralayer, layer_membership = generate_multilayer_intralayer_SBM(
            copying_probability=0.9, p_in=0.8, p_out=0.2, first_layer_membership=[0] * 5 + [1] * 5, num_layers=5
        )
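        # interlayer edges connect each of the 10 nodes per layer to its copy in the next layer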
        interlayer_edges = [(10 * layer + v, 10 * layer + v + 10)
                            for layer in range(5 - 1) for v in range(10)]
        G_interlayer = ig.Graph(interlayer_edges, directed=True)

        gammas = np.linspace(0.0, 2.0, 100)
        omegas = np.linspace(0.0, 2.0, 100)
        parallelization = self.run_multilayer_graph_parallelization(G_intralayer, G_interlayer,
                                                                    layer_membership, gammas, omegas)
        self.assertGreater(parallelization, PERFORMANCE_TARGET_RELATIVE_TO_PERFECT_SCALING)

    def test_larger_multilayer_graph_few_runs(self):
        """Single-threaded equivalent is 25 runs on approximately G(n=2500, m=15000)."""
        G_intralayer, layer_membership = generate_multilayer_intralayer_SBM(
            copying_probability=0.9, p_in=0.15, p_out=0.05, first_layer_membership=[0] * 50 + [1] * 50, num_layers=25
        )
        interlayer_edges = [(100 * layer + v, 100 * layer + v + 100)
                            for layer in range(25 - 1) for v in range(100)]
        G_interlayer = ig.Graph(interlayer_edges, directed=True)

        gammas = np.linspace(0.0, 2.0, 5)
        omegas = np.linspace(0.0, 2.0, 5)
        parallelization = self.run_multilayer_graph_parallelization(G_intralayer, G_interlayer,
                                                                    layer_membership, gammas, omegas)
        self.assertGreater(parallelization, PERFORMANCE_TARGET_RELATIVE_TO_PERFECT_SCALING)


if __name__ == "__main__":
    seed(0)
    unittest.main()

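A note on running the new suite: the test file's path is not shown on this page, and its relative import of shared_testing_functions means it has to be run as part of its test package rather than as a standalone script. Assuming a hypothetical module name such as tests.test_parallel_leiden_performance, a minimal invocation would be

    python -m unittest tests.test_parallel_leiden_performance -v

The timings are only meaningful on an otherwise idle machine; as implemented above, the suite drops its scaling target from 90% to 50% whenever background CPU utilization exceeds 10%.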