Skip to content

Commit 061dfa1

Browse files
committed
Significant performance improvement (hopefully) based on more intelligent construction of maximal cliques
1 parent d410a72 commit 061dfa1

File tree

1 file changed

+143
-98
lines changed

1 file changed

+143
-98
lines changed

inflation/cliques_with_symmetry.py

Lines changed: 143 additions & 98 deletions
Original file line numberDiff line numberDiff line change
@@ -1,59 +1,87 @@
11
import numpy as np
2-
from collections import deque
3-
from typing import Tuple, Set
2+
from collections import deque, defaultdict
3+
from typing import Tuple, Set, List
44
import numba as nb
5-
5+
# from itertools import chain
6+
7+
# @nb.njit(nb.boolean(nb.boolean[:], nb.boolean[:]), fastmath=True, cache=False)
8+
# def is_subset_numba(candidate_mask: np.ndarray, potential_super_mask: np.ndarray) -> bool:
9+
# """
10+
# Checks if `candidate_mask` is a subset of `potential_super_mask`
11+
# using a clean, Pythonic `zip` that Numba compiles efficiently.
12+
#
13+
# This is equivalent to `set(candidate) <= set(super)`.
14+
# """
15+
# # Numba has a specialized, fast implementation for zipping NumPy arrays.
16+
# # This avoids manual indexing and is highly readable.
17+
# for b_candidate, b_super in zip(candidate_mask, potential_super_mask):
18+
# # If an element is in the candidate but not in the potential superset...
19+
# if b_candidate and not b_super:
20+
# # ...then it's not a subset.
21+
# return False
22+
# # If the loop completes without returning, it must be a subset.
23+
# return True
24+
25+
# @nb.njit(nb.boolean(nb.boolean[:,:], nb.boolean[:]), fastmath=True, cache=False)
26+
# def is_any_subset_numba(candidate_masks: np.ndarray, potential_super_mask: np.ndarray) -> bool:
27+
# for candidate_mask in candidate_masks:
28+
# if is_subset_numba(candidate_mask, potential_super_mask):
29+
# return True
30+
# return False
631

732
@nb.njit(nb.boolean(nb.boolean[:], nb.boolean[:]), fastmath=True, cache=False)
8-
def is_subset_numba(candidate_mask: np.ndarray, potential_super_mask: np.ndarray) -> bool:
33+
def is_strict_subset_numba(candidate_mask: np.ndarray, potential_super_mask: np.ndarray) -> bool:
934
"""
1035
Checks if `candidate_mask` is a strict (proper) subset of `potential_super_mask`
1136
using a clean, Pythonic `zip` that Numba compiles efficiently.
1237
1338
This is equivalent to `set(candidate) < set(super)`.
1439
"""
15-
# Numba has a specialized, fast implementation for zipping NumPy arrays.
16-
# This avoids manual indexing and is highly readable.
17-
for b_candidate, b_super in zip(candidate_mask, potential_super_mask):
18-
# If an element is in the candidate but not in the potential superset...
19-
if b_candidate and not b_super:
20-
# ...then it's not a subset.
21-
return False
22-
# If the loop completes without returning, it must be a subset.
23-
return True
24-
25-
26-
# The rest of your filtering function remains the same, as it just calls this one.
27-
def filter_maximal_cliques_numpy(clique_masks: np.ndarray) -> np.ndarray:
28-
if clique_masks.shape[0] < 2:
29-
return clique_masks
30-
31-
sizes = np.sum(clique_masks, axis=1)
32-
desc_sort_indices = np.argsort(-sizes)
33-
sorted_masks = clique_masks[desc_sort_indices]
34-
35-
maximal_indices = []
36-
for i in range(sorted_masks.shape[0]):
37-
candidate_mask = sorted_masks[i]
38-
is_dominated = False
39-
40-
for max_idx in maximal_indices:
41-
maximal_mask = sorted_masks[max_idx]
42-
if is_subset_numba(candidate_mask, maximal_mask):
43-
is_dominated = True
44-
break
45-
46-
if not is_dominated:
47-
maximal_indices.append(i)
48-
49-
return sorted_masks[maximal_indices]
50-
51-
52-
def find_cliques_symmetric(
40+
return (np.all(potential_super_mask[candidate_mask]) and
41+
np.any(potential_super_mask[np.logical_not(candidate_mask)]))
42+
# return (is_subset_numba(candidate_mask, potential_super_mask)
43+
# and
44+
# not is_subset_numba(potential_super_mask, candidate_mask))
45+
46+
@nb.njit(nb.boolean(nb.boolean[:,:], nb.boolean[:]), fastmath=True, cache=False)
47+
def is_any_strict_subset_numba(candidate_masks: np.ndarray, potential_super_mask: np.ndarray) -> bool:
48+
for candidate_mask in candidate_masks:
49+
if is_strict_subset_numba(candidate_mask, potential_super_mask):
50+
return True
51+
return False
52+
53+
# # The rest of your filtering function remains the same, as it just calls this one.
54+
# def filter_maximal_cliques_numpy(clique_masks: np.ndarray) -> np.ndarray:
55+
# if clique_masks.shape[0] < 2:
56+
# return clique_masks
57+
#
58+
# sizes = np.sum(clique_masks, axis=1)
59+
# desc_sort_indices = np.argsort(-sizes)
60+
# sorted_masks = clique_masks[desc_sort_indices]
61+
#
62+
# maximal_indices = []
63+
# for i in range(sorted_masks.shape[0]):
64+
# candidate_mask = sorted_masks[i]
65+
# is_dominated = False
66+
#
67+
# for max_idx in maximal_indices:
68+
# maximal_mask = sorted_masks[max_idx]
69+
# if is_subset_numba(candidate_mask, maximal_mask):
70+
# is_dominated = True
71+
# break
72+
#
73+
# if not is_dominated:
74+
# maximal_indices.append(i)
75+
#
76+
# return sorted_masks[maximal_indices]
77+
78+
79+
def all_and_maximal_cliques_symmetry(
5380
adj_matrix: np.ndarray,
5481
automorphisms: np.ndarray,
55-
max_n: int = 0
56-
) -> np.ndarray:
82+
max_n: int = 0,
83+
isolate_maximal: bool = False,
84+
) -> Tuple[List[np.ndarray], List[np.ndarray]]:
5785
"""
5886
Finds ALL and MAXIMAL cliques in a graph using a high-performance,
5987
boolean-mask-based search that is pruned by graph symmetry and
@@ -67,24 +95,28 @@ def find_cliques_symmetric(
6795
The graph's automorphism group as a (k, n) NumPy array.
6896
max_n : int
6997
An integer for maximal clique length. Zero means unrestricted.
98+
isolate_maximal : bool, optional
99+
A flag to enable filtering for maximality; leaving it disabled can increase performance. False by default.
70100
71101
Returns
72102
-------
73-
np.ndarray
74-
A boolean NumPy arrays whose rows are clique bitmasks.
103+
Tuple[List, List]
104+
A list of all cliques as well as a list of maximal cliques. The maximal cliques list will be empty if the
105+
`isolate_maximal` flag is set to False.
75106
"""
76107
num_vertices = adj_matrix.shape[0]
77108
if num_vertices == 0:
78109
return np.empty((0, 0), dtype=bool)
79110

80111
nbrs_masks = adj_matrix.astype(bool)
81-
# all_found_cliques = []
82-
all_found_clique_masks = []
83-
# representative_found_cliques = []
84-
# representative_found_clique_masks = []
112+
all_found_cliques = defaultdict(list)
113+
maximal_found_cliques = defaultdict(list)
85114
seen_canonical_subproblems: Set[Tuple[Tuple[int, ...], Tuple[int, ...]]] = set()
86115
queue = deque()
87-
116+
identity = automorphisms[0]
117+
assert len(identity) == num_vertices, "Automorphism group wrong size for given adjacency matrix."
118+
assert np.array_equal(identity, np.sort(identity)), "First element of automorphism group should be the identity."
119+
# doubled_automorphisms = np.hstack((automorphisms, automorphisms+num_vertices))
88120
# --- 1. Initialize search from one representative vertex per orbit ---
89121
visited_init = np.zeros(num_vertices, dtype=bool)
90122
for i in range(num_vertices):
@@ -102,8 +134,8 @@ def find_cliques_symmetric(
102134

103135
# --- 2. Main search loop using boolean masks ---
104136
while queue:
137+
# print("Queue size:", len(queue))
105138
base_mask, cnbrs_mask = queue.popleft()
106-
# representative_found_clique_masks.append(base_mask)
107139
base_indices = np.flatnonzero(base_mask)
108140
cnbrs_indices = np.flatnonzero(cnbrs_mask)
109141

@@ -122,15 +154,14 @@ def find_cliques_symmetric(
122154
canonical_rep = (canonical_base_tuple, canonical_cnbrs_tuple)
123155

124156
if canonical_rep in seen_canonical_subproblems:
157+
# print("Yay, symmetry to the rescue!")
125158
continue
126159
seen_canonical_subproblems.add(canonical_rep)
127160

128161
# --- B. GENERATE CLIQUE ORBIT ---
129-
# unique_in_orbit, where_unique = np.unique(permuted_bases, axis=0, return_index=True)
130-
# all_found_cliques.extend(unique_in_orbit)
131-
all_found_clique_masks.extend(np.unique(base_mask[automorphisms], axis=0))
132-
133-
162+
clique_size = len(base_indices)
163+
newly_discovered_cliques = np.unique(permuted_bases, axis=0)
164+
all_found_cliques[clique_size].extend(newly_discovered_cliques)
134165

135166
# --- C. EXPLORE CHILDREN (WITH ORDERED-CANDIDATE PRUNING) ---
136167
# This is the corrected and optimized loop.
@@ -149,44 +180,59 @@ def find_cliques_symmetric(
149180
new_cnbrs_mask[:u + 1] = False
150181

151182
queue.append((new_base_mask, new_cnbrs_mask))
152-
all_found_clique_masks = np.unique(all_found_clique_masks, axis=0).astype(bool)
153-
print("Queue complete.")
154-
return all_found_clique_masks
155-
156-
def all_and_maximal_cliques_symmetry(adjmat: np.ndarray,
157-
symgroup: np.ndarray,
158-
max_n=0,
159-
isolate_maximal=True) -> (np.ndarray, np.ndarray):
160-
"""Based on NetworkX's `enumerate_all_cliques`.
161-
This version uses native Python sets instead of numpy arrays.
162-
(Performance comparison needed.)
163-
164-
Parameters
165-
----------
166-
adjmat : numpy.ndarray
167-
A boolean numpy array representing the adjacency matrix of an undirected graph.
168-
symgroup : numpy.ndarray
169-
The graph's automorphism group as a (k, n) NumPy array.
170-
max_n : int, optional
171-
A cutoff for clique size reporting. Default 0, meaning no cutoff.
172-
isolate_maximal : bool, optional
173-
A flag to disable filtering for maximality, which can increase performance. True by default.
174183

175-
Returns
176-
-------
177-
Tuple[List, List]
178-
A list of all cliques as well as a list of maximal cliques. The maximal cliques list will be empty if the
179-
`isolate_maximal` flag is set to False. Cliques are returned as boolean bitmasks.
180-
"""
181-
all_cliques = find_cliques_symmetric(adjmat, symgroup, max_n=max_n)
182-
if isolate_maximal:
183-
max_cliques = filter_maximal_cliques_numpy(all_cliques)
184-
else:
185-
max_cliques = np.empty((0, adjmat.shape[0]), dtype=bool)
186-
return (sorted([np.flatnonzero(bm).tolist() for bm in all_cliques], key=len),
187-
sorted([np.flatnonzero(bm).tolist() for bm in max_cliques], key=len))
188-
# return (all_cliques,
189-
# max_cliques)
184+
# --- D. FILTER FOR MAXIMALITY ---
185+
if isolate_maximal:
186+
# print("Next queue", [(np.flatnonzero(base), np.flatnonzero(cnbrs)) for (base, cnbrs) in queue])
187+
newly_discovered_clique_masks = np.unique(base_mask[automorphisms], axis=0)
188+
if not any(is_any_strict_subset_numba(newly_discovered_clique_masks,
189+
superbase) for (superbase, _) in queue):
190+
# print("YES adding ", newly_discovered_cliques)
191+
maximal_found_cliques[clique_size].extend(newly_discovered_cliques)
192+
193+
all_found_cliques_list = [[]]
194+
for v in all_found_cliques.values():
195+
all_found_cliques_list.extend(np.unique(v, axis=0).tolist())
196+
maximal_found_cliques_list = []
197+
for v in maximal_found_cliques.values():
198+
maximal_found_cliques_list.extend(np.unique(v, axis=0).tolist())
199+
# print("Queue complete.")
200+
return (all_found_cliques_list, maximal_found_cliques_list)
201+
202+
# def all_and_maximal_cliques_symmetry(adjmat: np.ndarray,
203+
# symgroup: np.ndarray,
204+
# max_n=0,
205+
# isolate_maximal=True) -> (np.ndarray, np.ndarray):
206+
# """Based on NetworkX's `enumerate_all_cliques`.
207+
# This version uses native Python sets instead of numpy arrays.
208+
# (Performance comparison needed.)
209+
#
210+
# Parameters
211+
# ----------
212+
# adjmat : numpy.ndarray
213+
# A boolean numpy array representing the adjacency matrix of an undirected graph.
214+
# symgroup : numpy.ndarray
215+
# The graph's automorphism group as a (k, n) NumPy array.
216+
# max_n : int, optional
217+
# A cutoff for clique size reporting. Default 0, meaning no cutoff.
218+
# isolate_maximal : bool, optional
219+
# A flag to disable filtering for maximality, which can increase performance. True by default.
220+
#
221+
# Returns
222+
# -------
223+
# Tuple[List, List]
224+
# A list of all cliques as well as a list of maximal cliques. The maximal cliques list will be empty if the
225+
# `isolate_maximal` flag is set to False. Cliques are returned as boolean bitmasks.
226+
# """
227+
# all_cliques = find_cliques_symmetric(adjmat, symgroup, max_n=max_n)
228+
# if isolate_maximal:
229+
# max_cliques = filter_maximal_cliques_numpy(all_cliques)
230+
# else:
231+
# max_cliques = np.empty((0, adjmat.shape[0]), dtype=bool)
232+
# return (sorted([np.flatnonzero(bm).tolist() for bm in all_cliques], key=len),
233+
# sorted([np.flatnonzero(bm).tolist() for bm in max_cliques], key=len))
234+
# # return (all_cliques,
235+
# # max_cliques)
190236

191237
if __name__ == '__main__':
192238
### Example Usage
@@ -226,8 +272,7 @@ def all_and_maximal_cliques_symmetry(adjmat: np.ndarray,
226272
# All vertices in the Petersen graph are symmetric, so there is only one orbit.
227273
# The algorithm will start a search from vertex 0, find the edge {0,1},
228274
# then prune all other searches that would lead to finding other edges.
229-
all_cliques= find_cliques_symmetric(petersen_adj, petersen_autos)
275+
all_cliques, maximal_cliques = all_and_maximal_cliques_symmetry(petersen_adj, petersen_autos, isolate_maximal=True)
230276

231-
print([np.flatnonzero(bm).tolist() for bm in all_cliques])
232-
max_cliques_petersen = filter_maximal_cliques_numpy(all_cliques)
233-
print([np.flatnonzero(bm).tolist() for bm in max_cliques_petersen])
277+
print(all_cliques)
278+
print(maximal_cliques)

0 commit comments

Comments
 (0)