Skip to content

Commit bdf5070

Browse files
committed
fixes
1 parent 499a60d commit bdf5070

8 files changed

Lines changed: 100 additions & 60 deletions

File tree

docs/source/newproperties.rst

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,15 @@ This results in the following (isolated molecules not shown)
9696
+----------------------------+
9797

9898

99+
Surface clusters
100+
--------
101+
102+
The value of :py:obj:`atoms.surface_clusters` identify the surface cluster for
103+
classes like :class:`~pytim.itim.GITIM`, provided that the option :obj:`surface_cluster_cut`
104+
is not `None`. The label is -1 if the atom is not a surface one, and a progressive number
105+
in order of growing size of surface atom clusters.
106+
107+
99108
Sides
100109
-----
101110

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ dependencies = [
3939
"cython>=3.0.8",
4040
"gsd>3.0.0",
4141
"MDAnalysis>=2.8.0",
42-
"scipy>=1.12.0",
42+
"scipy>=1.15.0",
4343
"setuptools",
4444
"PyWavelets>=1.8.0",
4545
"scikit-image>=0.24.0",

pytim/gitim.py

Lines changed: 17 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,10 @@ class GITIM(Interface):
5454
search, if cluster_cut is not None
5555
:param Object extra_cluster_groups: Additional groups, to allow for
5656
mixed interfaces
57+
:param int extra_cluster_count: Number of clusters (sorted by decreasing
58+
size) to be considered the majority component of
59+
the opposite phase.
60+
5761
:param int n_clusters: Tag as surface atoms/molecules only
5862
those in the n_clusters largest clusters.
5963
Default: None, uses all clusters.
@@ -75,7 +79,6 @@ class GITIM(Interface):
7579
cluster of (initially detected) surface
7680
ones.
7781
Default: None, disables the filtering.
78-
:param str symmetry: Gives the code a hint about the topology
7982
:param str symmetry: Gives the code a hint about the topology
8083
of the interface: 'generic' (default)
8184
or 'planar'
@@ -143,6 +146,7 @@ def __init__(self,
143146
include_zero_radius=False,
144147
cluster_threshold_density=None,
145148
extra_cluster_groups=None,
149+
extra_cluster_count=1,
146150
n_clusters=None,
147151
min_cluster_size=None,
148152
biggest_cluster_only = False, # backward compatibility, sets n_clusters
@@ -290,10 +294,9 @@ def _assign_layers_setup(self):
290294
# from the triangulation and updating the circumradius of the neighbors
291295
# of the removed points only.
292296

293-
dbs = utilities.do_cluster_analysis_dbscan
294-
return alpha_group, dbs
297+
return alpha_group
295298

296-
def _assign_layers_postprocess(self, dbs, group, alpha_group, layer):
299+
def _assign_layers_postprocess(self, group, alpha_group, layer):
297300
if len(group) > 0:
298301
if self.molecular:
299302
group = group.residues.atoms
@@ -303,12 +306,20 @@ def _assign_layers_postprocess(self, dbs, group, alpha_group, layer):
303306
alpha_group = alpha_group[:] - group[:]
304307
self.label_group(
305308
self._layers[layer], beta=1. * (layer + 1), layer=(layer + 1))
309+
310+
if self.surface_cluster_cut is not None:
311+
self.label_group(group.universe.atoms,surface_cluster=-1)
312+
sorted_groups = self._generate_surface_clusters(
313+
group, self.surface_cluster_cut)
314+
for i,g in enumerate(sorted_groups):
315+
self.label_group(g,surface_cluster=i)
316+
306317
return alpha_group
307318

308319
def _assign_layers(self):
309320
"""Determine the GITIM layers."""
310321

311-
alpha_group, dbs = self._assign_layers_setup()
322+
alpha_group = self._assign_layers_setup()
312323

313324
self.triangulation = [] # storage for triangulations
314325

@@ -317,12 +328,9 @@ def _assign_layers(self):
317328
alpha_ids = self.alpha_shape(self.alpha, alpha_group, layer)
318329

319330
group = alpha_group[alpha_ids]
320-
if self.surface_cluster_cut is not None:
321-
group = self._generate_surface_clusters(
322-
group, self.surface_cluster_cut)
323331

324332
alpha_group = self._assign_layers_postprocess(
325-
dbs, group, alpha_group, layer)
333+
group, alpha_group, layer)
326334

327335
# reset the interpolator
328336
self._interpolator = None

pytim/interface.py

Lines changed: 45 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,11 @@ class Interface(object):
6868
extra_cluster_groups, _extra_cluster_groups =\
6969
_create_property('extra_cluster_groups',
7070
"(ndarray) additional cluster groups")
71+
72+
extra_cluster_count, _extra_cluster_count=\
73+
_create_property('extra_cluster_count',
74+
"(int) maximum number of extra group clusters (sorted by decreasing size) to exclude.")
75+
7176
radii_dict, _radii_dict =\
7277
_create_property('radii_dict', "(dict) custom atomic radii")
7378

@@ -126,6 +131,7 @@ def label_group(self,
126131
beta=None,
127132
layer=None,
128133
cluster=None,
134+
surface_cluster=None,
129135
side=None):
130136
if group is None:
131137
raise RuntimeError(
@@ -146,6 +152,8 @@ def label_group(self,
146152
_group.sides = side
147153
if cluster is not None:
148154
_group.clusters = cluster
155+
if surface_cluster is not None:
156+
_group.surface_clusters = surface_cluster
149157

150158
def _assign_symmetry(self, symmetry):
151159
if self.analysis_group is None:
@@ -158,72 +166,71 @@ def _assign_symmetry(self, symmetry):
158166
self.symmetry = symmetry
159167

160168
def _generate_surface_clusters(self, group, cut):
161-
# at the moment, selects only the biggest cluster
162-
labels, counts, neighs = utilities.do_cluster_analysis_dbscan(
163-
group, cut,
169+
labels, counts, neighs, _ = utilities.do_cluster_analysis_dbscan(
170+
group=group, cluster_cut=cut,threshold_density=None,
164171
molecular=False)
165-
return group[np.where(labels == np.argmax(counts))[0]]
172+
sortid = np.argsort(counts,stable=True)[::-1]
173+
sortid = sortid[counts[sortid]>0] # just the clusters with more than one element
174+
self.surface_clusters = [group[labels==s] for s in sortid]
175+
return self.surface_clusters
166176

167177
def _define_cluster_group(self):
168178
self.universe.atoms.pack_into_box()
169179
self.cluster_group = self.universe.atoms[:0] # empty
170180
if (self.cluster_cut is not None):
171-
cluster_cut = float(self.cluster_cut[0])
172181
# we start by adding the atoms in the smaller clusters
173-
# of the opposit phase, if extra_cluster_groups are provided
182+
# of the opposite phase, if extra_cluster_groups are provided
183+
self._min_samples=[None]
174184
if (self.extra_cluster_groups is not None):
175-
for extra in self.extra_cluster_groups:
176-
x_labels, x_counts, _ = utilities.do_cluster_analysis_dbscan(
177-
extra, cluster_cut, self.cluster_threshold_density,
178-
self.molecular)
185+
for i,extra in enumerate(self.extra_cluster_groups):
186+
if len(self.cluster_cut) == 1:
187+
cluster_cut = self.cluster_cut[0]
188+
else:
189+
cluster_cut = self.cluster_cut[i+1]
190+
if len(self.cluster_threshold_density) == 1:
191+
cluster_threshold_density = self.cluster_threshold_density[0]
192+
else:
193+
cluster_threshold_density = self.cluster_threshold_density[i+1]
194+
x_labels, x_counts, _ , min_samples = utilities.do_cluster_analysis_dbscan(
195+
group=extra, cluster_cut=cluster_cut,
196+
threshold_density=cluster_threshold_density,
197+
molecular=self.molecular)
179198
x_labels = np.array(x_labels)
180-
x_label_max = np.argmax(x_counts)
181-
x_ids_other = np.where(x_labels != x_label_max)[0]
182-
199+
x_label_selection = np.argsort(x_counts)[::-1][:self.extra_cluster_count]
200+
x_ids_other = np.where(~np.isin(x_labels, x_label_selection))[0]
201+
self._min_samples.append(float(min_samples))
183202
self.cluster_group += extra[x_ids_other]
184-
203+
self.minority_cluster_group = extra[x_ids_other]
185204
# next, we add the atoms belonging to the main phase
186205
self.cluster_group += self.analysis_group
187206

188207
# groups have been checked already in _sanity_checks()
189208
# self.cluster_group at this stage is composed of analysis_group +
190209
# the smaller clusters of the other phase
191-
labels, counts, neighbors = utilities.do_cluster_analysis_dbscan(
192-
self.cluster_group, cluster_cut,
193-
self.cluster_threshold_density, self.molecular)
210+
labels, counts, neighbors, min_samples = utilities.do_cluster_analysis_dbscan(
211+
self.cluster_group, self.cluster_cut[0],
212+
self.cluster_threshold_density[0], self.molecular)
194213
labels = np.array(labels)
195-
214+
self._min_samples[0] = float(min_samples)
196215
# counts is not necessarily ordered by size of cluster.
216+
# we sort it and remember that its index corresponds to the
217+
# label
197218
sorting = np.argsort(counts,kind='stable')[::-1]
198219
# labels for atoms in each cluster starting from the largest
199-
unique_labels = np.sort(np.unique(labels[labels > -1]))
220+
# discarding cases where counts are zero (exhausted the labels)
221+
unique_labels = [int(lab) for lab in sorting if counts[lab] > 0]
200222
# by default, all elements of the cluster_group are in
201223
# single-molecule/atom clusters. We will update them right after.
202224
self.label_group(self.cluster_group, cluster=-1)
203-
# we go in reverse order to let smaller labels (bigger clusters)
204-
# overwrite larger labels (smaller cluster) when the molecular
225+
# we let bigger clusters overwrite smaller cluster when the molecular
205226
# option is used.
206227
for el in unique_labels[::-1]:
207228
# select a label
208-
cond = np.where(labels == el)
229+
cond = (labels == el)
209230
if self.molecular is True:
210231
g_ = self.cluster_group[cond].residues.atoms
211232
else:
212233
g_ = self.cluster_group[cond]
213-
# probably we need an example here, say:
214-
# counts = [ 61, 1230, 34, 0, ... 0 ,0 ]
215-
# labels = [ 0, 1, 2, 1, -1 .... -1 ]
216-
# we have three clusters, of 61, 1230 and 34 atoms.
217-
# There are 61 labels '0'
218-
# 1230 labels '1'
219-
# 34 labels '2'
220-
# the remaining are '-1'
221-
#
222-
# sorting = [1,0,2,3,....] i.e. the largest element is in
223-
# (1230) position 1, the next (61) is in position 0, ...
224-
# Say, g_ is now the group with label '1' (the biggest cluster)
225-
# Using argwhere(sorting==1) returns exactly 0 -> the right
226-
# ordered label for the largest cluster.
227234
self.label_group(g_, cluster=np.argwhere(sorting == el)[0, 0])
228235
# now that labels are assigned for each of the clusters,
229236
# we can restric the cluster group to the largest cluster.
@@ -253,6 +260,8 @@ def _define_cluster_group(self):
253260
else:
254261
self.cluster_group = self.analysis_group
255262
self.label_group(self.cluster_group, cluster=0)
263+
if len(self.cluster_group) == 0:
264+
raise ValueError('Empty cluster group: change your cluster search settings.')
256265

257266
def is_buried(self, pos):
258267
""" Checks wether an array of positions are located below

pytim/properties.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,13 @@ class Clusters(MDAnalysis.core.topologyattrs.AtomAttr):
2323
per_object = 'atom'
2424

2525

26+
class SurfaceClusters(MDAnalysis.core.topologyattrs.AtomAttr):
27+
"""Clusters for each surface atom"""
28+
attrname = 'surface_clusters'
29+
singular = 'surface_cluster'
30+
per_object = 'atom'
31+
32+
2633
class Sides(MDAnalysis.core.topologyattrs.AtomAttr):
2734
"""Sides for each atom"""
2835
attrname = 'sides'
@@ -82,7 +89,7 @@ def _missing_attributes(interface, universe):
8289
def _extra_attributes(interface, universe):
8390
# we add here the new layer, cluster and side information
8491
# they are not part of MDAnalysis.core.topologyattrs
85-
attr = {'layers': Layers, 'clusters': Clusters, 'sides': Sides}
92+
attr = {'layers': Layers, 'clusters': Clusters, 'sides': Sides, 'surface_clusters': SurfaceClusters}
8693
for key in attr.keys():
8794
if key not in dir(universe.atoms):
8895
vals = np.zeros(len(universe.atoms), dtype=int) - 1

pytim/sanity_check.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -111,14 +111,19 @@ def assign_cluster_params(self,
111111
elements = 0
112112
extraelements = -1
113113

114-
self.interface.cluster_threshold_density = cluster_threshold_density
115114

116115
# we first make sure cluster_cut is either None, or an array
117116
if isinstance(cluster_cut, (int, float)):
118117
self.interface.cluster_cut = np.array([float(cluster_cut)])
119118
else:
120119
self.interface.cluster_cut = cluster_cut
121120

121+
# same with the cluster threshold
122+
if isinstance(cluster_threshold_density, (int, float, str, type(None))):
123+
self.interface.cluster_threshold_density = np.array([cluster_threshold_density])
124+
else:
125+
self.interface.cluster_threshold_density = cluster_threshold_density
126+
122127
# same with extra_cluster_groups
123128
if not isinstance(extra_cluster_groups,
124129
(list, tuple, np.ndarray, type(None))):

pytim/utilities_dbscan.py

Lines changed: 13 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,8 @@ def determine_samples(threshold_density, cluster_cut, n_neighbors):
1919
elif (threshold_density == 'auto'):
2020
modes = 2
2121
centroid, _ = vq.kmeans2(
22-
n_neighbors * 1.0, modes, iter=10, check_finite=False)
22+
n_neighbors * 1.0, modes, iter=10, check_finite=False,
23+
rng=5317) # rng used to set the seed for reproducibile results
2324
min_samples = np.max(centroid)
2425

2526
else:
@@ -35,11 +36,13 @@ def do_cluster_analysis_dbscan(group,
3536
molecular=True):
3637
""" Performs a cluster analysis using DBSCAN
3738
38-
:returns [labels,counts,neighbors]: lists of the id of the cluster to
39-
which every atom is belonging to, of the
40-
number of elements in each cluster, and of
41-
the number of neighbors for each atom
42-
according to the specified criterion.
39+
:returns [labels,counts,neighbors,min_samples]: lists of the id of
40+
the cluster to which every atom is belonging to, of the
41+
number of elements in each cluster,and of the number of
42+
neighbors for each atom according to the specified criterion.
43+
The last item, min_samples, is the threshold used in the
44+
clustering algorithm as passed or determined with the 'auto'
45+
option.
4346
4447
Uses a slightly modified version of DBSCAN from sklearn.cluster
4548
that takes periodic boundary conditions into account (through
@@ -71,13 +74,12 @@ def do_cluster_analysis_dbscan(group,
7174

7275
min_samples = determine_samples(threshold_density, cluster_cut,
7376
n_neighbors)
74-
7577
labels = -np.ones(points.shape[0], dtype=np.intp)
7678
counts = np.zeros(points.shape[0], dtype=np.intp)
7779

7880
core_samples = np.asarray(n_neighbors >= min_samples, dtype=np.uint8)
7981
dbscan_inner(core_samples, neighborhoods, labels, counts)
80-
return labels, counts, n_neighbors
82+
return labels, counts, n_neighbors, min_samples
8183

8284

8385
def _():
@@ -94,10 +96,10 @@ def _():
9496
>>> u = mda.Universe(ILBENZENE_GRO)
9597
>>> benzene = u.select_atoms('name C and resname LIG')
9698
>>> u.atoms.positions = u.atoms.pack_into_box()
97-
>>> l,c,n = DBScan(benzene, cluster_cut = 4.5, threshold_density = None)
98-
>>> l1,c1,n1 = DBScan(benzene, cluster_cut = 8.5, threshold_density = 'auto')
99+
>>> l,c,n,t = DBScan(benzene, cluster_cut = 4.5, threshold_density = None)
100+
>>> l1,c1,n1,t1 = DBScan(benzene, cluster_cut = 8.5, threshold_density = 'auto')
99101
>>> td = 0.009
100-
>>> l2,c2,n2 = DBScan(benzene, cluster_cut = 8.5, threshold_density = td)
102+
>>> l2,c2,n2,t2 = DBScan(benzene, cluster_cut = 8.5, threshold_density = td)
101103
>>> print (np.sort(c)[-2:])
102104
[ 12 14904]
103105

requirements.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
cython>=3.0.8
22
numpy>=2.1.3
3-
scipy>=1.12.0
3+
scipy>=1.15.0
44
gsd>3.0.0
55
setuptools
66
MDAnalysis>=2.8.0

0 commit comments

Comments
 (0)