@@ -68,6 +68,11 @@ class Interface(object):
6868 extra_cluster_groups , _extra_cluster_groups = \
6969 _create_property ('extra_cluster_groups' ,
7070 "(ndarray) additional cluster groups" )
71+
72+ extra_cluster_count , _extra_cluster_count = \
73+ _create_property ('extra_cluster_count' ,
74+ "(int) maximum number of extra group clusters (sorted by decreasing size) to exclude." )
75+
7176 radii_dict , _radii_dict = \
7277 _create_property ('radii_dict' , "(dict) custom atomic radii" )
7378
@@ -126,6 +131,7 @@ def label_group(self,
126131 beta = None ,
127132 layer = None ,
128133 cluster = None ,
134+ surface_cluster = None ,
129135 side = None ):
130136 if group is None :
131137 raise RuntimeError (
@@ -146,6 +152,8 @@ def label_group(self,
146152 _group .sides = side
147153 if cluster is not None :
148154 _group .clusters = cluster
155+ if surface_cluster is not None :
156+ _group .surface_clusters = surface_cluster
149157
150158 def _assign_symmetry (self , symmetry ):
151159 if self .analysis_group is None :
@@ -158,72 +166,71 @@ def _assign_symmetry(self, symmetry):
158166 self .symmetry = symmetry
159167
def _generate_surface_clusters(self, group, cut):
    """Split ``group`` into clusters and store them, largest first.

    Runs ``utilities.do_cluster_analysis_dbscan`` on ``group`` with the
    cutoff ``cut``, no density threshold and ``molecular=False``, then
    orders the clusters by decreasing size and drops the empty ones.

    Parameters
    ----------
    group :
        The group to be clustered; it is indexed below with a boolean
        mask built from the cluster labels.
    cut :
        Value forwarded as ``cluster_cut`` to the cluster search.

    Returns
    -------
    list
        One sub-group of ``group`` per non-empty cluster, sorted by
        decreasing cluster size. The same list is also stored in
        ``self.surface_clusters``.
    """
    labels, counts, _neighbors, _ = utilities.do_cluster_analysis_dbscan(
        group=group, cluster_cut=cut, threshold_density=None,
        molecular=False)
    # Make sure the element-wise comparison and the fancy indexing below
    # work even if the helper hands back plain Python sequences.
    labels = np.asarray(labels)
    counts = np.asarray(counts)
    # The position in ``counts`` is the cluster label. ``kind='stable'``
    # is accepted by every NumPy release (the ``stable=`` keyword only
    # exists from NumPy 2.0 on) and matches _define_cluster_group().
    order = np.argsort(counts, kind='stable')[::-1]
    # Keep only the labels that actually own at least one element.
    order = order[counts[order] > 0]
    self.surface_clusters = [group[labels == label] for label in order]
    return self.surface_clusters
166176
167177 def _define_cluster_group (self ):
168178 self .universe .atoms .pack_into_box ()
169179 self .cluster_group = self .universe .atoms [:0 ] # empty
170180 if (self .cluster_cut is not None ):
171- cluster_cut = float (self .cluster_cut [0 ])
172181 # we start by adding the atoms in the smaller clusters
173- # of the opposit phase, if extra_cluster_groups are provided
182+ # of the opposite phase, if extra_cluster_groups are provided
183+ self ._min_samples = [None ]
174184 if (self .extra_cluster_groups is not None ):
175- for extra in self .extra_cluster_groups :
176- x_labels , x_counts , _ = utilities .do_cluster_analysis_dbscan (
177- extra , cluster_cut , self .cluster_threshold_density ,
178- self .molecular )
185+ for i ,extra in enumerate (self .extra_cluster_groups ):
186+ if len (self .cluster_cut ) == 1 :
187+ cluster_cut = self .cluster_cut [0 ]
188+ else :
189+ cluster_cut = self .cluster_cut [i + 1 ]
190+ if len (self .cluster_threshold_density ) == 1 :
191+ cluster_threshold_density = self .cluster_threshold_density [0 ]
192+ else :
193+ cluster_threshold_density = self .cluster_threshold_density [i + 1 ]
194+ x_labels , x_counts , _ , min_samples = utilities .do_cluster_analysis_dbscan (
195+ group = extra , cluster_cut = cluster_cut ,
196+ threshold_density = cluster_threshold_density ,
197+ molecular = self .molecular )
179198 x_labels = np .array (x_labels )
180- x_label_max = np .argmax (x_counts )
181- x_ids_other = np .where (x_labels != x_label_max )[0 ]
182-
199+ x_label_selection = np .argsort (x_counts )[:: - 1 ][: self . extra_cluster_count ]
200+ x_ids_other = np .where (~ np . isin ( x_labels , x_label_selection ) )[0 ]
201+ self . _min_samples . append ( float ( min_samples ))
183202 self .cluster_group += extra [x_ids_other ]
184-
203+ self . minority_cluster_group = extra [ x_ids_other ]
185204 # next, we add the atoms belonging to the main phase
186205 self .cluster_group += self .analysis_group
187206
188207 # groups have been checked already in _sanity_checks()
189208 # self.cluster_group at this stage is composed of analysis_group +
190209 # the smaller clusters of the other phase
191- labels , counts , neighbors = utilities .do_cluster_analysis_dbscan (
192- self .cluster_group , cluster_cut ,
193- self .cluster_threshold_density , self .molecular )
210+ labels , counts , neighbors , min_samples = utilities .do_cluster_analysis_dbscan (
211+ self .cluster_group , self . cluster_cut [ 0 ] ,
212+ self .cluster_threshold_density [ 0 ] , self .molecular )
194213 labels = np .array (labels )
195-
214+ self . _min_samples [ 0 ] = float ( min_samples )
196215 # counts is not necessarily ordered by size of cluster.
216+ # we sort it and remember that its index corresponds to the
217+ # label
197218 sorting = np .argsort (counts ,kind = 'stable' )[::- 1 ]
198219 # labels for atoms in each cluster starting from the largest
199- unique_labels = np .sort (np .unique (labels [labels > - 1 ]))
220+ # discarding cases where counts are zero (exhausted the labels)
221+ unique_labels = [int (lab ) for lab in sorting if counts [lab ] > 0 ]
200222 # by default, all elements of the cluster_group are in
201223 # single-molecule/atom clusters. We will update them right after.
202224 self .label_group (self .cluster_group , cluster = - 1 )
203- # we go in reverse order to let smaller labels (bigger clusters)
204- # overwrite larger labels (smaller cluster) when the molecular
225+ # we let bigger clusters overwrite smaller cluster when the molecular
205226 # option is used.
206227 for el in unique_labels [::- 1 ]:
207228 # select a label
208- cond = np . where (labels == el )
229+ cond = (labels == el )
209230 if self .molecular is True :
210231 g_ = self .cluster_group [cond ].residues .atoms
211232 else :
212233 g_ = self .cluster_group [cond ]
213- # probably we need an example here, say:
214- # counts = [ 61, 1230, 34, 0, ... 0 ,0 ]
215- # labels = [ 0, 1, 2, 1, -1 .... -1 ]
216- # we have three clusters, of 61, 1230 and 34 atoms.
217- # There are 61 labels '0'
218- # 1230 labels '1'
219- # 34 labels '2'
220- # the remaining are '-1'
221- #
222- # sorting = [1,0,2,3,....] i.e. the largest element is in
223- # (1230) position 1, the next (61) is in position 0, ...
224- # Say, g_ is now the group with label '1' (the biggest cluster)
225- # Using argwhere(sorting==1) returns exactly 0 -> the right
226- # ordered label for the largest cluster.
227234 self .label_group (g_ , cluster = np .argwhere (sorting == el )[0 , 0 ])
228235 # now that labels are assigned for each of the clusters,
229236 # we can restrict the cluster group to the largest cluster.
@@ -253,6 +260,8 @@ def _define_cluster_group(self):
253260 else :
254261 self .cluster_group = self .analysis_group
255262 self .label_group (self .cluster_group , cluster = 0 )
263+ if len (self .cluster_group ) == 0 :
264+ raise ValueError ('Empty cluster group: change your cluster search settings.' )
256265
257266 def is_buried (self , pos ):
258267 """ Checks whether an array of positions are located below
0 commit comments