Marcello-Sega · Marcello-Sega · Jun 4, 2025 · Feb 6, 2024 · Mar 19, 2025 · Mar 19, 2025
diff --git a/pytim/datafiles/__init__.py b/pytim/datafiles/__init__.py
@@ -92,8 +92,12 @@
     "_TEST_ORIENTATION_GRO",  # test file
     "_TEST_PROFILE_GRO",  # test file
 ]
+try:
+    from importlib.resources import files
+    def resource(path,fname): return str(files(path)/fname)
+except:
+    from pkg_resources import resource_filename as resource
 
-from pkg_resources import resource_filename
 import tempfile
 import re as re
 import urllib
@@ -244,101 +248,101 @@ def _vdwradii_gmx(self, filename):
 
 # NOTE: to add a new datafile, make sure it is listed in setup.py (in the root directory)
 # in the package_data option (a glob like 'data/*' is usually enough)
-CCL4_WATER_GRO = resource_filename('pytim', 'data/CCL4.H2O.GRO')
+CCL4_WATER_GRO = resource('pytim', 'data/CCL4.H2O.GRO')
 pytim_data.add('CCL4_WATER_GRO', 'config', 'GRO',
                'Carbon tetrachloride/TIP4p water interface')
 
-WATER_GRO = resource_filename('pytim', 'data/water.gro')
+WATER_GRO = resource('pytim', 'data/water.gro')
 pytim_data.add('WATER_GRO', 'config', 'GRO', 'SPC water/vapour interface')
 
-WATER_LMP_XTC = resource_filename('pytim', 'data/water_lmp.xtc')
+WATER_LMP_XTC = resource('pytim', 'data/water_lmp.xtc')
 pytim_data.add('WATER_LMP_XTC', 'traj', 'LAMMPS', 'SPC water/vapour interface')
 
-WATER_PDB = resource_filename('pytim', 'data/water.pdb')
+WATER_PDB = resource('pytim', 'data/water.pdb')
 pytim_data.add('WATER_PDB', 'config', 'PDB', 'SPC water/vapour interface')
 
-WATER_XYZ = resource_filename('pytim', 'data/water.xyz')
+WATER_XYZ = resource('pytim', 'data/water.xyz')
 pytim_data.add('WATER_XYZ', 'config', 'XYZ', 'SPC water/vapour interface')
 
-MICELLE_PDB = resource_filename('pytim', 'data/micelle.pdb')
+MICELLE_PDB = resource('pytim', 'data/micelle.pdb')
 pytim_data.add('MICELLE_PDB', 'config', 'GRO', 'DPC micelle')
 
-FULLERENE_PDB = resource_filename('pytim', 'data/fullerene.pdb')
+FULLERENE_PDB = resource('pytim', 'data/fullerene.pdb')
 pytim_data.add('FULLERENE_PDB', 'config', 'PDB', 'fullerene')
 
-DPPC_GRO = resource_filename('pytim', 'data/dppc.gro')
+DPPC_GRO = resource('pytim', 'data/dppc.gro')
 pytim_data.add('DPPC_GRO', 'config', 'GRO', 'DPPC bilayer')
 
-GLUCOSE_PDB = resource_filename('pytim', 'data/glucose.pdb')
+GLUCOSE_PDB = resource('pytim', 'data/glucose.pdb')
 pytim_data.add('GLUCOSE_PDB', 'config', 'PDB', 'solvated beta-d-glucose')
 
-LJ_GRO = resource_filename('pytim', 'data/LJ.gro')
+LJ_GRO = resource('pytim', 'data/LJ.gro')
 pytim_data.add('LJ_GRO', 'config', 'GRO',
                'Lennard-Jones liquid/vapour interface')
 
-LJ_SHORT_XTC = resource_filename('pytim', 'data/LJ.short.xtc')
+LJ_SHORT_XTC = resource('pytim', 'data/LJ.short.xtc')
 pytim_data.add('LJ_SHORT_XTC', 'traj', 'XTC', 'LJ liquid/vapour interface')
 
-WATERSMALL_GRO = resource_filename('pytim', 'data/water-small.gro')
+WATERSMALL_GRO = resource('pytim', 'data/water-small.gro')
 pytim_data.add('WATERSMALL_GRO', 'config', 'GRO',
                'small SPC water/vapour interface')
 
-WATER_TWO_INTERFACES = resource_filename('pytim', 'data/water-2-interfaces.gro')
+WATER_TWO_INTERFACES = resource('pytim', 'data/water-2-interfaces.gro')
 pytim_data.add('WATER_TWO_INTERFACES', 'config', 'GRO',
                'two SPC water/vapour interfaces')
 
-WATER_520K_GRO = resource_filename('pytim', 'data/water_520K.gro')
+WATER_520K_GRO = resource('pytim', 'data/water_520K.gro')
 pytim_data.add('WATER_520K_GRO', 'config', 'GRO',
                'SPC/E water/vapour interface, 520K')
 
-WATER_550K_GRO = resource_filename('pytim', 'data/water_550K.gro')
+WATER_550K_GRO = resource('pytim', 'data/water_550K.gro')
 pytim_data.add('WATER_550K_GRO', 'config', 'GRO',
                'SPC/E water/vapour interface, 550K')
 
-WATER_DROPLET_CYLINDRICAL_GRO = resource_filename('pytim', 'data/water_droplet_cylindrical.gro')
+WATER_DROPLET_CYLINDRICAL_GRO = resource('pytim', 'data/water_droplet_cylindrical.gro')
 pytim_data.add('WATER_DROPLET_CYLINDRICAL_GRO', 'config', 'GRO',
                'cylindrical water droplet on graphite')
 
-WATER_DROPLET_CYLINDRICAL_XTC = resource_filename('pytim', 'data/water_droplet_cylindrical.xtc')
+WATER_DROPLET_CYLINDRICAL_XTC = resource('pytim', 'data/water_droplet_cylindrical.xtc')
 pytim_data.add('WATER_DROPLET_CYLINDRICAL_XTC', 'traj', 'XTC',
                'cylindrical water droplet on graphite trajectory')
 
-METHANOL_GRO = resource_filename('pytim', 'data/methanol.gro')
+METHANOL_GRO = resource('pytim', 'data/methanol.gro')
 pytim_data.add('METHANOL_GRO', 'conf', 'GRO', 'methanol/vapour interface')
 
-ILBENZENE_GRO = resource_filename('pytim', 'data/ilbenzene.gro')
+ILBENZENE_GRO = resource('pytim', 'data/ilbenzene.gro')
 pytim_data.add('ILBENZENE_GRO', 'conf', 'GRO', 'BMIM PF4 / benzene interface')
 
-ANTAGONISTIC_GRO = resource_filename('pytim', 'data/antagonistic.gro')
+ANTAGONISTIC_GRO = resource('pytim', 'data/antagonistic.gro')
 pytim_data.add('ANTAGONISTIC_GRO', 'conf', 'GRO', '3-Methylpyridine, Sodium Tetraphenylborate and water')
 
-WATER_XTC = resource_filename('pytim', 'data/water.xtc')
+WATER_XTC = resource('pytim', 'data/water.xtc')
 pytim_data.add('WATER_XTC', 'traj', 'XTC',
                'SPC water/vapour interface trajectory')
 
-_TEST_BCC_GRO = resource_filename('pytim', 'data/_test_bcc.gro')
+_TEST_BCC_GRO = resource('pytim', 'data/_test_bcc.gro')
 pytim_data.add('_TEST_BCC_GRO', 'config', 'GRO', 'test file')
 
-_TEST_ORIENTATION_GRO = resource_filename('pytim',
+_TEST_ORIENTATION_GRO = resource('pytim',
                                           'data/_test_orientation.gro')
 pytim_data.add('_TEST_ORIENTATION_GRO', 'config', 'GRO', 'test file')
 
-_TEST_PROFILE_GRO = resource_filename('pytim', 'data/_test_profile.gro')
+_TEST_PROFILE_GRO = resource('pytim', 'data/_test_profile.gro')
 pytim_data.add('_TEST_PROFILE_GRO', 'config', 'GRO', 'test file')
 
-WATER_LMP_DATA = resource_filename('pytim', 'data/water_lmp.data')
+WATER_LMP_DATA = resource('pytim', 'data/water_lmp.data')
 pytim_data.add('WATER_LMP_DATA', 'topol', 'DATA',
                'LAMMPS topology for WATER_LAMMPS')
 
-G43A1_TOP = resource_filename('pytim', 'data/ffg43a1.nonbonded.itp')
+G43A1_TOP = resource('pytim', 'data/ffg43a1.nonbonded.itp')
 pytim_data.add('G43A1_TOP', 'topol', 'GMX', 'GROMOS 43A1 topology for GROMACS')
 
-AMBER03_TOP = resource_filename('pytim', 'data/ffamber03.nonbonded.itp')
+AMBER03_TOP = resource('pytim', 'data/ffamber03.nonbonded.itp')
 pytim_data.add('AMBER03_TOP', 'topol', 'GMX', 'AMBER 03 topology for GROMACS')
 
-CHARMM27_TOP = resource_filename('pytim', 'data/ffcharmm27.nonbonded.itp')
+CHARMM27_TOP = resource('pytim', 'data/ffcharmm27.nonbonded.itp')
 pytim_data.add('CHARMM27_TOP', 'topol', 'GMX',
                'CHARMM 27 topology for GROMACS')
 
 # This should be the last line: clean up namespace
-del resource_filename
+del resource
diff --git a/pytim/observables/__init__.py b/pytim/observables/__init__.py
@@ -11,6 +11,8 @@
 from .basic_observables import Position, RelativePosition, Velocity, Force
 from .basic_observables import Number, Mass, Charge, NumberOfResidues
 from .basic_observables import Distance
+from .topological_order_parameter import TopologicalOrderParameter
+from .adjacency_matrix import DistanceBasedAdjacencyMatrix, WaterHydrogenBondingAdjacencyMatrix
 from .local_frame import LocalReferenceFrame, Curvature
 
 from .intrinsic_distance import IntrinsicDistance

diff --git a/pytim/observables/adjacency_matrix.py b/pytim/observables/adjacency_matrix.py
@@ -0,0 +1,142 @@
+from __future__ import print_function
+from abc import ABCMeta, abstractmethod
+import numpy as np
+import warnings
+from scipy import stats, sparse
+from scipy.sparse.csgraph import dijkstra
+from scipy.spatial import KDTree
+from MDAnalysis.core.groups import Atom, AtomGroup, Residue, ResidueGroup
+from MDAnalysis.lib import distances
+from ..utilities_geometry import minimum_image
+from .observable import Observable
+
+
+__metaclass__ = ABCMeta
+
+class AdjacencyMatrix(Observable):
+    """
+    Base class for defining different bonding criteria.
+    """
+    @property
+    @abstractmethod
+    def base_group(self):
+        # TODO: comment
+        pass
+
+class DistanceBasedAdjacencyMatrix(AdjacencyMatrix):
+    """
+    Simple distance-based bonding criterion working on a per-atom basis.
+    """
+    def __init__(self, cutoff_distance=3.5, pbc=True):
+        self.cutoff_distance = cutoff_distance
+        self.pbc = pbc    
+        self.group = None
+
+    def compute(self, group):
+        """
+        Return an adjacency matrix 
+
+        :param group: AtomGroup to compute the adjacency matrix of.
+
+        :return: a (len(group),len(group)) ndarray
+        """
+        if self.pbc: box = group.dimensions[:3]
+        else: box = None 
+        self.group = group
+        self.size = (len(group),len(group))
+        M = sparse.lil_matrix(self.size)*0
+        pos = group.pack_into_box(inplace=True)
+        kdtree = KDTree(pos,boxsize=box)
+        neighs_list = kdtree.query_ball_tree(kdtree, self.cutoff_distance)
+        for i,neighs in enumerate(neighs_list): M[i,neighs] = 1
+        self.M = M
+        return M
+
+    @property
+    def base_group(self): return self.group
+
+
+class WaterHydrogenBondingAdjacencyMatrix(AdjacencyMatrix):
+    """
+    Hydrogen bond criterion for water. 
+    :param str method: one of
+        - 'OO'     : Based on O...O distance.
+        - 'OO_OH'  : Based on O...O and O...H distance.
+        - 'OHO'    : Based on O...H...O angle.
+
+    :param float OO: the donor-acceptor cutoff distance.
+        Only donor-acceptor pairs within this cutoff are considered
+        hydrogen-bonded. Default: 3.5 Angstrom. See also the other
+        requirement, cut_hydrogen_acceptor. Note that requiring two distance
+        cutoffs is practically equivalent to requiring a distance and an angle
+        cutoff, if chosen appropriately.
+
+    :param float OH: the maximum distance between a hydrogen atom and its 
+        acceptor to be considered hydrogen bonded. Only used when the chosen
+        method is 'OO_OH'. Default: 2.5 Angstrom
+
+
+    :param float OHO: the maximum angle for two molecules to be considered
+        h-bonded. Only used when the chosen method is 'OHO'. Default: 30 deg.
+    """
+
+    def __init__(self, method="OO_OH", OO=3.5, OH=2.5, OHO=30.0):
+        self.method = method
+        self.OO = OO  # O...O distance cutoff in Å
+        self.OH = OH  # O...H distance cutoff in Å
+        self.OHO = OHO  # O...H...O angle cutoff in degrees
+        self.oxygens = None
+
+    def compute(self, group):
+        """
+        Return an adjacency matrix.
+
+        :param group: AtomGroup to compute the adjacency matrix of.
+
+        :return: a (nmol, nmol) ndarray with nmol the number of water
+            molecules in group.
+        """
+        # we do a soft check using the residue name
+        assert np.all(group.residues.resnames==group.residues.resnames[0]), 'the group should be homogeneous'
+        _ = group.pack_into_box(inplace=True)
+        # number of hydrogens in each donor molecule
+        self.box = group.dimensions[:3]
+        self.size = (len(group.residues),len(group.residues))
+        self.M = sparse.lil_matrix(self.size)*0
+        self.group = group
+        self.oxygens = group.atoms[group.atoms.types == 'O']
+        self.hydrogens = group.atoms[group.atoms.types == 'H']
+        if self.method == 'OO_OH': return self._adjacency_matrix_OO_OH()
+        else: raise NotImplemented("methods OO and OHO not implemented")
+
+    def _adjacency_matrix_OO_OH(self):
+        tree = KDTree(self.oxygens.positions,boxsize=self.box)
+        OOlist = tree.query_ball_tree(tree, self.OO)
+        # the observable is based on residues, so the max size of the matrix will never be larger than
+        # the number of residues. We build a lookup table to simplify handling them: to obtain the sequential
+        # index within the oxygen group, pass its resindex to the table.
+        # resindices is zero-based, so let's add one. Any residue not in the lookuptable will return -1
+        lookup = -1 + np.zeros(1+np.max(self.oxygens.resindices),dtype=int)
+        lookup[self.oxygens.resindices] = np.arange(len(self.oxygens))
+        for i,neighs in enumerate(OOlist): # for each of the acceptors, the list of neighboring donors
+            neigh_As = self.oxygens[[a for a in neighs if a != i ]] # group of donors neighbor to the i-th acceptor, exclude self.
+            sel = neigh_As.residues.atoms # all the atoms in the molecules of the  neighboring donors of the i-th acceptor
+            # displacement between i-th acceptor and all hydrogens in the neighboring donor molecules
+            disp = minimum_image(sel[sel.types=='H'].positions - self.oxygens[i].position,self.box)
+            #correpsonding Acceptor-Hydrogen (AH) distances
+            AH_d = np.linalg.norm(disp,axis=1)
+            # AH_d: we reshape to extract the closest of the 2 Hs to the acceptor
+            AH_d = np.min(AH_d.reshape((AH_d.shape[0]//2,2)),axis=1)
+            # HB criterion on AH distance
+            cond = np.argwhere(AH_d<self.OH).flatten().tolist()
+            # Update the list of HB pairs: we use the lookup table
+            b = lookup[self.oxygens[i].resindex]
+            assert b==i, "b is not i"+str(i)+' '+str(self.oxygens[i].resindex)+ " b="+str(b)
+            # remember, neigh_As is the list of donors in the neighborhood of the i-th acceptor
+            for a in np.unique(lookup[neigh_As[cond].resindices]).tolist():  self.M[b,a], self.M[a,b] = 1,1
+        return self.M
+
+    @property
+    def base_group(self): return self.oxygens
+
+
diff --git a/pytim/observables/profile.py b/pytim/observables/profile.py
@@ -22,12 +22,12 @@ class Profile(object):
                                     None is supplied, it defaults to the number
                                     density. The number density is always
                                     calculated on a per atom basis.
-    :param ITIM       interface:    if provided, calculate the intrinsic
-                                    profile with respect to the first layers
     :param str        direction:    'x','y', or 'z' : calculate the profile
                                     along this direction. (default: 'z' or
                                     the normal direction of the interface,
                                     if provided.
+    :param ITIM       interface:    if provided, calculate the intrinsic
+                                    profile with respect to the first layers
     :param bool       MCnorm:       if True (default) use a simple Monte Carlo
                                     estimate the effective volumes of the bins.
 
@@ -113,8 +113,8 @@ class Profile(object):
     """
 
     def __init__(self,
-                 direction=None,
                  observable=None,
+                 direction=None,
                  interface=None,
                  symmetry='default',
                  mode='default',
@@ -246,7 +246,7 @@ def sample(self, group, **kargs):
                 self.sampled_rnd_values += rnd_accum
         self._counts += 1
 
-    def get_values(self, binwidth=None, nbins=None):
+    def get_values(self, binwidth=None, nbins=None, density=True):
         if self.sampled_values is None:
             print("Warning no profile sampled so far")
         # we use the largest box (largest number of bins) as reference.
@@ -264,7 +264,7 @@ def get_values(self, binwidth=None, nbins=None):
             nbins += 1
 
         vals = self.sampled_values.copy()
-        vals /= (np.average(self._totvol) / self._nbins)
+        if density: vals /= (np.average(self._totvol) / self._nbins)
         vals /= self._counts
         if self.interface is not None:
             # new versions of scipy.binned_statistic don't like inf