
Commit fb21438

martinfleis and ljwolf authored
add benchmarks of Graph using asv (#797)
* add benchmarks
* use asv
* properly use asv
* cleanup
* benchmarks/bench_graph.py

---------

Co-authored-by: Levi John Wolf <[email protected]>
1 parent 71ace91 commit fb21438
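
The new suite is driven by asv (airspeed velocity). A minimal sketch of how it might be exercised locally from the repository root, assuming asv is installed and the defaults in asv.conf.json below are used (raw results and the HTML report land under the newly gitignored .asv/ directory):

pip install asv
asv machine --yes   # record machine metadata once, non-interactively
asv run             # build the project and run benchmarks/ against the configured branch ("main")
asv publish         # render results from .asv/results into .asv/html
asv preview         # serve the HTML report locally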

File tree

5 files changed: +328 -0 lines changed


.gitignore

Lines changed: 1 addition & 0 deletions
@@ -106,3 +106,4 @@ pysal/examples/snow_maps/soho_graph.qpj
 pysal/examples/snow_maps/soho_graph.shp
 pysal/examples/snow_maps/soho_graph.shx

+.asv

asv.conf.json

Lines changed: 210 additions & 0 deletions
@@ -0,0 +1,210 @@
{
    // The version of the config file format. Do not change, unless
    // you know what you are doing.
    "version": 1,

    // The name of the project being benchmarked
    "project": "libpysal",

    // The project's homepage
    "project_url": "http://pysal.org/libpysal",

    // The URL or local path of the source code repository for the
    // project being benchmarked
    "repo": ".",

    // The Python project's subdirectory in your repo. If missing or
    // the empty string, the project is assumed to be located at the root
    // of the repository.
    // "repo_subdir": "",

    // Customizable commands for building the project.
    // See asv.conf.json documentation.
    // To build the package using pyproject.toml (PEP518), uncomment the following lines
    "build_command": [
        "python -m pip install build",
        "python -m build",
        "python -mpip wheel -w {build_cache_dir} {build_dir} --no-deps"
    ],
    // To build the package using setuptools and a setup.py file, uncomment the following lines
    // "build_command": [
    //     "python setup.py build",
    //     "python -mpip wheel -w {build_cache_dir} {build_dir}"
    // ],

    // Customizable commands for installing and uninstalling the project.
    // See asv.conf.json documentation.
    // "install_command": ["in-dir={env_dir} python -mpip install {wheel_file}"],
    // "uninstall_command": ["return-code=any python -mpip uninstall -y {project}"],

    // List of branches to benchmark. If not provided, defaults to "main"
    // (for git) or "default" (for mercurial).
    "branches": ["main"], // for git
    // "branches": ["default"], // for mercurial

    // The DVCS being used. If not set, it will be automatically
    // determined from "repo" by looking at the protocol in the URL
    // (if remote), or by looking for special directories, such as
    // ".git" (if local).
    // "dvcs": "git",

    // The tool to use to create environments. May be "conda",
    // "virtualenv", "mamba" (above 3.8)
    // or other value depending on the plugins in use.
    // If missing or the empty string, the tool will be automatically
    // determined by looking for tools on the PATH environment
    // variable.
    "environment_type": "virtualenv",

    // timeout in seconds for installing any dependencies in environment
    // defaults to 10 min
    //"install_timeout": 600,

    // the base URL to show a commit for the project.
    "show_commit_url": "http://github.com/pysal/libpysal/commit/",

    // The Pythons you'd like to test against. If not provided, defaults
    // to the current version of Python used to run `asv`.
    // "pythons": ["3.8", "3.12"],

    // The list of conda channel names to be searched for benchmark
    // dependency packages in the specified order
    // "conda_channels": ["conda-forge", "defaults"],

    // A conda environment file that is used for environment creation.
    // "conda_environment_file": "ci/313-latest.yaml",

    // The matrix of dependencies to test. Each key of the "req"
    // requirements dictionary is the name of a package (in PyPI) and
    // the values are version numbers. An empty list or empty string
    // indicates to just test against the default (latest)
    // version. null indicates that the package is to not be
    // installed. If the package to be tested is only available from
    // PyPi, and the 'environment_type' is conda, then you can preface
    // the package name by 'pip+', and the package will be installed
    // via pip (with all the conda available packages installed first,
    // followed by the pip installed packages).
    //
    // The ``@env`` and ``@env_nobuild`` keys contain the matrix of
    // environment variables to pass to build and benchmark commands.
    // An environment will be created for every combination of the
    // cartesian product of the "@env" variables in this matrix.
    // Variables in "@env_nobuild" will be passed to every environment
    // during the benchmark phase, but will not trigger creation of
    // new environments. A value of ``null`` means that the variable
    // will not be set for the current combination.
    //
    "matrix": {
        "req": {
            "numpy": "2.2",
            "beautifulsoup4": "",
            "geopandas": "",
            "jinja2": "",
            "packaging": "",
            "pandas": "",
            "platformdirs": "",
            "requests": "",
            "scikit-learn": "",
            "scipy": "",
            "shapely": "",
            "numba": "",
            "joblib": "",
            "networkx": "",
            "pyarrow": "",
            "sqlalchemy": "",
            "xarray": "",
            "zstd": "",
            "pandana": "",
            "geodatasets": "",
            "matplotlib": ""
        }
    },

    // Combinations of libraries/python versions can be excluded/included
    // from the set to test. Each entry is a dictionary containing additional
    // key-value pairs to include/exclude.
    //
    // An exclude entry excludes entries where all values match. The
    // values are regexps that should match the whole string.
    //
    // An include entry adds an environment. Only the packages listed
    // are installed. The 'python' key is required. The exclude rules
    // do not apply to includes.
    //
    // In addition to package names, the following keys are available:
    //
    // - python
    //     Python version, as in the *pythons* variable above.
    // - environment_type
    //     Environment type, as above.
    // - sys_platform
    //     Platform, as in sys.platform. Possible values for the common
    //     cases: 'linux2', 'win32', 'cygwin', 'darwin'.
    // - req
    //     Required packages
    // - env
    //     Environment variables
    // - env_nobuild
    //     Non-build environment variables
    //
    // "exclude": [
    //     {"python": "3.2", "sys_platform": "win32"}, // skip py3.2 on windows
    //     {"environment_type": "conda", "req": {"six": null}}, // don't run without six on conda
    //     {"env": {"ENV_VAR_1": "val2"}}, // skip val2 for ENV_VAR_1
    // ],
    //
    // "include": [
    //     // additional env for python3.12
    //     {"python": "3.12", "req": {"numpy": "1.26"}, "env_nobuild": {"FOO": "123"}},
    //     // additional env if run on windows+conda
    //     {"platform": "win32", "environment_type": "conda", "python": "3.12", "req": {"libpython": ""}},
    // ],

    // The directory (relative to the current directory) that benchmarks are
    // stored in. If not provided, defaults to "benchmarks"
    // "benchmark_dir": "benchmarks",

    // The directory (relative to the current directory) to cache the Python
    // environments in. If not provided, defaults to "env"
    "env_dir": ".asv/env",

    // The directory (relative to the current directory) that raw benchmark
    // results are stored in. If not provided, defaults to "results".
    "results_dir": ".asv/results",

    // The directory (relative to the current directory) that the html tree
    // should be written to. If not provided, defaults to "html".
    "html_dir": ".asv/html",

    // The number of characters to retain in the commit hashes.
    // "hash_length": 8,

    // `asv` will cache results of the recent builds in each
    // environment, making them faster to install next time. This is
    // the number of builds to keep, per environment.
    // "build_cache_size": 2,

    // The commits after which the regression search in `asv publish`
    // should start looking for regressions. Dictionary whose keys are
    // regexps matching to benchmark names, and values corresponding to
    // the commit (exclusive) after which to start looking for
    // regressions. The default is to start from the first commit
    // with results. If the commit is `null`, regression detection is
    // skipped for the matching benchmark.
    //
    // "regressions_first_commits": {
    //     "some_benchmark": "352cdf", // Consider regressions only after this commit
    //     "another_benchmark": null,  // Skip regression detection altogether
    // },

    // The thresholds for relative change in results, after which `asv
    // publish` starts reporting regressions. Dictionary of the same
    // form as in ``regressions_first_commits``, with values
    // indicating the thresholds. If multiple entries match, the
    // maximum is taken. If no entry matches, the default is 5%.
    //
    // "regressions_thresholds": {
    //     "some_benchmark": 0.01,   // Threshold of 1%
    //     "another_benchmark": 0.5, // Threshold of 50%
    // },
}
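
The matrix above pins only numpy and pulls the remaining requirements at their latest versions. The ``@env``/``env_nobuild`` machinery described in the comments could also be used to export the ASV flag that libpysal/graph/base.py checks (see the last file in this commit), instead of setting it in the shell. A hypothetical sketch, not part of the committed file, and the exact key spelling depends on the asv version in use:

    // hypothetical addition to "matrix" in asv.conf.json
    "matrix": {
        "env": {"ASV": ["true"]},   // exported to benchmark runs so caching is disabled
        "req": {
            "numpy": "2.2"
            // ... remaining requirements as above
        }
    },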

benchmarks/__init__.py

Whitespace-only changes.

benchmarks/bench_graph.py

Lines changed: 113 additions & 0 deletions
@@ -0,0 +1,113 @@
import geopandas as gpd
import numpy as np
from geodatasets import get_path
from scipy import sparse as sp
from scipy.sparse import linalg as spla

from libpysal.graph import Graph


class TimeSuite:
    def setup(self, *args, **kwargs):
        self.gdf = gpd.read_file(get_path("geoda south"))
        self.gdf_str = self.gdf.set_index(self.gdf.NAME + " " + self.gdf.STATE_NAME)
        self.gdf_points = self.gdf.set_geometry(self.gdf.representative_point())
        self.gdf_str_points = self.gdf_str.set_geometry(
            self.gdf_str.representative_point()
        )

        self.graphs = {
            "small_int": Graph.build_knn(self.gdf_points, k=10),
            "large_int": Graph.build_knn(self.gdf_points, k=500),
            "small_str": Graph.build_knn(self.gdf_str_points, k=10),
            "large_str": Graph.build_knn(self.gdf_str_points, k=500),
        }
        self.ids = {
            "int": self.gdf.index.to_series().sample(self.gdf.shape[0] // 5).values,
            "str": self.gdf_str.index.to_series()
            .sample(self.gdf_str.shape[0] // 5)
            .values,
        }
        self.sparse_arrays = {
            "small_int": Graph.build_knn(self.gdf_points, k=10).sparse,
            "large_int": Graph.build_knn(self.gdf_points, k=500).sparse,
            "queen": Graph.build_contiguity(self.gdf, rook=False).sparse,
            "rook": Graph.build_contiguity(self.gdf, rook=True).sparse,
            "delaunay": Graph.build_triangulation(self.gdf).sparse,
            "gabriel": Graph.build_triangulation(self.gdf, method="gabriel").sparse,
            "relneigh": Graph.build_triangulation(
                self.gdf, method="relative_neighborhood"
            ).sparse,
        }

    def time_queen(self, idx, strict):
        Graph.build_contiguity(
            self.gdf if idx == "int" else self.gdf_str,
            rook=False,
            strict=strict,
        )

    time_queen.params = (["int", "str"], [True, False])
    time_queen.param_names = ["index", "strict"]

    def time_knn(self, idx, k):
        Graph.build_knn(self.gdf_points if idx == "int" else self.gdf_str_points, k=k)

    time_knn.params = (["int", "str"], [10, 500])
    time_knn.param_names = ["index", "k"]

    def time_kernel(self, idx):
        Graph.build_kernel(self.gdf_points if idx == "int" else self.gdf_str_points)

    time_kernel.params = ["int", "str"]
    time_kernel.param_names = ["index"]

    def time_assign_self_weight(self, idx, size):
        self.graphs[f"{size}_{idx}"].assign_self_weight()

    time_assign_self_weight.params = (["int", "str"], ["small", "large"])
    time_assign_self_weight.param_names = ["index", "graph_size"]

    def time_sparse(self, idx, size):
        # building the sparse adjacency matrix is the operation being timed
        s = self.graphs[f"{size}_{idx}"].sparse  # noqa: F841

    time_sparse.params = (["int", "str"], ["small", "large"])
    time_sparse.param_names = ["index", "graph_size"]

    def time_subgraph(self, idx, size):
        self.graphs[f"{size}_{idx}"].subgraph(self.ids[idx])

    time_subgraph.params = (["int", "str"], ["small", "large"])
    time_subgraph.param_names = ["index", "graph_size"]

    def time_inverse(self, graph):
        s = self.graphs[graph].sparse
        np.linalg.inv(np.eye(s.shape[0]) - 0.5 * s.toarray())

    # parameter values name entries of self.graphs built in setup(); asv needs
    # them to supply the ``graph`` argument
    time_inverse.params = ["small_int", "large_int"]
    time_inverse.param_names = ["graph"]

    def time_dense_solve(self, graph):
        s = self.graphs[graph].sparse
        np.linalg.solve(
            np.eye(s.shape[0]) - 0.5 * s.toarray(),
            np.arange(s.shape[0]),
        )

    time_dense_solve.params = ["small_int", "large_int"]
    time_dense_solve.param_names = ["graph"]

    def time_sparse_solve(self, graph):
        s = self.graphs[graph].sparse
        spla.spsolve(
            sp.eye(s.shape[0]) - 0.5 * s,
            np.arange(s.shape[0]),
        )

    time_sparse_solve.params = ["small_int", "large_int"]
    time_sparse_solve.param_names = ["graph"]

    def time_dense_slogdet(self, graph):
        s = self.graphs[graph].sparse
        np.linalg.slogdet(np.eye(s.shape[0]) - 0.5 * s.toarray())

    time_dense_slogdet.params = ["small_int", "large_int"]
    time_dense_slogdet.param_names = ["graph"]

    def time_sparse_slogdet(self, graph):
        s = self.graphs[graph].sparse
        LU = spla.splu((sp.eye(s.shape[0]) - 0.5 * s).tocsc())
        np.sum(np.log(np.abs(LU.U.diagonal())))

    time_sparse_slogdet.params = ["small_int", "large_int"]
    time_sparse_slogdet.param_names = ["graph"]


# class MemSuite:
#     def mem_list(self):
#         return [0] * 256
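
Outside of asv, the suite can also be exercised by hand, which is handy when adjusting setup(); `asv run --bench time_knn` can be used to select a single benchmark through asv itself. A rough sketch, assuming it is run from the benchmarks/ directory with libpysal, geopandas, geodatasets, and scipy installed (asv would normally call setup and supply the parameter values):

    # manual smoke test of the suite; not part of the committed file
    from bench_graph import TimeSuite

    suite = TimeSuite()
    suite.setup()                   # builds the GeoDataFrames and graphs once
    suite.time_queen("int", False)  # queen contiguity, integer index, strict=False
    suite.time_knn("str", 10)       # k=10 nearest neighbours, string index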

libpysal/graph/base.py

Lines changed: 4 additions & 0 deletions
@@ -1,4 +1,5 @@
 import math
+import os
 from functools import cached_property

 import numpy as np
@@ -36,6 +37,9 @@
 from .io._gwt import _read_gwt, _to_gwt
 from .io._parquet import _read_parquet, _to_parquet

+if os.environ.get("ASV", "false") == "true":
+    cached_property = property  # remove cache for benchmark purposes  # noqa: F811
+
 ALLOWED_TRANSFORMATIONS = ("O", "B", "R", "D", "V", "C")

 # listed alphabetically
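
The ASV switch exists because Graph attributes decorated with functools.cached_property (such as the sparse representation exercised by time_sparse) are memoised: with the cache left on, a timing loop would largely measure a dictionary lookup rather than the work of building the result. A small sketch of the effect, assuming the variable is set before libpysal.graph is imported; nothing here is part of the commit:

    import os

    os.environ["ASV"] = "true"  # must be set before libpysal.graph.base is imported

    import geopandas as gpd
    from geodatasets import get_path
    from libpysal.graph import Graph

    gdf = gpd.read_file(get_path("geoda south"))
    g = Graph.build_knn(gdf.set_geometry(gdf.representative_point()), k=10)
    s1 = g.sparse  # rebuilt on this access ...
    s2 = g.sparse  # ... and rebuilt again here, since caching is disabled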

0 commit comments
