
Commit fb21438

martinfleis and ljwolf authored
add benchmarks of Graph using asv (#797)
* add benchmarks
* use asv
* properly use asv
* cleanup
* benchmarks/bench_graph.py

---------

Co-authored-by: Levi John Wolf <[email protected]>
1 parent 71ace91 commit fb21438
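
The new suite is driven by asv (airspeed velocity). A minimal sketch of how it might be exercised locally from the repository root, assuming asv is installed and the defaults in asv.conf.json below are used (raw results and the HTML report land under the newly gitignored .asv/ directory):

pip install asv
asv machine --yes   # record machine metadata once, non-interactively
asv run             # build the project and run benchmarks/ against the configured branch ("main")
asv publish         # render results from .asv/results into .asv/html
asv preview         # serve the HTML report locally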

File tree

5 files changed: +328 -0 lines changed


.gitignore

Lines changed: 1 addition & 0 deletions
@@ -106,3 +106,4 @@ pysal/examples/snow_maps/soho_graph.qpj
 pysal/examples/snow_maps/soho_graph.shp
 pysal/examples/snow_maps/soho_graph.shx

+.asv

asv.conf.json

Lines changed: 210 additions & 0 deletions
@@ -0,0 +1,210 @@
{
    // The version of the config file format. Do not change, unless
    // you know what you are doing.
    "version": 1,

    // The name of the project being benchmarked
    "project": "libpysal",

    // The project's homepage
    "project_url": "http://pysal.org/libpysal",

    // The URL or local path of the source code repository for the
    // project being benchmarked
    "repo": ".",

    // The Python project's subdirectory in your repo. If missing or
    // the empty string, the project is assumed to be located at the root
    // of the repository.
    // "repo_subdir": "",

    // Customizable commands for building the project.
    // See asv.conf.json documentation.
    // To build the package using pyproject.toml (PEP518), uncomment the following lines
    "build_command": [
        "python -m pip install build",
        "python -m build",
        "python -mpip wheel -w {build_cache_dir} {build_dir} --no-deps"
    ],
    // To build the package using setuptools and a setup.py file, uncomment the following lines
    // "build_command": [
    //     "python setup.py build",
    //     "python -mpip wheel -w {build_cache_dir} {build_dir}"
    // ],

    // Customizable commands for installing and uninstalling the project.
    // See asv.conf.json documentation.
    // "install_command": ["in-dir={env_dir} python -mpip install {wheel_file}"],
    // "uninstall_command": ["return-code=any python -mpip uninstall -y {project}"],

    // List of branches to benchmark. If not provided, defaults to "main"
    // (for git) or "default" (for mercurial).
    "branches": ["main"], // for git
    // "branches": ["default"], // for mercurial

    // The DVCS being used. If not set, it will be automatically
    // determined from "repo" by looking at the protocol in the URL
    // (if remote), or by looking for special directories, such as
    // ".git" (if local).
    // "dvcs": "git",

    // The tool to use to create environments. May be "conda",
    // "virtualenv", "mamba" (above 3.8)
    // or other value depending on the plugins in use.
    // If missing or the empty string, the tool will be automatically
    // determined by looking for tools on the PATH environment
    // variable.
    "environment_type": "virtualenv",

    // timeout in seconds for installing any dependencies in environment
    // defaults to 10 min
    //"install_timeout": 600,

    // the base URL to show a commit for the project.
    "show_commit_url": "http://github.com/pysal/libpysal/commit/",

    // The Pythons you'd like to test against. If not provided, defaults
    // to the current version of Python used to run `asv`.
    // "pythons": ["3.8", "3.12"],

    // The list of conda channel names to be searched for benchmark
    // dependency packages in the specified order
    // "conda_channels": ["conda-forge", "defaults"],

    // A conda environment file that is used for environment creation.
    // "conda_environment_file": "ci/313-latest.yaml",

    // The matrix of dependencies to test. Each key of the "req"
    // requirements dictionary is the name of a package (in PyPI) and
    // the values are version numbers. An empty list or empty string
    // indicates to just test against the default (latest)
    // version. null indicates that the package is to not be
    // installed. If the package to be tested is only available from
    // PyPi, and the 'environment_type' is conda, then you can preface
    // the package name by 'pip+', and the package will be installed
    // via pip (with all the conda available packages installed first,
    // followed by the pip installed packages).
    //
    // The ``@env`` and ``@env_nobuild`` keys contain the matrix of
    // environment variables to pass to build and benchmark commands.
    // An environment will be created for every combination of the
    // cartesian product of the "@env" variables in this matrix.
    // Variables in "@env_nobuild" will be passed to every environment
    // during the benchmark phase, but will not trigger creation of
    // new environments. A value of ``null`` means that the variable
    // will not be set for the current combination.
    //
    "matrix": {
        "req": {
            "numpy": "2.2",
            "beautifulsoup4": "",
            "geopandas": "",
            "jinja2": "",
            "packaging": "",
            "pandas": "",
            "platformdirs": "",
            "requests": "",
            "scikit-learn": "",
            "scipy": "",
            "shapely": "",
            "numba": "",
            "joblib": "",
            "networkx": "",
            "pyarrow": "",
            "sqlalchemy": "",
            "xarray": "",
            "zstd": "",
            "pandana": "",
            "geodatasets": "",
            "matplotlib": ""
        }
    },

    // Combinations of libraries/python versions can be excluded/included
    // from the set to test. Each entry is a dictionary containing additional
    // key-value pairs to include/exclude.
    //
    // An exclude entry excludes entries where all values match. The
    // values are regexps that should match the whole string.
    //
    // An include entry adds an environment. Only the packages listed
    // are installed. The 'python' key is required. The exclude rules
    // do not apply to includes.
    //
    // In addition to package names, the following keys are available:
    //
    // - python
    //     Python version, as in the *pythons* variable above.
    // - environment_type
    //     Environment type, as above.
    // - sys_platform
    //     Platform, as in sys.platform. Possible values for the common
    //     cases: 'linux2', 'win32', 'cygwin', 'darwin'.
    // - req
    //     Required packages
    // - env
    //     Environment variables
    // - env_nobuild
    //     Non-build environment variables
    //
    // "exclude": [
    //     {"python": "3.2", "sys_platform": "win32"}, // skip py3.2 on windows
    //     {"environment_type": "conda", "req": {"six": null}}, // don't run without six on conda
    //     {"env": {"ENV_VAR_1": "val2"}}, // skip val2 for ENV_VAR_1
    // ],
    //
    // "include": [
    //     // additional env for python3.12
    //     {"python": "3.12", "req": {"numpy": "1.26"}, "env_nobuild": {"FOO": "123"}},
    //     // additional env if run on windows+conda
    //     {"platform": "win32", "environment_type": "conda", "python": "3.12", "req": {"libpython": ""}},
    // ],

    // The directory (relative to the current directory) that benchmarks are
    // stored in. If not provided, defaults to "benchmarks"
    // "benchmark_dir": "benchmarks",

    // The directory (relative to the current directory) to cache the Python
    // environments in. If not provided, defaults to "env"
    "env_dir": ".asv/env",

    // The directory (relative to the current directory) that raw benchmark
    // results are stored in. If not provided, defaults to "results".
    "results_dir": ".asv/results",

    // The directory (relative to the current directory) that the html tree
    // should be written to. If not provided, defaults to "html".
    "html_dir": ".asv/html",

    // The number of characters to retain in the commit hashes.
    // "hash_length": 8,

    // `asv` will cache results of the recent builds in each
    // environment, making them faster to install next time. This is
    // the number of builds to keep, per environment.
    // "build_cache_size": 2,

    // The commits after which the regression search in `asv publish`
    // should start looking for regressions. Dictionary whose keys are
    // regexps matching to benchmark names, and values corresponding to
    // the commit (exclusive) after which to start looking for
    // regressions. The default is to start from the first commit
    // with results. If the commit is `null`, regression detection is
    // skipped for the matching benchmark.
    //
    // "regressions_first_commits": {
    //     "some_benchmark": "352cdf", // Consider regressions only after this commit
    //     "another_benchmark": null,  // Skip regression detection altogether
    // },

    // The thresholds for relative change in results, after which `asv
    // publish` starts reporting regressions. Dictionary of the same
    // form as in ``regressions_first_commits``, with values
    // indicating the thresholds. If multiple entries match, the
    // maximum is taken. If no entry matches, the default is 5%.
    //
    // "regressions_thresholds": {
    //     "some_benchmark": 0.01,   // Threshold of 1%
    //     "another_benchmark": 0.5, // Threshold of 50%
    // },
}
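
The matrix above pins only numpy and pulls the remaining requirements at their latest versions. The ``@env``/``env_nobuild`` machinery described in the comments could also be used to export the ASV flag that libpysal/graph/base.py checks (see the last file in this commit), instead of setting it in the shell. A hypothetical sketch, not part of the committed file, and the exact key spelling depends on the asv version in use:

    // hypothetical addition to "matrix" in asv.conf.json
    "matrix": {
        "env": {"ASV": ["true"]},   // exported to benchmark runs so caching is disabled
        "req": {
            "numpy": "2.2"
            // ... remaining requirements as above
        }
    },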

benchmarks/__init__.py

Whitespace-only changes.

benchmarks/bench_graph.py

Lines changed: 113 additions & 0 deletions
@@ -0,0 +1,113 @@
import geopandas as gpd
import numpy as np
from geodatasets import get_path
from scipy import sparse as sp
from scipy.sparse import linalg as spla

from libpysal.graph import Graph


class TimeSuite:
    def setup(self, *args, **kwargs):
        self.gdf = gpd.read_file(get_path("geoda south"))
        self.gdf_str = self.gdf.set_index(self.gdf.NAME + " " + self.gdf.STATE_NAME)
        self.gdf_points = self.gdf.set_geometry(self.gdf.representative_point())
        self.gdf_str_points = self.gdf_str.set_geometry(
            self.gdf_str.representative_point()
        )

        self.graphs = {
            "small_int": Graph.build_knn(self.gdf_points, k=10),
            "large_int": Graph.build_knn(self.gdf_points, k=500),
            "small_str": Graph.build_knn(self.gdf_str_points, k=10),
            "large_str": Graph.build_knn(self.gdf_str_points, k=500),
        }
        self.ids = {
            "int": self.gdf.index.to_series().sample(self.gdf.shape[0] // 5).values,
            "str": self.gdf_str.index.to_series()
            .sample(self.gdf_str.shape[0] // 5)
            .values,
        }
        self.sparse_arrays = {
            "small_int": Graph.build_knn(self.gdf_points, k=10).sparse,
            "large_int": Graph.build_knn(self.gdf_points, k=500).sparse,
            "queen": Graph.build_contiguity(self.gdf, rook=False).sparse,
            "rook": Graph.build_contiguity(self.gdf, rook=True).sparse,
            "delaunay": Graph.build_triangulation(self.gdf).sparse,
            "gabriel": Graph.build_triangulation(self.gdf, method="gabriel").sparse,
            "relneigh": Graph.build_triangulation(
                self.gdf, method="relative_neighborhood"
            ).sparse,
        }

    def time_queen(self, idx, strict):
        Graph.build_contiguity(
            self.gdf if idx == "int" else self.gdf_str,
            rook=False,
            strict=strict,
        )

    time_queen.params = (["int", "str"], [True, False])
    time_queen.param_names = ["index", "strict"]

    def time_knn(self, idx, k):
        Graph.build_knn(self.gdf_points if idx == "int" else self.gdf_str_points, k=k)

    time_knn.params = (["int", "str"], [10, 500])
    time_knn.param_names = ["index", "k"]

    def time_kernel(self, idx):
        Graph.build_kernel(self.gdf_points if idx == "int" else self.gdf_str_points)

    time_kernel.params = ["int", "str"]
    time_kernel.param_names = ["index"]

    def time_assign_self_weight(self, idx, size):
        self.graphs[f"{size}_{idx}"].assign_self_weight()

    time_assign_self_weight.params = (["int", "str"], ["small", "large"])
    time_assign_self_weight.param_names = ["index", "graph_size"]

    def time_sparse(self, idx, size):
        # building the sparse adjacency matrix is the operation being timed
        s = self.graphs[f"{size}_{idx}"].sparse  # noqa: F841

    time_sparse.params = (["int", "str"], ["small", "large"])
    time_sparse.param_names = ["index", "graph_size"]

    def time_subgraph(self, idx, size):
        self.graphs[f"{size}_{idx}"].subgraph(self.ids[idx])

    time_subgraph.params = (["int", "str"], ["small", "large"])
    time_subgraph.param_names = ["index", "graph_size"]

    def time_inverse(self, graph):
        s = self.graphs[graph].sparse
        np.linalg.inv(np.eye(s.shape[0]) - 0.5 * s.toarray())

    # parameter values name entries of self.graphs built in setup(); asv needs
    # them to supply the ``graph`` argument
    time_inverse.params = ["small_int", "large_int"]
    time_inverse.param_names = ["graph"]

    def time_dense_solve(self, graph):
        s = self.graphs[graph].sparse
        np.linalg.solve(
            np.eye(s.shape[0]) - 0.5 * s.toarray(),
            np.arange(s.shape[0]),
        )

    time_dense_solve.params = ["small_int", "large_int"]
    time_dense_solve.param_names = ["graph"]

    def time_sparse_solve(self, graph):
        s = self.graphs[graph].sparse
        spla.spsolve(
            sp.eye(s.shape[0]) - 0.5 * s,
            np.arange(s.shape[0]),
        )

    time_sparse_solve.params = ["small_int", "large_int"]
    time_sparse_solve.param_names = ["graph"]

    def time_dense_slogdet(self, graph):
        s = self.graphs[graph].sparse
        np.linalg.slogdet(np.eye(s.shape[0]) - 0.5 * s.toarray())

    time_dense_slogdet.params = ["small_int", "large_int"]
    time_dense_slogdet.param_names = ["graph"]

    def time_sparse_slogdet(self, graph):
        s = self.graphs[graph].sparse
        LU = spla.splu((sp.eye(s.shape[0]) - 0.5 * s).tocsc())
        np.sum(np.log(np.abs(LU.U.diagonal())))

    time_sparse_slogdet.params = ["small_int", "large_int"]
    time_sparse_slogdet.param_names = ["graph"]


# class MemSuite:
#     def mem_list(self):
#         return [0] * 256
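
Outside of asv, the suite can also be exercised by hand, which is handy when adjusting setup(); `asv run --bench time_knn` can be used to select a single benchmark through asv itself. A rough sketch, assuming it is run from the benchmarks/ directory with libpysal, geopandas, geodatasets, and scipy installed (asv would normally call setup and supply the parameter values):

    # manual smoke test of the suite; not part of the committed file
    from bench_graph import TimeSuite

    suite = TimeSuite()
    suite.setup()                   # builds the GeoDataFrames and graphs once
    suite.time_queen("int", False)  # queen contiguity, integer index, strict=False
    suite.time_knn("str", 10)       # k=10 nearest neighbours, string index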

libpysal/graph/base.py

Lines changed: 4 additions & 0 deletions
@@ -1,4 +1,5 @@
 import math
+import os
 from functools import cached_property

 import numpy as np
@@ -36,6 +37,9 @@
 from .io._gwt import _read_gwt, _to_gwt
 from .io._parquet import _read_parquet, _to_parquet

+if os.environ.get("ASV", "false") == "true":
+    cached_property = property  # remove cache for benchmark purposes  # noqa: F811
+
 ALLOWED_TRANSFORMATIONS = ("O", "B", "R", "D", "V", "C")

 # listed alphabetically
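
The ASV switch exists because Graph attributes decorated with functools.cached_property (such as the sparse representation exercised by time_sparse) are memoised: with the cache left on, a timing loop would largely measure a dictionary lookup rather than the work of building the result. A small sketch of the effect, assuming the variable is set before libpysal.graph is imported; nothing here is part of the commit:

    import os

    os.environ["ASV"] = "true"  # must be set before libpysal.graph.base is imported

    import geopandas as gpd
    from geodatasets import get_path
    from libpysal.graph import Graph

    gdf = gpd.read_file(get_path("geoda south"))
    g = Graph.build_knn(gdf.set_geometry(gdf.representative_point()), k=10)
    s1 = g.sparse  # rebuilt on this access ...
    s2 = g.sparse  # ... and rebuilt again here, since caching is disabled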

0 commit comments
