Skip to content
Open
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
113 changes: 113 additions & 0 deletions esda/crand_perf_nogit.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
"""
Performance benchmarking for parallel crand

...

python crand_perf.py BRANCH DRAWS PERMUTATIONS CORES
"""

import os, sys, time, datetime
import subprocess
import geopandas, pandas
import numpy as np
import crand
from time import time
from esda.moran import _moran_local_crand
from libpysal import examples, weights

# Print versions
import numba, joblib
print((
f"{datetime.datetime.now()} | "\
f"Numba: {numba.__version__} | "\
f"Joblib: {joblib.__version__}"
))

# Parse arguments
BRANCH = sys.argv[1]
DRAWS = int(sys.argv[2])
PERMUTATIONS = int(sys.argv[3])
CORES = int(sys.argv[4])
if int(CORES) == -1:
CORES = os.cpu_count()
SEED = 12345

# Checkout branch
#subprocess.run(["git", "checkout", BRANCH])
#print(f"Branch {BRANCH} loaded")

# Load data
_ = examples.load_example("NCOVR")
var = "HR60"
db = geopandas.read_file(
examples.get_path("NAT.shp")
)
## Augment size
db = pandas.concat([db]*10)
w = weights.Queen.from_dataframe(db)
w.transform = "R"

z = db[var].values
z = (z - z.mean()) / z.std()

zl = weights.lag_spatial(w, z)
observed = (w.n - 1) * z * zl / (z * z).sum()

cardinalities = np.array((w.sparse != 0).sum(1)).flatten()

weights = w.sparse.data

permuted_ids = crand.vec_permutations(
cardinalities.max(), w.n, PERMUTATIONS, SEED
)

scaling = (w.n - 1) / (z * z).sum()

n_jobs = CORES

keep = False

stat_func = _moran_local_crand

# Loop over executions (DRAWS)
compiler = crand.parallel_crand(
z,
observed,
cardinalities,
weights,
permuted_ids,
scaling,
n_jobs,
keep,
stat_func,
)
print((
f"Benchmarking {PERMUTATIONS} permutations using "\
f"{CORES} cores and {DRAWS} reps..."
))
ts = []
for i in range(DRAWS):
t0 = time()
compiler = crand.parallel_crand(
z,
observed,
cardinalities,
weights,
permuted_ids,
scaling,
n_jobs,
keep,
stat_func,
)
t1 = time()
t = t1 - t0
ts.append(t)
#print(f"\tRep {i+1}: {np.round(t, 4)} seconds")
ts = np.array(ts)
print((
f"\n{PERMUTATIONS} perms | {CORES} cores | "\
f"N: {w.n} | "\
f"Mean {np.round(ts.mean(), 4)}s | "\
f"Std: {np.round(ts.std(), 4)}s\n"
))

118 changes: 118 additions & 0 deletions esda/crand_perf_sim.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
import os, sys, time, datetime
import subprocess
import geopandas, pandas
import numpy as np
import crand
# NOTE(review): this shadows the `time` module imported above; only the
# time() function is used below, so it works, but consider dropping one
from time import time
from esda.moran import _moran_local_crand
from libpysal import examples
from libpysal import weights as lpw

# Fixed seed so permutation draws are reproducible across branches
SEED = 12345
CPUS = os.cpu_count()
# Simulation grid: dataset replication factors x permutation counts x cores
N_FACTORS = [1, 5, 10]
PERMUTATIONS = [99, 999, 9999]
# 1 core, then every even core count up to the machine total
CORES = [1] + list(range(2, CPUS+1, 2))

def run_branch(branch, draws=5, var="HR60"):
    """Benchmark ``crand.parallel_crand`` across the simulation grid on one branch.

    Parameters
    ----------
    branch : str
        Git branch to check out before benchmarking.
    draws : int
        Number of timed repetitions per (n_factor, perms, n_jobs) cell.
    var : str
        Column of the NAT dataset used as the variable of interest.

    Returns
    -------
    (all_times, mean_times) : tuple of pandas.DataFrame
        Per-rep timings and per-cell mean timings, each tagged with `branch`.
    """
    # check=True: fail loudly if the checkout fails; otherwise we would
    # silently benchmark whatever branch was already checked out
    subprocess.run(["git", "checkout", branch], check=True)
    print(f"Branch {branch} loaded")
    _ = examples.load_example("NCOVR")
    base_db = geopandas.read_file(
        examples.get_path("NAT.shp")
    )
    all_times = []
    mean_times = []
    for n_factor in N_FACTORS:
        # Replicate the ORIGINAL frame n_factor times. The previous code
        # rebound `db` each iteration, compounding the factors so sizes
        # grew 1x, 5x, 50x instead of the intended 1x, 5x, 10x.
        db = pandas.concat([base_db] * n_factor)
        w = lpw.Queen.from_dataframe(db)
        w.transform = "R"

        # These quantities depend only on the data/weights, not on the
        # permutation or core counts, so compute them once per size
        z = db[var].values
        z = (z - z.mean()) / z.std()

        zl = lpw.lag_spatial(w, z)
        observed = (w.n - 1) * z * zl / (z * z).sum()

        cardinalities = np.array(
            (w.sparse != 0).sum(1)
        ).flatten()

        weights = w.sparse.data

        scaling = (w.n - 1) / (z * z).sum()

        keep = False

        stat_func = _moran_local_crand

        for perms in PERMUTATIONS:
            # Depends on perms but not on the number of cores
            permuted_ids = crand.vec_permutations(
                cardinalities.max(), w.n, perms, SEED
            )
            for n_jobs in CORES:
                # Warm-up call so JIT compilation is excluded from timings
                _ = crand.parallel_crand(
                    z,
                    observed,
                    cardinalities,
                    weights,
                    permuted_ids,
                    scaling,
                    n_jobs,
                    keep,
                    stat_func,
                )
                ts = []
                for _rep in range(draws):
                    t0 = time()
                    _ = crand.parallel_crand(
                        z,
                        observed,
                        cardinalities,
                        weights,
                        permuted_ids,
                        scaling,
                        n_jobs,
                        keep,
                        stat_func,
                    )
                    t = time() - t0
                    ts.append(t)
                    all_times.append([n_factor, perms, n_jobs, t])
                ts = np.array(ts)
                mean_times.append([n_factor, perms, n_jobs, ts.mean()])
                print(
                    f"{perms} perms | {n_jobs} cores | "
                    f"N: {w.n} | "
                    f"Mean {np.round(ts.mean(), 4)}s | "
                    f"Std: {np.round(ts.std(), 4)}s"
                )
    all_times = pandas.DataFrame(
        all_times,
        columns=["n_factor", "perms", "n_jobs", "seconds"]
    )
    all_times["branch"] = branch
    mean_times = pandas.DataFrame(
        mean_times,
        columns=["n_factor", "perms", "n_jobs", "seconds"]
    )
    mean_times["branch"] = branch
    return all_times, mean_times

def sim_over_branches(branches):
    """Benchmark every branch and write combined timing tables to CSV.

    Runs ``run_branch`` for each name in `branches`, then concatenates the
    per-rep and per-cell mean timing frames into ``all_times.csv`` and
    ``mean_times.csv`` respectively.
    """
    all_times_bag = []
    mean_times_bag = []
    for branch in branches:
        per_rep, per_cell = run_branch(branch)
        all_times_bag.append(per_rep)
        mean_times_bag.append(per_cell)
    combined_all = pandas.concat(all_times_bag)
    combined_mean = pandas.concat(mean_times_bag)
    combined_all.to_csv("all_times.csv", index=False)
    combined_mean.to_csv("mean_times.csv", index=False)
    return None

# Compare the reference implementation against the two candidate branches
if __name__ == '__main__':
    _ = sim_over_branches(['master', 'crand-innerlimit', 'crand-automemmap'])

66 changes: 66 additions & 0 deletions esda/functions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
from .moran import (
Moran,
Moran_Local,
Moran_BV,
Moran_Local_BV,
Moran_Rate,
Moran_Local_Rate,
)
from .geary import Geary
from .gamma import Gamma
from .geary_local import Geary_Local
from .geary_local_mv import Geary_Local_MV
from .getisord import G, G_Local
from .join_counts import Join_Counts
from .join_counts_local import Join_Counts_Local
from .join_counts_local_bv import Join_Counts_Local_BV
from .join_counts_local_mv import Join_Counts_Local_MV

# from .lee import Spatial_Pearson # no solution yet for sklearn style classes
# from .losh import LOSH
import inspect

def _make_wrapper(klass, result_attr):
    # Build a functional interface around an esda class: construct the
    # object, then return (statistic, significance-like result attribute).
    def wrapper(*args, **kwargs):
        obj = klass(*args, **kwargs)
        return obj._statistic, getattr(obj, result_attr)
    return wrapper


for klass in (
    Moran,
    Moran_Local,
    Moran_BV,
    Moran_Local_BV,
    Moran_Rate,
    Moran_Local_Rate,
    Geary,
    Gamma,
    Geary_Local,
    Geary_Local_MV,
    G,
    G_Local,
    Join_Counts,
    Join_Counts_Local,
    Join_Counts_Local_BV,
    Join_Counts_Local_MV,
):
    # Sanity checks on the contract the wrapper relies on: every class
    # exposes a non-callable `_statistic` attribute/property.
    assert hasattr(klass, "_statistic"), f"{klass} has no _statistic"
    assert not callable(klass._statistic), f"{klass}._statistic is callable"
    klassname = klass.__name__
    name = klass.__name__.lower()
    # NOTE(review): LOSH and Spatial_Pearson are sklearn-style — they
    # separate the init and fit phases, so the wrapper signature must be
    # re-written before their options all go through. Their result
    # attributes would be `pval` and `significance_` respectively.
    if klassname == "LOSH":
        result_attr = "pval"
    elif klassname == "Spatial_Pearson":
        result_attr = "significance_"
    else:
        result_attr = "p_sim"
    # Closure factory instead of exec()-ing generated source: same
    # resulting functions, without executing built strings.
    func = _make_wrapper(klass, result_attr)
    func.__name__ = name
    func.__qualname__ = name
    func.__doc__ = klass.__doc__
    init_sig = inspect.signature(klass)
    func.__signature__ = init_sig
    globals()[name] = func
    # Expose only the functional form from this module
    del globals()[klassname]

# TODO: LOSH and Spatial_Pearson are not imported yet (see the commented
# imports above); iterating over them here raised NameError at import
# time and made this module unimportable. Re-enable once the sklearn-style
# init/fit split is handled.
# for klass in (LOSH, Spatial_Pearson):
#     # sklearn style...
#     pass

# Clean up loop temporaries so they do not leak into the module namespace
del klassname
del klass
del name
del result_attr
del func
del init_sig
del _make_wrapper
del inspect
8 changes: 6 additions & 2 deletions esda/geary_local.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,7 @@ def fit(self, x):
n_jobs = self.n_jobs
seed = self.seed

self.localG = self._statistic(x, w)
self.localG = self._stat_func(x, w)

if permutations:
self.p_sim, self.rlocalG = _crand_plus(
Expand Down Expand Up @@ -150,8 +150,12 @@ def fit(self, x):

return self

@property
def _statistic(self):
    # Read-only alias for the computed local Geary values (`localG`),
    # exposed so generic code can read a common, non-callable
    # `_statistic` attribute across estimators (the actual computation
    # now lives in the `_stat_func` staticmethod).
    return self.localG

@staticmethod
def _statistic(x, w):
def _stat_func(x, w):
# Caclulate z-scores for x
zscore_x = (x - np.mean(x)) / np.std(x)
# Create focal (xi) and neighbor (zi) values
Expand Down
Loading