Skip to content
Open
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
113 changes: 113 additions & 0 deletions esda/crand_perf_nogit.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
"""
Performance benchmarking for parallel crand

...

python crand_perf.py BRANCH DRAWS PERMUTATIONS CORES
"""

import os, sys, time, datetime
import subprocess
import geopandas, pandas
import numpy as np
import crand
from time import time
from esda.moran import _moran_local_crand
from libpysal import examples, weights

# Print versions
import numba, joblib
print((
f"{datetime.datetime.now()} | "\
f"Numba: {numba.__version__} | "\
f"Joblib: {joblib.__version__}"
))

# Parse arguments
BRANCH = sys.argv[1]
DRAWS = int(sys.argv[2])
PERMUTATIONS = int(sys.argv[3])
CORES = int(sys.argv[4])
if int(CORES) == -1:
CORES = os.cpu_count()
SEED = 12345

# Checkout branch
#subprocess.run(["git", "checkout", BRANCH])
#print(f"Branch {BRANCH} loaded")

# Load data
_ = examples.load_example("NCOVR")
var = "HR60"
db = geopandas.read_file(
examples.get_path("NAT.shp")
)
## Augment size
db = pandas.concat([db]*10)
w = weights.Queen.from_dataframe(db)
w.transform = "R"

z = db[var].values
z = (z - z.mean()) / z.std()

zl = weights.lag_spatial(w, z)
observed = (w.n - 1) * z * zl / (z * z).sum()

cardinalities = np.array((w.sparse != 0).sum(1)).flatten()

weights = w.sparse.data

permuted_ids = crand.vec_permutations(
cardinalities.max(), w.n, PERMUTATIONS, SEED
)

scaling = (w.n - 1) / (z * z).sum()

n_jobs = CORES

keep = False

stat_func = _moran_local_crand

# Loop over executions (DRAWS)
compiler = crand.parallel_crand(
z,
observed,
cardinalities,
weights,
permuted_ids,
scaling,
n_jobs,
keep,
stat_func,
)
print((
f"Benchmarking {PERMUTATIONS} permutations using "\
f"{CORES} cores and {DRAWS} reps..."
))
ts = []
for i in range(DRAWS):
t0 = time()
compiler = crand.parallel_crand(
z,
observed,
cardinalities,
weights,
permuted_ids,
scaling,
n_jobs,
keep,
stat_func,
)
t1 = time()
t = t1 - t0
ts.append(t)
#print(f"\tRep {i+1}: {np.round(t, 4)} seconds")
ts = np.array(ts)
print((
f"\n{PERMUTATIONS} perms | {CORES} cores | "\
f"N: {w.n} | "\
f"Mean {np.round(ts.mean(), 4)}s | "\
f"Std: {np.round(ts.std(), 4)}s\n"
))

118 changes: 118 additions & 0 deletions esda/crand_perf_sim.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
import os, sys, time, datetime
import subprocess
import geopandas, pandas
import numpy as np
import crand
# NOTE(review): this shadows the `time` module imported above; only the
# time() function is used below, so it works, but consider dropping one
from time import time
from esda.moran import _moran_local_crand
from libpysal import examples
from libpysal import weights as lpw

# Fixed seed so permutation draws are reproducible across branches
SEED = 12345
CPUS = os.cpu_count()
# Simulation grid: dataset replication factors x permutation counts x cores
N_FACTORS = [1, 5, 10]
PERMUTATIONS = [99, 999, 9999]
# 1 core, then every even core count up to the machine total
CORES = [1] + list(range(2, CPUS+1, 2))

def run_branch(branch, draws=5, var="HR60"):
    """Benchmark ``crand.parallel_crand`` across the simulation grid on one branch.

    Parameters
    ----------
    branch : str
        Git branch to check out before benchmarking.
    draws : int
        Number of timed repetitions per (n_factor, perms, n_jobs) cell.
    var : str
        Column of the NAT dataset used as the variable of interest.

    Returns
    -------
    (all_times, mean_times) : tuple of pandas.DataFrame
        Per-rep timings and per-cell mean timings, each tagged with `branch`.
    """
    # check=True: fail loudly if the checkout fails; otherwise we would
    # silently benchmark whatever branch was already checked out
    subprocess.run(["git", "checkout", branch], check=True)
    print(f"Branch {branch} loaded")
    _ = examples.load_example("NCOVR")
    base_db = geopandas.read_file(
        examples.get_path("NAT.shp")
    )
    all_times = []
    mean_times = []
    for n_factor in N_FACTORS:
        # Replicate the ORIGINAL frame n_factor times. The previous code
        # rebound `db` each iteration, compounding the factors so sizes
        # grew 1x, 5x, 50x instead of the intended 1x, 5x, 10x.
        db = pandas.concat([base_db] * n_factor)
        w = lpw.Queen.from_dataframe(db)
        w.transform = "R"

        # These quantities depend only on the data/weights, not on the
        # permutation or core counts, so compute them once per size
        z = db[var].values
        z = (z - z.mean()) / z.std()

        zl = lpw.lag_spatial(w, z)
        observed = (w.n - 1) * z * zl / (z * z).sum()

        cardinalities = np.array(
            (w.sparse != 0).sum(1)
        ).flatten()

        weights = w.sparse.data

        scaling = (w.n - 1) / (z * z).sum()

        keep = False

        stat_func = _moran_local_crand

        for perms in PERMUTATIONS:
            # Depends on perms but not on the number of cores
            permuted_ids = crand.vec_permutations(
                cardinalities.max(), w.n, perms, SEED
            )
            for n_jobs in CORES:
                # Warm-up call so JIT compilation is excluded from timings
                _ = crand.parallel_crand(
                    z,
                    observed,
                    cardinalities,
                    weights,
                    permuted_ids,
                    scaling,
                    n_jobs,
                    keep,
                    stat_func,
                )
                ts = []
                for _rep in range(draws):
                    t0 = time()
                    _ = crand.parallel_crand(
                        z,
                        observed,
                        cardinalities,
                        weights,
                        permuted_ids,
                        scaling,
                        n_jobs,
                        keep,
                        stat_func,
                    )
                    t = time() - t0
                    ts.append(t)
                    all_times.append([n_factor, perms, n_jobs, t])
                ts = np.array(ts)
                mean_times.append([n_factor, perms, n_jobs, ts.mean()])
                print(
                    f"{perms} perms | {n_jobs} cores | "
                    f"N: {w.n} | "
                    f"Mean {np.round(ts.mean(), 4)}s | "
                    f"Std: {np.round(ts.std(), 4)}s"
                )
    all_times = pandas.DataFrame(
        all_times,
        columns=["n_factor", "perms", "n_jobs", "seconds"]
    )
    all_times["branch"] = branch
    mean_times = pandas.DataFrame(
        mean_times,
        columns=["n_factor", "perms", "n_jobs", "seconds"]
    )
    mean_times["branch"] = branch
    return all_times, mean_times

def sim_over_branches(branches):
    """Benchmark every branch and write combined timing tables to CSV.

    Runs ``run_branch`` for each name in `branches`, then concatenates the
    per-rep and per-cell mean timing frames into ``all_times.csv`` and
    ``mean_times.csv`` respectively.
    """
    all_times_bag = []
    mean_times_bag = []
    for branch in branches:
        per_rep, per_cell = run_branch(branch)
        all_times_bag.append(per_rep)
        mean_times_bag.append(per_cell)
    combined_all = pandas.concat(all_times_bag)
    combined_mean = pandas.concat(mean_times_bag)
    combined_all.to_csv("all_times.csv", index=False)
    combined_mean.to_csv("mean_times.csv", index=False)
    return None

# Compare the reference implementation against the two candidate branches
if __name__ == '__main__':
    _ = sim_over_branches(['master', 'crand-innerlimit', 'crand-automemmap'])

66 changes: 66 additions & 0 deletions esda/functions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
from .moran import (
Moran,
Moran_Local,
Moran_BV,
Moran_Local_BV,
Moran_Rate,
Moran_Local_Rate,
)
from .geary import Geary
from .gamma import Gamma
from .geary_local import Geary_Local
from .geary_local_mv import Geary_Local_MV
from .getisord import G, G_Local
from .join_counts import Join_Counts
from .join_counts_local import Join_Counts_Local
from .join_counts_local_bv import Join_Counts_Local_BV
from .join_counts_local_mv import Join_Counts_Local_MV

# from .lee import Spatial_Pearson # no solution yet for sklearn style classes
# from .losh import LOSH
import inspect

def _make_wrapper(klass, result_attr):
    # Build a functional interface around an esda class: construct the
    # object, then return (statistic, significance-like result attribute).
    def wrapper(*args, **kwargs):
        obj = klass(*args, **kwargs)
        return obj._statistic, getattr(obj, result_attr)
    return wrapper


for klass in (
    Moran,
    Moran_Local,
    Moran_BV,
    Moran_Local_BV,
    Moran_Rate,
    Moran_Local_Rate,
    Geary,
    Gamma,
    Geary_Local,
    Geary_Local_MV,
    G,
    G_Local,
    Join_Counts,
    Join_Counts_Local,
    Join_Counts_Local_BV,
    Join_Counts_Local_MV,
):
    # Sanity checks on the contract the wrapper relies on: every class
    # exposes a non-callable `_statistic` attribute/property.
    assert hasattr(klass, "_statistic"), f"{klass} has no _statistic"
    assert not callable(klass._statistic), f"{klass}._statistic is callable"
    klassname = klass.__name__
    name = klass.__name__.lower()
    # NOTE(review): LOSH and Spatial_Pearson are sklearn-style — they
    # separate the init and fit phases, so the wrapper signature must be
    # re-written before their options all go through. Their result
    # attributes would be `pval` and `significance_` respectively.
    if klassname == "LOSH":
        result_attr = "pval"
    elif klassname == "Spatial_Pearson":
        result_attr = "significance_"
    else:
        result_attr = "p_sim"
    # Closure factory instead of exec()-ing generated source: same
    # resulting functions, without executing built strings.
    func = _make_wrapper(klass, result_attr)
    func.__name__ = name
    func.__qualname__ = name
    func.__doc__ = klass.__doc__
    init_sig = inspect.signature(klass)
    func.__signature__ = init_sig
    globals()[name] = func
    # Expose only the functional form from this module
    del globals()[klassname]

# TODO: LOSH and Spatial_Pearson are not imported yet (see the commented
# imports above); iterating over them here raised NameError at import
# time and made this module unimportable. Re-enable once the sklearn-style
# init/fit split is handled.
# for klass in (LOSH, Spatial_Pearson):
#     # sklearn style...
#     pass

# Clean up loop temporaries so they do not leak into the module namespace
del klassname
del klass
del name
del result_attr
del func
del init_sig
del _make_wrapper
del inspect
8 changes: 6 additions & 2 deletions esda/geary_local.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,7 @@ def fit(self, x):
n_jobs = self.n_jobs
seed = self.seed

self.localG = self._statistic(x, w)
self.localG = self._stat_func(x, w)

if permutations:
self.p_sim, self.rlocalG = _crand_plus(
Expand Down Expand Up @@ -150,8 +150,12 @@ def fit(self, x):

return self

@property
def _statistic(self):
    # Read-only alias for the computed local Geary values (`localG`),
    # exposed so generic code can read a common, non-callable
    # `_statistic` attribute across estimators (the actual computation
    # now lives in the `_stat_func` staticmethod).
    return self.localG

@staticmethod
def _statistic(x, w):
def _stat_func(x, w):
# Caclulate z-scores for x
zscore_x = (x - np.mean(x)) / np.std(x)
# Create focal (xi) and neighbor (zi) values
Expand Down
Loading