Skip to content

Commit b444600

Browse files
committed
Update pre-commit and fix linter issues
1 parent 6fe4af7 commit b444600

20 files changed

+419
-295
lines changed

.pre-commit-config.yaml

Lines changed: 14 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,21 +1,24 @@
11
repos:
22
- repo: https://github.com/pre-commit/pre-commit-hooks
3-
rev: v4.0.1
3+
rev: v6.0.0
44
hooks:
55
- id: end-of-file-fixer
66
- id: trailing-whitespace
77
- id: check-toml
8-
- repo: https://github.com/psf/black
9-
rev: 22.3.0
10-
hooks:
11-
- id: black
12-
- repo: https://github.com/pycqa/isort
13-
rev: 5.13.2
14-
hooks:
15-
- id: isort
8+
- id: check-yaml
9+
args: [--unsafe]
10+
- id: debug-statements
11+
- id: destroyed-symlinks
12+
- id: detect-private-key
1613
- repo: https://github.com/astral-sh/ruff-pre-commit
1714
# Ruff version.
18-
rev: v0.3.7
15+
rev: v0.13.0
1916
hooks:
2017
# Run the linter.
21-
- id: ruff
18+
- id: ruff-check
19+
# Run the formatter.
20+
- id: ruff-format
21+
- repo: https://github.com/pycqa/isort
22+
rev: 6.0.1
23+
hooks:
24+
- id: isort

detclim/bootstrap_tests.py

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -101,17 +101,13 @@ def mannwhitney(data_1, data_2):
101101
def wilcoxon(data_1, data_2):
102102
"""Perform a Wilcoxon Signed Rank Test, return P-value."""
103103
with np.errstate(divide="ignore", invalid="ignore"):
104-
return sts.wilcoxon(
105-
data_1 - data_2, axis=1
106-
).pvalue # pyright: ignore[reportAttributeAccessIssue]
104+
return sts.wilcoxon(data_1 - data_2, axis=1).pvalue # pyright: ignore[reportAttributeAccessIssue]
107105

108106

109107
def epps_singleton(data_1, data_2):
110108
"""Perform a 2 sample Epps Singleton test, return P-value."""
111109
try:
112-
_out = sts.epps_singleton_2samp(
113-
data_1, data_2, axis=1
114-
).pvalue # pyright: ignore[reportCallIssue]
110+
_out = sts.epps_singleton_2samp(data_1, data_2, axis=1).pvalue # pyright: ignore[reportCallIssue]
115111
except np.linalg.LinAlgError:
116112
_out = np.ones(data_1.shape[0])
117113
return _out

detclim/notebooks/BootstrapTest.ipynb

Lines changed: 64 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,7 @@
1818
"from functools import partial\n",
1919
"from statsmodels.stats import multitest as smm\n",
2020
"import multiprocessing as mp\n",
21-
"import seaborn as sns\n",
22-
"from dask.distributed import Client"
21+
"import seaborn as sns"
2322
]
2423
},
2524
{
@@ -39,13 +38,12 @@
3938
" ens_idx = sorted(range(ens_min, ens_max + 1))\n",
4039
" assert len(ens_idx) > ens_size, \"ENSEMBLE SIZE MUST BE SMALLER THAN ENSEMBLE RANGE\"\n",
4140
" if not with_repl and not uniq:\n",
42-
" selected = [\n",
43-
" random.sample(ens_idx, ens_size)\n",
44-
" for _ in range(ncases)\n",
45-
" ]\n",
41+
" selected = [random.sample(ens_idx, ens_size) for _ in range(ncases)]\n",
4642
" elif not with_repl:\n",
4743
" _sel = random.sample(ens_idx, ens_size * ncases)\n",
48-
" selected = [_sel[idx * ens_size : (idx + 1) * ens_size] for idx in range(ncases)]\n",
44+
" selected = [\n",
45+
" _sel[idx * ens_size : (idx + 1) * ens_size] for idx in range(ncases)\n",
46+
" ]\n",
4947
" else:\n",
5048
" selected = [\n",
5149
" [random.randint(ens_min, ens_max) for _ in range(ens_size)]\n",
@@ -63,6 +61,7 @@
6361
" _res = sts.mstats.ks_2samp(data_x, data_y)\n",
6462
" return _res[1]\n",
6563
"\n",
64+
"\n",
6665
"def cvm_2samp(data_x, data_y):\n",
6766
" \"\"\"Perform a 2 sample Cramer von Mises test, map output to a tuple.\"\"\"\n",
6867
" _res = sts.cramervonmises_2samp(data_x, data_y)\n",
@@ -71,9 +70,11 @@
7170
"\n",
7271
"def anderson_pval(data_1, data_2):\n",
7372
" try:\n",
74-
" _res = sts.anderson_ksamp([data_1, data_2], method=sts.PermutationMethod(n_resamples=100))\n",
73+
" _res = sts.anderson_ksamp(\n",
74+
" [data_1, data_2], method=sts.PermutationMethod(n_resamples=100)\n",
75+
" )\n",
7576
" except ValueError:\n",
76-
" return 1.\n",
77+
" return 1.0\n",
7778
" return _res.pvalue\n",
7879
"\n",
7980
"\n",
@@ -87,7 +88,6 @@
8788
"\n",
8889
"\n",
8990
"def epps_singleton(data_1, data_2):\n",
90-
"\n",
9191
" # print(data_1.shape, data_2.shape)\n",
9292
" try:\n",
9393
" _out = sts.epps_singleton_2samp(data_1, data_2, axis=1).pvalue\n",
@@ -97,7 +97,6 @@
9797
"\n",
9898
"\n",
9999
"def test_all_times(data, ens_ids, test_fcn):\n",
100-
"\n",
101100
" \"\"\"Perform statistical test on two arrays across all times in the array.\n",
102101
"\n",
103102
" Parameters\n",
@@ -117,9 +116,7 @@
117116
"\n",
118117
" _pval = test_fcn(data_1.T, data_2.T)\n",
119118
" try:\n",
120-
" _out = xr.DataArray(\n",
121-
" data=_pval, dims=(\"time\",), coords={\"time\": data.time}\n",
122-
" )\n",
119+
" _out = xr.DataArray(data=_pval, dims=(\"time\",), coords={\"time\": data.time})\n",
123120
" except ValueError as _err:\n",
124121
" print(_err)\n",
125122
" return None\n",
@@ -178,9 +175,7 @@
178175
"\n",
179176
"# _ds_all = xr.concat([_ds_ctl, _ds_exp], dim=\"exp\")\n",
180177
"_ds_all = xr.concat([_ds_ctl, _ds_exp], dim=\"exp\")\n",
181-
"dvars = json.loads(\n",
182-
" open(\"../new_vars.json\", \"r\", encoding=\"utf-8\").read()\n",
183-
")[\"default\"]\n",
178+
"dvars = json.loads(open(\"../new_vars.json\", \"r\", encoding=\"utf-8\").read())[\"default\"]\n",
184179
"\n",
185180
"_ds_all_mean = _ds_all[dvars].map(rolling_mean_data, period_len=12)\n",
186181
"_emin = _ds_all_mean.ens.values.min()\n",
@@ -213,7 +208,10 @@
213208
" unique = True\n",
214209
"else:\n",
215210
" unique = False\n",
216-
"ens_sel = [randomise_new(_emin, _emax, ens_size=ens_size, ncases=2, uniq=unique) for _ in range(ninst)]"
211+
"ens_sel = [\n",
212+
" randomise_new(_emin, _emax, ens_size=ens_size, ncases=2, uniq=unique)\n",
213+
" for _ in range(ninst)\n",
214+
"]"
217215
]
218216
},
219217
{
@@ -225,7 +223,9 @@
225223
"source": [
226224
"%%time\n",
227225
"# ks_bootsrap_part = partial(ks_bootstrap, data=_ds_all_mean[dvars])\n",
228-
"ks_bootstrap_part = partial(bootstrap_test, data=_ds_all_mean[dvars], test_fcn=ks_test_vec)\n",
226+
"ks_bootstrap_part = partial(\n",
227+
" bootstrap_test, data=_ds_all_mean[dvars], test_fcn=ks_test_vec\n",
228+
")\n",
229229
"with mp.Pool(16) as pool:\n",
230230
" pvals_out_ks = xr.concat(pool.map(ks_bootstrap_part, ens_sel), dim=\"iter\")"
231231
]
@@ -238,7 +238,9 @@
238238
"outputs": [],
239239
"source": [
240240
"%%time\n",
241-
"es_bootstrap_part = partial(bootstrap_test, data=_ds_all_mean[dvars], test_fcn=epps_singleton)\n",
241+
"es_bootstrap_part = partial(\n",
242+
" bootstrap_test, data=_ds_all_mean[dvars], test_fcn=epps_singleton\n",
243+
")\n",
242244
"with mp.Pool(16) as pool:\n",
243245
" pvals_out_es = xr.concat(pool.map(es_bootstrap_part, ens_sel), dim=\"iter\")"
244246
]
@@ -251,9 +253,11 @@
251253
"outputs": [],
252254
"source": [
253255
"%%time\n",
254-
"mw_bootstrap_part = partial(bootstrap_test, data=_ds_all_mean[dvars], test_fcn=mannwhitney)\n",
256+
"mw_bootstrap_part = partial(\n",
257+
" bootstrap_test, data=_ds_all_mean[dvars], test_fcn=mannwhitney\n",
258+
")\n",
255259
"with mp.Pool(16) as pool:\n",
256-
" pvals_out_mw = xr.concat(pool.map(mw_bootstrap_part, ens_sel), dim=\"iter\")\n"
260+
" pvals_out_mw = xr.concat(pool.map(mw_bootstrap_part, ens_sel), dim=\"iter\")"
257261
]
258262
},
259263
{
@@ -275,7 +279,9 @@
275279
"outputs": [],
276280
"source": [
277281
"%%time\n",
278-
"cvm_bootstrap_part = partial(bootstrap_test, data=_ds_all_mean[dvars], test_fcn=cvm_test_vec)\n",
282+
"cvm_bootstrap_part = partial(\n",
283+
" bootstrap_test, data=_ds_all_mean[dvars], test_fcn=cvm_test_vec\n",
284+
")\n",
279285
"with mp.Pool(16) as pool:\n",
280286
" pvals_out_cvm = xr.concat(pool.map(cvm_bootstrap_part, ens_sel), dim=\"iter\")"
281287
]
@@ -363,7 +369,17 @@
363369
" # _ = axis[idx].semilogy(pvals, color=\"grey\", lw=0.5)\n",
364370
" _ = axis[idx].semilogy(np.median(pvals, axis=1), color=\"k\")\n",
365371
" # methods = [\"fdr_bh\", \"fdr_by\", \"bonferroni\", \"sidak\", \"holm-sidak\", \"holm\", \"simes-hochberg\", \"hommel\", \"fdr_tsbh\", \"fdr_tsbky\"]\n",
366-
" methods = [\"fdr_bh\", \"fdr_by\", \"bonferroni\", \"sidak\", \"holm-sidak\", \"simes-hochberg\", \"hommel\", \"fdr_tsbh\", \"fdr_tsbky\"]\n",
372+
" methods = [\n",
373+
" \"fdr_bh\",\n",
374+
" \"fdr_by\",\n",
375+
" \"bonferroni\",\n",
376+
" \"sidak\",\n",
377+
" \"holm-sidak\",\n",
378+
" \"simes-hochberg\",\n",
379+
" \"hommel\",\n",
380+
" \"fdr_tsbh\",\n",
381+
" \"fdr_tsbky\",\n",
382+
" ]\n",
367383
" # methods = [\"fdr_bh\"]\n",
368384
" for _method in methods:\n",
369385
" _pval_cr = np.array(\n",
@@ -392,7 +408,10 @@
392408
"outputs": [],
393409
"source": [
394410
"nreject = {\n",
395-
" mode: [(pvals_all[mode][i, :, -1] < 0.05).sum() for i in range(pvals_all[mode].shape[0])]\n",
411+
" mode: [\n",
412+
" (pvals_all[mode][i, :, -1] < 0.05).sum()\n",
413+
" for i in range(pvals_all[mode].shape[0])\n",
414+
" ]\n",
396415
" for mode in pvals_all\n",
397416
"}\n",
398417
"nreject_cr = {}\n",
@@ -411,7 +430,9 @@
411430
" for kdx in range(pvals_all[mode].shape[1])\n",
412431
" ]\n",
413432
" )\n",
414-
" nreject_cr[mode] = [(_pval_cr[:, i] < 0.05).sum() for i in range(pvals_all[mode].shape[0])]"
433+
" nreject_cr[mode] = [\n",
434+
" (_pval_cr[:, i] < 0.05).sum() for i in range(pvals_all[mode].shape[0])\n",
435+
" ]"
415436
]
416437
},
417438
{
@@ -466,7 +487,7 @@
466487
" # plt.gca().set_xlim([26, 46])\n",
467488
" # plt.gca().set_ylim([0, 40])\n",
468489
" plt.title(mode)\n",
469-
"plt.tight_layout()\n"
490+
"plt.tight_layout()"
470491
]
471492
},
472493
{
@@ -517,7 +538,7 @@
517538
" \"desc\": f\"2-sample {test_name} p-value\",\n",
518539
" \"long_name\": f\"{test_name}_pvalue\",\n",
519540
" \"short_name\": f\"{test_name}_pvalue\",\n",
520-
" },\n",
541+
" },\n",
521542
" )"
522543
]
523544
},
@@ -530,7 +551,9 @@
530551
"source": [
531552
"ds_out = {}\n",
532553
"for _test in pvals_all:\n",
533-
" ds_out[f\"{_test}_pval\"] = to_dataarray(pvals_all[_test], dvars, pvals_out_ks.time, _test)\n",
554+
" ds_out[f\"{_test}_pval\"] = to_dataarray(\n",
555+
" pvals_all[_test], dvars, pvals_out_ks.time, _test\n",
556+
" )\n",
534557
"xr.Dataset(ds_out)"
535558
]
536559
},
@@ -541,7 +564,12 @@
541564
"metadata": {},
542565
"outputs": [],
543566
"source": [
544-
"plt.loglog(pvals_all[\"ks\"][:, :, -1].flatten(), pvals_all[\"cvm\"][:, :, -1].flatten(), \".\", alpha=0.5)"
567+
"plt.loglog(\n",
568+
" pvals_all[\"ks\"][:, :, -1].flatten(),\n",
569+
" pvals_all[\"cvm\"][:, :, -1].flatten(),\n",
570+
" \".\",\n",
571+
" alpha=0.5,\n",
572+
")"
545573
]
546574
},
547575
{
@@ -552,7 +580,9 @@
552580
"outputs": [],
553581
"source": [
554582
"_ds_all\n",
555-
"mwu = sts.mannwhitneyu(_ds_all[\"T\"].isel(exp=0).values, _ds_all[\"T\"].isel(exp=1).values, axis=0)"
583+
"mwu = sts.mannwhitneyu(\n",
584+
" _ds_all[\"T\"].isel(exp=0).values, _ds_all[\"T\"].isel(exp=1).values, axis=0\n",
585+
")"
556586
]
557587
},
558588
{
@@ -562,7 +592,9 @@
562592
"metadata": {},
563593
"outputs": [],
564594
"source": [
565-
"esp = sts.epps_singleton_2samp(_ds_all[\"T\"].isel(exp=0).values, _ds_all[\"T\"].isel(exp=1).values, axis=0).pvalue"
595+
"esp = sts.epps_singleton_2samp(\n",
596+
" _ds_all[\"T\"].isel(exp=0).values, _ds_all[\"T\"].isel(exp=1).values, axis=0\n",
597+
").pvalue"
566598
]
567599
},
568600
{

0 commit comments

Comments
 (0)