|
18 | 18 | "from functools import partial\n", |
19 | 19 | "from statsmodels.stats import multitest as smm\n", |
20 | 20 | "import multiprocessing as mp\n", |
21 | | - "import seaborn as sns\n", |
22 | | - "from dask.distributed import Client" |
| 21 | + "import seaborn as sns" |
23 | 22 | ] |
24 | 23 | }, |
25 | 24 | { |
|
39 | 38 | " ens_idx = sorted(range(ens_min, ens_max + 1))\n", |
40 | 39 | " assert len(ens_idx) > ens_size, \"ENSEMBLE SIZE MUST BE SMALLER THAN ENSEMBLE RANGE\"\n", |
41 | 40 | " if not with_repl and not uniq:\n", |
42 | | - " selected = [\n", |
43 | | - " random.sample(ens_idx, ens_size)\n", |
44 | | - " for _ in range(ncases)\n", |
45 | | - " ]\n", |
| 41 | + " selected = [random.sample(ens_idx, ens_size) for _ in range(ncases)]\n", |
46 | 42 | " elif not with_repl:\n", |
47 | 43 | " _sel = random.sample(ens_idx, ens_size * ncases)\n", |
48 | | - " selected = [_sel[idx * ens_size : (idx + 1) * ens_size] for idx in range(ncases)]\n", |
| 44 | + " selected = [\n", |
| 45 | + " _sel[idx * ens_size : (idx + 1) * ens_size] for idx in range(ncases)\n", |
| 46 | + " ]\n", |
49 | 47 | " else:\n", |
50 | 48 | " selected = [\n", |
51 | 49 | " [random.randint(ens_min, ens_max) for _ in range(ens_size)]\n", |
|
63 | 61 | " _res = sts.mstats.ks_2samp(data_x, data_y)\n", |
64 | 62 | " return _res[1]\n", |
65 | 63 | "\n", |
| 64 | + "\n", |
66 | 65 | "def cvm_2samp(data_x, data_y):\n", |
67 | 66 | " \"\"\"Perform a 2 sample Cramer von Mises test, map output to a tuple.\"\"\"\n", |
68 | 67 | " _res = sts.cramervonmises_2samp(data_x, data_y)\n", |
|
71 | 70 | "\n", |
72 | 71 | "def anderson_pval(data_1, data_2):\n", |
73 | 72 | " try:\n", |
74 | | - " _res = sts.anderson_ksamp([data_1, data_2], method=sts.PermutationMethod(n_resamples=100))\n", |
| 73 | + " _res = sts.anderson_ksamp(\n", |
| 74 | + " [data_1, data_2], method=sts.PermutationMethod(n_resamples=100)\n", |
| 75 | + " )\n", |
75 | 76 | " except ValueError:\n", |
76 | | - " return 1.\n", |
| 77 | + " return 1.0\n", |
77 | 78 | " return _res.pvalue\n", |
78 | 79 | "\n", |
79 | 80 | "\n", |
|
87 | 88 | "\n", |
88 | 89 | "\n", |
89 | 90 | "def epps_singleton(data_1, data_2):\n", |
90 | | - "\n", |
91 | 91 | " # print(data_1.shape, data_2.shape)\n", |
92 | 92 | " try:\n", |
93 | 93 | " _out = sts.epps_singleton_2samp(data_1, data_2, axis=1).pvalue\n", |
|
97 | 97 | "\n", |
98 | 98 | "\n", |
99 | 99 | "def test_all_times(data, ens_ids, test_fcn):\n", |
100 | | - "\n", |
101 | 100 | " \"\"\"Perform statistical test on two arrays across all times in the array.\n", |
102 | 101 | "\n", |
103 | 102 | " Parameters\n", |
|
117 | 116 | "\n", |
118 | 117 | " _pval = test_fcn(data_1.T, data_2.T)\n", |
119 | 118 | " try:\n", |
120 | | - " _out = xr.DataArray(\n", |
121 | | - " data=_pval, dims=(\"time\",), coords={\"time\": data.time}\n", |
122 | | - " )\n", |
| 119 | + " _out = xr.DataArray(data=_pval, dims=(\"time\",), coords={\"time\": data.time})\n", |
123 | 120 | " except ValueError as _err:\n", |
124 | 121 | " print(_err)\n", |
125 | 122 | " return None\n", |
|
178 | 175 | "\n", |
179 | 176 | "# _ds_all = xr.concat([_ds_ctl, _ds_exp], dim=\"exp\")\n", |
180 | 177 | "_ds_all = xr.concat([_ds_ctl, _ds_exp], dim=\"exp\")\n", |
181 | | - "dvars = json.loads(\n", |
182 | | - " open(\"../new_vars.json\", \"r\", encoding=\"utf-8\").read()\n", |
183 | | - ")[\"default\"]\n", |
| 178 | + "dvars = json.loads(open(\"../new_vars.json\", \"r\", encoding=\"utf-8\").read())[\"default\"]\n", |
184 | 179 | "\n", |
185 | 180 | "_ds_all_mean = _ds_all[dvars].map(rolling_mean_data, period_len=12)\n", |
186 | 181 | "_emin = _ds_all_mean.ens.values.min()\n", |
|
213 | 208 | " unique = True\n", |
214 | 209 | "else:\n", |
215 | 210 | " unique = False\n", |
216 | | - "ens_sel = [randomise_new(_emin, _emax, ens_size=ens_size, ncases=2, uniq=unique) for _ in range(ninst)]" |
| 211 | + "ens_sel = [\n", |
| 212 | + " randomise_new(_emin, _emax, ens_size=ens_size, ncases=2, uniq=unique)\n", |
| 213 | + " for _ in range(ninst)\n", |
| 214 | + "]" |
217 | 215 | ] |
218 | 216 | }, |
219 | 217 | { |
|
225 | 223 | "source": [ |
226 | 224 | "%%time\n", |
227 | 225 | "# ks_bootsrap_part = partial(ks_bootstrap, data=_ds_all_mean[dvars])\n", |
228 | | - "ks_bootstrap_part = partial(bootstrap_test, data=_ds_all_mean[dvars], test_fcn=ks_test_vec)\n", |
| 226 | + "ks_bootstrap_part = partial(\n", |
| 227 | + " bootstrap_test, data=_ds_all_mean[dvars], test_fcn=ks_test_vec\n", |
| 228 | + ")\n", |
229 | 229 | "with mp.Pool(16) as pool:\n", |
230 | 230 | " pvals_out_ks = xr.concat(pool.map(ks_bootstrap_part, ens_sel), dim=\"iter\")" |
231 | 231 | ] |
|
238 | 238 | "outputs": [], |
239 | 239 | "source": [ |
240 | 240 | "%%time\n", |
241 | | - "es_bootstrap_part = partial(bootstrap_test, data=_ds_all_mean[dvars], test_fcn=epps_singleton)\n", |
| 241 | + "es_bootstrap_part = partial(\n", |
| 242 | + " bootstrap_test, data=_ds_all_mean[dvars], test_fcn=epps_singleton\n", |
| 243 | + ")\n", |
242 | 244 | "with mp.Pool(16) as pool:\n", |
243 | 245 | " pvals_out_es = xr.concat(pool.map(es_bootstrap_part, ens_sel), dim=\"iter\")" |
244 | 246 | ] |
|
251 | 253 | "outputs": [], |
252 | 254 | "source": [ |
253 | 255 | "%%time\n", |
254 | | - "mw_bootstrap_part = partial(bootstrap_test, data=_ds_all_mean[dvars], test_fcn=mannwhitney)\n", |
| 256 | + "mw_bootstrap_part = partial(\n", |
| 257 | + " bootstrap_test, data=_ds_all_mean[dvars], test_fcn=mannwhitney\n", |
| 258 | + ")\n", |
255 | 259 | "with mp.Pool(16) as pool:\n", |
256 | | - " pvals_out_mw = xr.concat(pool.map(mw_bootstrap_part, ens_sel), dim=\"iter\")\n" |
| 260 | + " pvals_out_mw = xr.concat(pool.map(mw_bootstrap_part, ens_sel), dim=\"iter\")" |
257 | 261 | ] |
258 | 262 | }, |
259 | 263 | { |
|
275 | 279 | "outputs": [], |
276 | 280 | "source": [ |
277 | 281 | "%%time\n", |
278 | | - "cvm_bootstrap_part = partial(bootstrap_test, data=_ds_all_mean[dvars], test_fcn=cvm_test_vec)\n", |
| 282 | + "cvm_bootstrap_part = partial(\n", |
| 283 | + " bootstrap_test, data=_ds_all_mean[dvars], test_fcn=cvm_test_vec\n", |
| 284 | + ")\n", |
279 | 285 | "with mp.Pool(16) as pool:\n", |
280 | 286 | " pvals_out_cvm = xr.concat(pool.map(cvm_bootstrap_part, ens_sel), dim=\"iter\")" |
281 | 287 | ] |
|
363 | 369 | " # _ = axis[idx].semilogy(pvals, color=\"grey\", lw=0.5)\n", |
364 | 370 | " _ = axis[idx].semilogy(np.median(pvals, axis=1), color=\"k\")\n", |
365 | 371 | " # methods = [\"fdr_bh\", \"fdr_by\", \"bonferroni\", \"sidak\", \"holm-sidak\", \"holm\", \"simes-hochberg\", \"hommel\", \"fdr_tsbh\", \"fdr_tsbky\"]\n", |
366 | | - " methods = [\"fdr_bh\", \"fdr_by\", \"bonferroni\", \"sidak\", \"holm-sidak\", \"simes-hochberg\", \"hommel\", \"fdr_tsbh\", \"fdr_tsbky\"]\n", |
| 372 | + " methods = [\n", |
| 373 | + " \"fdr_bh\",\n", |
| 374 | + " \"fdr_by\",\n", |
| 375 | + " \"bonferroni\",\n", |
| 376 | + " \"sidak\",\n", |
| 377 | + " \"holm-sidak\",\n", |
| 378 | + " \"simes-hochberg\",\n", |
| 379 | + " \"hommel\",\n", |
| 380 | + " \"fdr_tsbh\",\n", |
| 381 | + " \"fdr_tsbky\",\n", |
| 382 | + " ]\n", |
367 | 383 | " # methods = [\"fdr_bh\"]\n", |
368 | 384 | " for _method in methods:\n", |
369 | 385 | " _pval_cr = np.array(\n", |
|
392 | 408 | "outputs": [], |
393 | 409 | "source": [ |
394 | 410 | "nreject = {\n", |
395 | | - " mode: [(pvals_all[mode][i, :, -1] < 0.05).sum() for i in range(pvals_all[mode].shape[0])]\n", |
| 411 | + " mode: [\n", |
| 412 | + " (pvals_all[mode][i, :, -1] < 0.05).sum()\n", |
| 413 | + " for i in range(pvals_all[mode].shape[0])\n", |
| 414 | + " ]\n", |
396 | 415 | " for mode in pvals_all\n", |
397 | 416 | "}\n", |
398 | 417 | "nreject_cr = {}\n", |
|
411 | 430 | " for kdx in range(pvals_all[mode].shape[1])\n", |
412 | 431 | " ]\n", |
413 | 432 | " )\n", |
414 | | - " nreject_cr[mode] = [(_pval_cr[:, i] < 0.05).sum() for i in range(pvals_all[mode].shape[0])]" |
| 433 | + " nreject_cr[mode] = [\n", |
| 434 | + " (_pval_cr[:, i] < 0.05).sum() for i in range(pvals_all[mode].shape[0])\n", |
| 435 | + " ]" |
415 | 436 | ] |
416 | 437 | }, |
417 | 438 | { |
|
466 | 487 | " # plt.gca().set_xlim([26, 46])\n", |
467 | 488 | " # plt.gca().set_ylim([0, 40])\n", |
468 | 489 | " plt.title(mode)\n", |
469 | | - "plt.tight_layout()\n" |
| 490 | + "plt.tight_layout()" |
470 | 491 | ] |
471 | 492 | }, |
472 | 493 | { |
|
517 | 538 | " \"desc\": f\"2-sample {test_name} p-value\",\n", |
518 | 539 | " \"long_name\": f\"{test_name}_pvalue\",\n", |
519 | 540 | " \"short_name\": f\"{test_name}_pvalue\",\n", |
520 | | - " },\n", |
| 541 | + " },\n", |
521 | 542 | " )" |
522 | 543 | ] |
523 | 544 | }, |
|
530 | 551 | "source": [ |
531 | 552 | "ds_out = {}\n", |
532 | 553 | "for _test in pvals_all:\n", |
533 | | - " ds_out[f\"{_test}_pval\"] = to_dataarray(pvals_all[_test], dvars, pvals_out_ks.time, _test)\n", |
| 554 | + " ds_out[f\"{_test}_pval\"] = to_dataarray(\n", |
| 555 | + " pvals_all[_test], dvars, pvals_out_ks.time, _test\n", |
| 556 | + " )\n", |
534 | 557 | "xr.Dataset(ds_out)" |
535 | 558 | ] |
536 | 559 | }, |
|
541 | 564 | "metadata": {}, |
542 | 565 | "outputs": [], |
543 | 566 | "source": [ |
544 | | - "plt.loglog(pvals_all[\"ks\"][:, :, -1].flatten(), pvals_all[\"cvm\"][:, :, -1].flatten(), \".\", alpha=0.5)" |
| 567 | + "plt.loglog(\n", |
| 568 | + " pvals_all[\"ks\"][:, :, -1].flatten(),\n", |
| 569 | + " pvals_all[\"cvm\"][:, :, -1].flatten(),\n", |
| 570 | + " \".\",\n", |
| 571 | + " alpha=0.5,\n", |
| 572 | + ")" |
545 | 573 | ] |
546 | 574 | }, |
547 | 575 | { |
|
552 | 580 | "outputs": [], |
553 | 581 | "source": [ |
554 | 582 | "_ds_all\n", |
555 | | - "mwu = sts.mannwhitneyu(_ds_all[\"T\"].isel(exp=0).values, _ds_all[\"T\"].isel(exp=1).values, axis=0)" |
| 583 | + "mwu = sts.mannwhitneyu(\n", |
| 584 | + " _ds_all[\"T\"].isel(exp=0).values, _ds_all[\"T\"].isel(exp=1).values, axis=0\n", |
| 585 | + ")" |
556 | 586 | ] |
557 | 587 | }, |
558 | 588 | { |
|
562 | 592 | "metadata": {}, |
563 | 593 | "outputs": [], |
564 | 594 | "source": [ |
565 | | - "esp = sts.epps_singleton_2samp(_ds_all[\"T\"].isel(exp=0).values, _ds_all[\"T\"].isel(exp=1).values, axis=0).pvalue" |
| 595 | + "esp = sts.epps_singleton_2samp(\n", |
| 596 | + " _ds_all[\"T\"].isel(exp=0).values, _ds_all[\"T\"].isel(exp=1).values, axis=0\n", |
| 597 | + ").pvalue" |
566 | 598 | ] |
567 | 599 | }, |
568 | 600 | { |
|
0 commit comments