use better random seeds and tell others about it (#1340)

sappelhoff · web-flow · commit 049fa394a8f7 · 2024-11-22T16:25:28.000Z
* use better random seeds and tell others about it

* add more details about rng reasoning
diff --git a/examples/anonymize_dataset.py b/examples/anonymize_dataset.py
@@ -194,7 +194,7 @@
 # To ensure results are reproducible across runs, you can pass the
 # ``random_state`` parameter, causing the random number generator to produce
 # the same results every time you execute the function. This may come in handy
-# e.g. in situations where you discover a problem with the data while working
+# in situations where you discover a problem with the data while working
 # with the anonymized dataset, fix the issue in the original dataset, and
 # run anonymization again.
 #
@@ -203,6 +203,15 @@
 # in a dataset with multiple subjects will the effects of randomly-picked IDs
 # become apparent.)
 #
+# A good random seed is truly random. Avoid using random seeds from popular
+# culture, like "42" or "1337". To obtain a truly random seed, you can paste
+# the following into your console:
+# ``python -c "import secrets; print(secrets.randbits(31))"``
+# Here, 31 bits correspond to the maximum seed "size" that the legacy
+# ``RandomState`` by NumPy, which many scientific libraries still rely on,
+# can accept. For more information, see also this blog post on
+# `NumPy RNG best practices <https://blog.scientific-python.org/numpy/numpy-rng/>`_.
+#
 # .. note::
 #    Passing ``random_state`` merely guarantees that subject IDs and time shift
 #    remain the same across anonymization runs if the original dataset
@@ -218,6 +227,6 @@
         bids_root_in=bids_root,
         bids_root_out=bids_root_anon,
         datatypes="meg",
-        random_state=42,
+        random_state=293201004,
     )
     print_dir_tree(bids_root_anon)
diff --git a/mne_bids/tests/test_write.py b/mne_bids/tests/test_write.py
@@ -3868,7 +3868,7 @@ def test_anonymize_dataset(_bids_validate, tmpdir):
         bids_root_in=bids_root,
         bids_root_out=bids_root_anon,
         datatypes=["meg", "anat"],
-        random_state=42,
+        random_state=1442792182,
     )
     _bids_validate(bids_root_anon)
     assert (bids_root_anon / "sub-1" / "ses-01" / "meg").exists()

Original file line number	Diff line number	Diff line change
`@@ -3868,7 +3868,7 @@ def test_anonymize_dataset(_bids_validate, tmpdir):`
`3868`	`3868`	`bids_root_in=bids_root,`
`3869`	`3869`	`bids_root_out=bids_root_anon,`
`3870`	`3870`	`datatypes=["meg", "anat"],`
`3871`		`- random_state=42,`
	`3871`	`+ random_state=1442792182,`
`3872`	`3872`	`)`
`3873`	`3873`	`_bids_validate(bids_root_anon)`
`3874`	`3874`	`assert (bids_root_anon / "sub-1" / "ses-01" / "meg").exists()`