-
Notifications
You must be signed in to change notification settings - Fork 191
Expand file tree
/
Copy path_settings.py
More file actions
532 lines (452 loc) · 18 KB
/
_settings.py
File metadata and controls
532 lines (452 loc) · 18 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
from __future__ import annotations
import inspect
import os
import textwrap
from collections.abc import Iterable
from contextlib import contextmanager
from dataclasses import dataclass, field, fields
from enum import Enum
from functools import partial
from inspect import Parameter, signature
from types import GenericAlias, NoneType
from typing import TYPE_CHECKING, Any, NamedTuple, cast
from ._warnings import warn
from .compat import old_positionals
if TYPE_CHECKING:
from collections.abc import Callable, Generator, Sequence
from typing import Any, Self, TypeGuard
class DeprecatedOption(NamedTuple):
    """Record of an option that has been marked as deprecated."""

    # Name of the deprecated option.
    option: str
    # Optional custom deprecation message; `None` when none was supplied.
    message: str | None
    # Version targeted for removal of the option.
    removal_version: str | None
def _is_plain_type(obj: object) -> TypeGuard[type]:
return isinstance(obj, type) and not isinstance(obj, GenericAlias)
def describe(self: RegisteredOption, *, as_rst: bool = False) -> str:
type_str = self.type.__name__ if _is_plain_type(self.type) else str(self.type)
if as_rst:
default_str = repr(self.default_value).replace("\\", "\\\\")
doc = f"""\
.. attribute:: settings.{self.option}
:type: {type_str}
:value: {default_str}
{self.description}
"""
else:
doc = f"""\
{self.option}: `{type_str}`
{self.description} (default: `{self.default_value!r}`).
"""
return textwrap.dedent(doc)
class RegisteredOption[T](NamedTuple):
    """Record of an option registered with a `SettingsManager`."""

    # Name of the option.
    option: str
    # Default value of the option.
    default_value: T
    # Human-readable description used when rendering documentation.
    description: str
    # Called as `validate(value, settings_manager)` to check a candidate value.
    validate: Callable[[T, SettingsManager], None]
    # Type that is displayed for the option in its documentation.
    type: object
    # Attach the module-level `describe` function as a method.
    describe = describe
def check_and_get_environ_var[T](
key: str,
default_value: str,
allowed_values: Sequence[str] | None = None,
cast: Callable[[Any], T] | type[Enum] = lambda x: x,
) -> T:
"""Get the environment variable and return it is a (potentially) non-string, usable value.
Parameters
----------
key
The environment variable name.
default_value
The default value for `os.environ.get`.
allowed_values
Allowable string values., by default None
cast
Casting from the string to a (potentially different) python object, by default lambdax:x
Returns
-------
The casted value.
"""
environ_value_or_default_value = os.environ.get(key, default_value)
if (
allowed_values is not None
and environ_value_or_default_value not in allowed_values
):
msg = (
f"Value {environ_value_or_default_value!r} is not in allowed {allowed_values} for environment variable {key}. "
f"Default {default_value} will be used."
)
warn(msg, UserWarning)
environ_value_or_default_value = default_value
return (
cast(environ_value_or_default_value)
if not isinstance(cast, type(Enum))
else cast[environ_value_or_default_value]
)
def check_and_get_bool(option: str, default_value: bool) -> bool:  # noqa: FBT001
    """Read `ANNDATA_<OPTION>` from the environment as a boolean (`"0"` or `"1"`)."""
    env_key = f"ANNDATA_{option.upper()}"
    fallback = str(int(default_value))
    return check_and_get_environ_var(
        env_key,
        fallback,
        allowed_values=["0", "1"],
        cast=lambda raw: bool(int(raw)),
    )
def check_and_get_bool_or_none(option: str, default_value: bool | None) -> bool | None:  # noqa: FBT001
    """Read `ANNDATA_<OPTION>` as a boolean, where the empty string stands for `None`."""
    env_key = f"ANNDATA_{option.upper()}"
    # `None` is encoded as the empty string, booleans as "0" / "1".
    fallback = "" if default_value is None else str(int(default_value))
    return check_and_get_environ_var(
        env_key,
        fallback,
        allowed_values=["0", "1", ""],
        cast=lambda raw: None if raw == "" else bool(int(raw)),
    )
def check_and_get_int(option: str, default_value: int) -> int:
    """Read `ANNDATA_<OPTION>` from the environment and convert it with `int`."""
    env_key = f"ANNDATA_{option.upper()}"
    fallback = str(int(default_value))
    return check_and_get_environ_var(env_key, fallback, allowed_values=None, cast=int)
# Template for the dynamically generated `SettingsManager.__doc__`:
# `{options_description}` is filled in through `str.format` with the output of
# `SettingsManager.describe`.
_docstring = """
This manager allows users to customize settings for the anndata package.
Settings here will generally be for advanced use-cases and should be used with caution.
The following options are available:
{options_description}
For setting an option please use :func:`~anndata.settings.override` (local) or set the above attributes directly (global) i.e., `anndata.settings.my_setting = foo`.
For assignment by environment variable, use the variable name in all caps with `ANNDATA_` as the prefix before import of :mod:`anndata`.
For boolean environment variable setting, use 1 for `True` and 0 for `False`.
"""
@dataclass
class SettingsManager:
_registered_options: dict[str, RegisteredOption] = field(default_factory=dict)
_deprecated_options: dict[str, DeprecatedOption] = field(default_factory=dict)
_config: dict[str, object] = field(default_factory=dict)
__doc_tmpl__: str = _docstring
def describe(
self,
option: str | Iterable[str] | None = None,
*,
should_print_description: bool = True,
as_rst: bool = False,
) -> str:
"""Print and/or return a (string) description of the option(s).
Parameters
----------
option
Option(s) to be described, by default None (i.e., do all option)
should_print_description
Whether or not to print the description in addition to returning it.
Returns
-------
The description.
"""
describe = partial(
self.describe,
should_print_description=should_print_description,
as_rst=as_rst,
)
if option is None:
return describe(self._registered_options.keys())
if isinstance(option, Iterable) and not isinstance(option, str):
return "\n".join([describe(k) for k in option])
registered_option = self._registered_options[option]
doc = registered_option.describe(as_rst=as_rst).rstrip("\n")
if option in self._deprecated_options:
opt = self._deprecated_options[option]
if opt.message is not None:
doc += f" *{opt.message}"
doc += f" {option} will be removed in {opt.removal_version}.*"
if should_print_description:
print(doc)
return doc
def deprecate(
self, option: str, removal_version: str, message: str | None = None
) -> None:
"""Deprecate options with a message at a version.
Parameters
----------
option
Which option should be deprecated.
removal_version
The version targeted for removal.
message
A custom message.
"""
self._deprecated_options[option] = DeprecatedOption(
option, message, removal_version
)
@old_positionals("default_value", "description", "validate", "option_type")
def register[T](
self,
option: str,
*,
default_value: T,
description: str,
validate: Callable[[T, Self], None],
option_type: object | None = None,
get_from_env: Callable[[str, T], T] = lambda x, y: y,
) -> None:
"""Register an option so it can be set/described etc. by end-users
Parameters
----------
option
Option to be set.
default_value
Default value with which to set the option.
description
Description to be used in the docstring.
validate
A function which raises a `ValueError` or `TypeError` if the value is invalid.
option_type
Optional override for the option type to be displayed. Otherwise `type(default_value)`.
get_from_env
An optional function which takes as arguments the name of the option and a default value and returns the value from the environment variable `ANNDATA_CAPS_OPTION` (or default if not present).
Default behavior is to return `default_value` without checking the environment.
"""
try:
validate(default_value, self)
except (ValueError, TypeError) as e:
e.add_note(f"for option {option!r}")
raise e
option_type = type(default_value) if option_type is None else option_type
self._registered_options[option] = RegisteredOption(
option, default_value, description, validate, option_type
)
self._config[option] = get_from_env(option, default_value)
self._update_override_function_for_new_option(option)
def _update_override_function_for_new_option(
self,
option: str,
) -> None:
"""This function updates the keyword arguments, docstring, and annotations of the `SettingsManager.override` function as the `SettingsManager.register` method is called.
Parameters
----------
option
The option being registered for which the override function needs updating.
"""
option_type = self._registered_options[option].type
# Update annotations for type checking.
self.override.__annotations__[option] = option_type
# __signature__ needs to be updated for tab autocompletion in IPython.
# See https://github.com/ipython/ipython/issues/11624 for inspiration.
self.override.__func__.__signature__ = signature(self.override).replace(
parameters=[
Parameter(name="self", kind=Parameter.POSITIONAL_ONLY),
*[
Parameter(
name=k,
annotation=option_type,
kind=Parameter.KEYWORD_ONLY,
)
for k in self._registered_options
],
]
)
# Update docstring for `SettingsManager.override` as well.
doc = textwrap.dedent(cast("str", self.override.__doc__))
insert_index = doc.find("\n\nYields")
assert insert_index != -1
option_docstring = "".join(
self.describe(option, should_print_description=False).splitlines(
keepends=True
)
)
self.override.__func__.__doc__ = (
f"{doc[:insert_index]}\n{option_docstring}{doc[insert_index:]}"
)
def __setattr__(self, option: str, val: object) -> None:
"""
Set an option to a value. To see the allowed option to be set and their description,
use describe_option.
Parameters
----------
option
Option to be set.
val
Value with which to set the option.
Raises
------
AttributeError
If the option has not been registered, this function will raise an error.
"""
if option in {f.name for f in fields(self)}:
return super().__setattr__(option, val)
elif option not in self._registered_options:
msg = (
f"{option} is not an available option for anndata. "
"Please open an issue if you believe this is a mistake."
)
raise AttributeError(msg)
registered_option = self._registered_options[option]
registered_option.validate(val, self)
self._config[option] = val
def __getattr__(self, option: str) -> object:
"""
Gets the option's value.
Parameters
----------
option
Option to be got.
Returns
-------
Value of the option.
"""
if option in self._deprecated_options:
deprecated = self._deprecated_options[option]
msg = f"{option!r} will be removed in {deprecated.removal_version}. {deprecated.message}"
warn(msg, FutureWarning)
if option in self._config:
return self._config[option]
msg = f"{option} not found."
raise AttributeError(msg)
def __dir__(self) -> Iterable[str]:
return sorted((*super().__dir__(), *self._config.keys()))
def reset(self, option: Iterable[str] | str) -> None:
"""
Resets option(s) to its (their) default value(s).
Parameters
----------
option
The option(s) to be reset.
"""
if isinstance(option, Iterable) and not isinstance(option, str):
for opt in option:
self.reset(opt)
else:
self._config[option] = self._registered_options[option].default_value
@contextmanager
def override(self, **overrides) -> Generator[None]:
"""
Provides local override via keyword arguments as a context manager.
Parameters
----------
Yields
------
None
"""
restore = {a: getattr(self, a) for a in overrides}
try:
# Preserve order so that settings that depend on each other can be overridden together i.e., always override zarr version before sharding.
# Otherwise an error would be raised setting sharding before zarr version if the zarr version is 2.
for k in self._config:
if k in overrides:
setattr(self, k, overrides.get(k))
yield None
finally:
# In the try block, we went in the forward order i.e., zarr version before sharding, but in the reset here, we go in the reverse order i.e., sharding before zarr version.
# Otherwise an error would be raised if we reversed the zarr version first and it was 3 previously.
for k in reversed(self._config.keys()):
if k in restore:
setattr(self, k, restore.get(k))
def __repr__(self) -> str:
params = "".join(f"\t{k}={v!r},\n" for k, v in self._config.items())
return f"{type(self).__name__}(\n{params}\n)"
@property
def __doc__(self):
in_sphinx = any("/sphinx/" in frame.filename for frame in inspect.stack())
options_description = self.describe(
should_print_description=False, as_rst=in_sphinx
)
return self.__doc_tmpl__.format(
options_description=options_description,
)
# Module-level manager instance on which the options below are registered.
settings = SettingsManager()
# Mark `copy_on_write_X` as deprecated with removal version "0.14" (no custom message).
settings.deprecate("copy_on_write_X", "0.14")
##################################################################################
# PLACE REGISTERED SETTINGS HERE SO THEY CAN BE PICKED UP FOR DOCSTRING CREATION #
##################################################################################
def gen_validator[V](
_type: type[V] | tuple[type[V], ...], /
) -> Callable[[V, SettingsManager], None]:
def validate_type(val: V, settings: SettingsManager) -> None:
if not isinstance(val, _type):
msg = f"{val} not valid {_type}"
raise TypeError(msg)
return validate_type
# Reusable type validators for the registrations below.
validate_bool = gen_validator(bool)
validate_int = gen_validator(int)

# Boolean option; its initial value may come from ANNDATA_REMOVE_UNUSED_CATEGORIES.
settings.register(
    "remove_unused_categories",
    default_value=True,
    description="Whether or not to remove unused categories with :class:`~pandas.Categorical`.",
    validate=validate_bool,
    get_from_env=check_and_get_bool,
)

# Boolean option; its initial value may come from ANNDATA_CHECK_UNIQUENESS.
settings.register(
    "check_uniqueness",
    default_value=True,
    description=(
        "Whether or not to check uniqueness of the `obs` indices on `__init__` of :class:`~anndata.AnnData`."
    ),
    validate=validate_bool,
    get_from_env=check_and_get_bool,
)

# Tri-state option (`True` / `False` / `None`); the displayed type is given
# explicitly because the default value is `None`.
settings.register(
    "allow_write_nullable_strings",
    default_value=None,
    description=(
        "Whether or not to allow writing of `pd.arrays.[Arrow]StringArray`. "
        "When set to `None`, it will be inferred from `pd.options.future.infer_string`. "
        "When set to `False` explicitly, we will try writing `string` arrays in the old, non-nullable format."
    ),
    validate=gen_validator((bool, NoneType)),
    option_type=bool | None,
    get_from_env=check_and_get_bool_or_none,
)
def validate_zarr_write_format(format: int, settings: SettingsManager) -> None:
    """Validate the `zarr_write_format` option.

    Parameters
    ----------
    format
        Candidate zarr format version.
    settings
        The settings manager, passed through to the integer validator.

    Raises
    ------
    TypeError
        If `format` is not an `int`.
    ValueError
        If `format` is neither 2 nor 3.
    """
    validate_int(format, settings)
    if format not in {2, 3}:
        # Both v2 and v3 are accepted, so the message names the allowed values.
        msg = f"zarr_write_format must be 2 or 3, got {format!r}"
        raise ValueError(msg)
# Integer option restricted to 2 or 3; the environment variable
# ANNDATA_ZARR_WRITE_FORMAT is accepted only with the values "2" or "3".
settings.register(
    "zarr_write_format",
    default_value=2,
    description="Which version of zarr to write to when anndata must internally open a write-able zarr group.",
    validate=validate_zarr_write_format,
    get_from_env=lambda name, default: check_and_get_environ_var(
        f"ANNDATA_{name.upper()}", str(default), ["2", "3"], int
    ),
)
def validate_sparse_settings(val: Any, settings: SettingsManager) -> None:
    """Check that `val` is a `bool` by delegating to `validate_bool`."""
    # NOTE(review): no registration visible in this file uses this validator — confirm it is still needed.
    validate_bool(val, settings)
# Boolean option; its initial value may come from ANNDATA_USE_SPARSE_ARRAY_ON_READ.
settings.register(
    "use_sparse_array_on_read",
    default_value=False,
    description="Whether or not to use :class:`scipy.sparse.sparray` as the default class when reading in data",
    validate=validate_bool,
    get_from_env=check_and_get_bool,
)

# Integer option; its initial value may come from ANNDATA_MIN_ROWS_FOR_CHUNKED_H5_COPY.
settings.register(
    "min_rows_for_chunked_h5_copy",
    default_value=1000,
    description="Minimum number of rows at a time to copy when writing out an H5 Dataset to a new location",
    validate=validate_int,
    get_from_env=check_and_get_int,
)

# Boolean option; its initial value may come from ANNDATA_DISALLOW_FORWARD_SLASH_IN_H5AD.
settings.register(
    "disallow_forward_slash_in_h5ad",
    default_value=False,
    description="Whether or not to disallow the `/` character in keys for h5ad files",
    validate=validate_bool,
    get_from_env=check_and_get_bool,
)

# Boolean option; its initial value may come from
# ANNDATA_WRITE_CSR_CSC_INDICES_WITH_MIN_POSSIBLE_DTYPE.
settings.register(
    "write_csr_csc_indices_with_min_possible_dtype",
    default_value=False,
    description="Write a csr or csc matrix with the minimum possible data type for `indices`, always unsigned integer.",
    validate=validate_bool,
    get_from_env=check_and_get_bool,
)

# Tri-state option (`True` / `False` / `None`); the displayed type is given
# explicitly because the default value is `None`.
settings.register(
    "auto_shard_zarr_v3",
    default_value=None,
    description="Whether or not to use zarr's auto computation of sharding for v3. For v2 this setting will be ignored. The setting will apply to all calls to anndata's writing mechanism (write_zarr / write_elem) and will **not** override any user-defined kwargs for shards.",
    validate=gen_validator((bool, NoneType)),
    option_type=bool | None,
    get_from_env=check_and_get_bool_or_none,
)

# This option was marked as deprecated above, before its registration.
settings.register(
    "copy_on_write_X",
    default_value=True,
    description=(
        "Whether to copy-on-write X. "
        "Currently `my_adata_view[subset].X = value` will write back to the original AnnData object at the `subset` location. "
        "`X` is the only element where this behavior is implemented though."
    ),
    validate=validate_bool,
    get_from_env=check_and_get_bool,
)
##################################################################################
##################################################################################