|
1 | | -"""Utility functions for working with butler commands""" |
| 1 | +"""Module in support of working with Butlers. |
| 2 | +
|
| 3 | +A CM Service that has a configured ``BUTLER_REPO_INDEX`` provides a |
| 4 | +factory function that manages a pool of Butler instances for each of these |
| 5 | +repos. This factory function is inspired by the |
| 6 | +`lsst.daf.butler.LabeledButlerFactory` and provides ``clone()`` instances of |
| 7 | +available Butlers when asked to provide one. |
| 8 | +
|
| 9 | +Notes |
| 10 | +----- |
| 11 | +The butler "factory" follows a "global" pattern where it is assigned to |
| 12 | +a module-level variable at import-time. Other modules can import this factory |
| 13 | +and use it to produce on-demand butler clones. It is not necessary to use a |
| 14 | +butler factory as an injected dependency when using this pattern, but this |
| 15 | +module should be imported as early as possible in the application startup; |
| 16 | +it does not depend on a running event loop. |
| 17 | +""" |
2 | 18 |
|
3 | | -from functools import partial |
| 19 | +from collections.abc import Callable |
| 20 | +from functools import cache, partial |
| 21 | +from typing import TYPE_CHECKING |
4 | 22 |
|
5 | | -import yaml |
6 | | -from anyio import Path, to_thread |
7 | | -from sqlalchemy.engine import url |
| 23 | +from anyio import to_thread |
| 24 | +from botocore.exceptions import ClientError |
| 25 | +from sqlalchemy.exc import OperationalError |
8 | 26 |
|
9 | 27 | from lsst.daf.butler import Butler, ButlerConfig, ButlerRepoIndex |
10 | 28 | from lsst.daf.butler._exceptions import MissingCollectionError |
| 29 | +from lsst.daf.butler.direct_butler import DirectButler |
| 30 | +from lsst.daf.butler.registry import CollectionArgType, RegistryConfig |
11 | 31 | from lsst.resources import ResourcePathExpression |
12 | | -from lsst.utils.db_auth import DbAuth |
13 | 32 |
|
14 | 33 | from ..config import config |
15 | 34 | from . import errors |
|
20 | 39 |
|
# Instantiated once at import time. ButlerFactory consults this index through
# ``get_known_repos()`` (to enumerate labels) and ``get_repo_uri()`` (to
# resolve a label to a repository URI).
BUTLER_REPO_INDEX = ButlerRepoIndex()
"""An index of all known butler repositories, as populated by the
``DAF_BUTLER_REPOSITORIES`` environment variable.
"""
25 | 44 |
|
26 | 45 |
|
27 | | -async def get_butler_config(repo: str, *, without_datastore: bool = False) -> ButlerConfig: |
28 | | - """Create a butler config object for a repo known to the service's |
29 | | - environment. |
class ButlerFactory:
    """Build, memoize and hand out ``clone()`` copies of known Butlers.

    When the application's ``config.butler.eager`` setting is true, one Butler
    per label in ``BUTLER_REPO_INDEX`` is created synchronously during
    initialization, so the factory is best constructed at application
    startup. Otherwise each Butler is created on first request. In both cases
    callers receive ``clone()`` copies of the memoized Butler.
    """

    def __init__(self) -> None:
        """Initialize a ButlerFactory, pre-building the butler for each known
        repository when eager creation is enabled.
        """
        # In lazy mode nothing is built here; the first call to
        # ``get_butler_factory`` for a label pays the creation cost instead.
        if not config.butler.eager:
            return

        # Create and cache the clone factory for every repo known to the
        # service. The return value is discarded; only the cache fill matters.
        for label in BUTLER_REPO_INDEX.get_known_repos():
            self.get_butler_factory(label)

    def get_butler(self, label: str, collections: list[str] | None = None) -> Butler | None:
        """Get a butler clone from the factory with the specified collections
        constraint applied.

        Parameters
        ----------
        label : `str`
            Label of the butler repository.
        collections : `list` [`str`] or `None`, optional
            Collections passed through to ``Butler.clone()``.

        Returns
        -------
        `lsst.daf.butler.Butler` or `None`
            A clone of the labelled Butler, or `None` when
            ``get_butler_factory`` could not provide one.

        Notes
        -----
        This is the primary public interface to the factory object.
        """
        factory = self.get_butler_factory(label)
        if factory is None:
            return None
        return factory(collections=collections)

    @cache
    def get_butler_factory(
        self, label: str, *, without_datastore: bool = True
    ) -> Callable[..., Butler] | None:
        """Return a factory function that creates a butler clone.

        Parameters
        ----------
        label : `str`
            Label of the butler repository.
        without_datastore : `bool`, optional
            Forwarded to ``Butler.from_config``.

        Returns
        -------
        `~collections.abc.Callable` or `None`
            A function accepting a ``collections`` argument and returning a
            ``clone()`` of the memoized Butler, or `None` if the Butler could
            not be created because of an S3 client error, a database
            operational error, or a `KeyError` during creation.

        Raises
        ------
        RuntimeError
            Propagated from ``get_butler_config`` when no configuration can
            be located for ``label``; it is not converted to `None` here.

        Notes
        -----
        This method is backed by a `functools.cache`, a threadsafe cache.
        Results are memoized per argument set, and that includes a `None`
        result: a failed creation is remembered and not retried by later
        calls with the same arguments.

        If the application's Butler ``eager`` parameter is set, the Factory
        instance instantiates all known Butlers at initialization. If this
        parameter is false, then calling this method the first time will block
        until the requested Butler is ready.
        """
        try:
            _butler_config = self.get_butler_config(label=label)
            _butler = Butler.from_config(_butler_config, without_datastore=without_datastore)
            if TYPE_CHECKING:
                # Narrowing for the type checker only; never runs at runtime.
                assert isinstance(_butler, DirectButler)
            _butler._preload_cache(load_dimension_record_cache=False)
        except (ClientError, OperationalError):
            # Case that configured butler could not be created because of an
            # S3 or database error. ``Logger.exception`` requires a message
            # argument; calling it bare would raise ``TypeError`` from inside
            # this handler and mask the original failure.
            logger.exception("Could not create butler for repo %s", label)
            return None
        except KeyError:
            # Case that no such butler was configured
            logger.warning("No such butler configured: %s", label)
            return None

        def factory(collections: CollectionArgType) -> Butler:
            # Each caller gets an independent clone of the memoized butler.
            return _butler.clone(collections=collections)

        return factory

    @cache
    def get_butler_config(self, label: str) -> ButlerConfig:
        """Create a butler config object for a repo known to the service's
        environment.

        Parameters
        ----------
        label : `str`
            Label of the butler repository. If the label is not present in
            ``BUTLER_REPO_INDEX`` it is used directly as the repository URI.

        Returns
        -------
        ``lsst.daf.butler.ButlerConfig``

        Raises
        ------
        RuntimeError
            If constructing the ``ButlerConfig`` raises `FileNotFoundError`.
        """
        try:
            repo_uri: ResourcePathExpression = BUTLER_REPO_INDEX.get_repo_uri(label=label)
        except KeyError:
            # Not an indexed label; fall back to treating it as a URI.
            logger.warning("Butler repo %s not known to environment.", label)
            repo_uri = label

        try:
            bc = ButlerConfig(other=repo_uri)
        except FileNotFoundError as err:
            logger.error("Butler repo %s not known.", label)
            # Interpolate the label into the message (exceptions do not apply
            # %-style arguments) and keep the original error as the cause.
            raise RuntimeError(f"Unknown Butler Repo {label}") from err

        return bc

    @cache
    def get_butler_registry_config(self, label: str) -> RegistryConfig:
        """Fetch the Registry Config for a Butler by label.

        Parameters
        ----------
        label : `str`
            Label of the butler repository.

        Returns
        -------
        ``lsst.daf.butler.registry.RegistryConfig``
        """
        return RegistryConfig(self.get_butler_config(label=label))
66 | 155 |
|
67 | | - bc[".registry.username"] = db_auth[0] |
68 | | - bc[".registry.password"] = db_auth[1] |
69 | | - return bc |
| 156 | + |
# Constructing the factory here runs ``ButlerFactory.__init__`` as an import
# side effect; when ``config.butler.eager`` is set, that call creates a butler
# for every repo returned by ``BUTLER_REPO_INDEX.get_known_repos()``.
BUTLER_FACTORY = ButlerFactory()
"""A module level butler factory created at module import, available for use
in other modules.
"""
70 | 161 |
|
71 | 162 |
|
| 163 | +# TODO: deprecate these functions that attempt to "remove" data from Butlers. |
72 | 164 | async def remove_run_collections( |
73 | 165 | butler_repo: str, |
74 | 166 | collection_name: str, |
|
0 commit comments