Skip to content

Commit c21fabe

Browse files
committed
Rudimentary catalog init refactoring to use a config file
1 parent fa08613 commit c21fabe

File tree

3 files changed

+103
-1
lines changed

3 files changed

+103
-1
lines changed

deltacat/catalog/model/catalog.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
from deltacat.catalog.main import impl as dcat
1313
from deltacat.catalog.model.properties import CatalogProperties
1414
from deltacat.constants import DEFAULT_CATALOG
15+
from deltacat.utils.config_loader import load_catalog_configs_from_yaml
1516

1617
all_catalogs: Optional[ray.actor.ActorHandle] = None
1718

@@ -182,6 +183,7 @@ def init(
182183
ray_init_args: Dict[str, Any] = {},
183184
*,
184185
force=False,
186+
config_path: Optional[str] = None,
185187
) -> Optional[ray.runtime.BaseContext]:
186188
"""
187189
Initialize DeltaCAT catalogs.
@@ -202,6 +204,16 @@ def init(
202204
logger.warning("DeltaCAT already initialized.")
203205
return None
204206

207+
# If catalogs are provided and a config_path is also provided, raise ValueError
208+
if catalogs and config_path is not None:
209+
raise ValueError(
210+
"Cannot provide both `catalogs` and `config_path`. Please provide "
211+
"only one of these parameters."
212+
)
213+
# If no catalogs are provided but a config_path is given, load the catalogs from that YAML config file
214+
if not catalogs and config_path is not None:
215+
catalogs = load_catalog_configs_from_yaml(config_path=config_path)
216+
205217
# initialize ray (and ignore reinitialization errors)
206218
ray_init_args["ignore_reinit_error"] = True
207219
context = ray.init(**ray_init_args)

deltacat/tests/catalog/test_catalogs.py

Lines changed: 45 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
import uuid
55
from unittest import mock
66
import os
7+
import yaml
78

89
from deltacat.catalog import (
910
CatalogProperties,
@@ -147,11 +148,54 @@ def test_init_with_default_catalog_name(self, reset_catalogs):
147148
force=True,
148149
)
149150

150-
# Get the default catalog and check it's catalog2
151+
# Get the default catalog and check it's catalog2
151152
default_catalog = get_catalog()
152153
assert default_catalog.impl == MockCatalogImpl
153154
assert default_catalog.inner["kwargs"]["id"] == 2
154155

156+
def test_init_with_config_yaml(self, tmp_path, reset_catalogs):
    """
    Verify that init() can build its catalogs from a YAML config file.

    The config file written here holds a single named entry
    ("test-catalog") whose "filesystem" and "storage" values are null, so
    no Python objects have to be serialized into the YAML; the test expects
    the resulting CatalogProperties to carry a usable filesystem anyway.

    Because "test-catalog" is the only catalog in the file, the test
    expects get_catalog() called with no arguments to return it as the
    default catalog.
    """
    catalog_name = "test-catalog"

    # Only plain YAML-serializable values go into the config payload.
    yaml_payload = {
        catalog_name: {
            "root": str(tmp_path),  # where catalog metadata/data should live
            "filesystem": None,  # null in YAML; expected to be inferred
            "storage": None,  # null in YAML; expected to be inferred
        }
    }

    # Persist the payload where init() can read it back.
    yaml_file = tmp_path / "config.yaml"
    with open(yaml_file, "w") as stream:
        yaml.dump(yaml_payload, stream)

    # Catalogs come exclusively from the config file (no `catalogs` arg).
    init(config_path=str(yaml_file), force=True)

    # No name given, so this resolves to the default catalog.
    default_props = get_catalog()

    assert isinstance(default_props, CatalogProperties)
    assert default_props.root == str(tmp_path)

    # The filesystem was null in the YAML, yet a concrete one is expected.
    import pyarrow.fs

    assert isinstance(default_props.filesystem, pyarrow.fs.FileSystem)
    # TODO: Once storage has a known default class, assert the type of
    # default_props.storage here as well.
198+
155199
def test_put_catalog(self, reset_catalogs):
156200
"""Test adding a catalog after initialization."""
157201
# Initialize with a single catalog

deltacat/utils/config_loader.py

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
import yaml
2+
from typing import Dict
3+
from deltacat.catalog.model.properties import CatalogProperties
4+
5+
6+
def load_catalog_configs_from_yaml(config_path: str) -> Dict[str, CatalogProperties]:
    """
    Load one or more catalog configs from a YAML file.

    The top level of the YAML document must be a mapping, in one of two
    layouts:

    1. A single unnamed config: every top-level value is a scalar
       (str, int, float, bool or null) or a list. The mapping is passed to
       CatalogProperties as keyword arguments and returned under the name
       "default". An empty mapping also takes this path.
    2. A mapping of named configs: catalog name -> property mapping. Each
       property mapping is passed to CatalogProperties as keyword arguments.

    Args:
        config_path: Path to the YAML config file. The file is decoded as
            UTF-8.

    Returns:
        Dict[str, CatalogProperties]: Mapping of catalog name ->
        CatalogProperties.

    Raises:
        ValueError: If the top level of the document is not a mapping, or
            if (in the named layout) any catalog's value is not a mapping.
    """
    # Decode explicitly as UTF-8 instead of relying on the platform's locale
    # encoding, which would mangle non-ASCII values on some systems.
    with open(config_path, "r", encoding="utf-8") as f:
        config_data = yaml.safe_load(f)

    if not isinstance(config_data, dict):
        raise ValueError(
            f"Invalid YAML format in {config_path}. "
            f"Expected a dict, got {type(config_data)}"
        )

    # Case 1: single unnamed config
    # e.g. {"type": "iceberg", "uri": "...", "warehouse": "prod"}
    # Detected by the absence of any nested mapping among the values.
    flat_value_types = (str, int, float, bool, type(None), list)
    if all(isinstance(v, flat_value_types) for v in config_data.values()):
        return {"default": CatalogProperties(**config_data)}

    # Case 2: top-level dict of name -> dict-of-properties
    catalogs: Dict[str, CatalogProperties] = {}
    for name, props in config_data.items():
        if not isinstance(props, dict):
            raise ValueError(
                f"Config for catalog '{name}' must be a mapping, got {type(props)}"
            )
        catalogs[name] = CatalogProperties(**props)

    return catalogs

0 commit comments

Comments
 (0)