Skip to content

Commit 2e071d6

Browse files
committed
rf: Rename schema adapter, drop bids_schema reference, default load(None)
1 parent 473dd54 commit 2e071d6

6 files changed

Lines changed: 66 additions & 83 deletions

File tree

bids2table/_entities.py

Lines changed: 7 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -13,7 +13,7 @@
1313
import pyarrow as pa
1414

1515
from ._logging import setup_logger
16-
from ._schema import BIDSSchema
16+
from ._schema import SchemaAdapter
1717

1818
BIDSValue = str | int
1919

@@ -93,15 +93,15 @@ def _parse_bids_datatype(path: Path) -> str | None:
9393

9494
def validate_bids_entities(
9595
entities: dict[str, Any],
96-
schema: BIDSSchema | pa.Schema | None = None,
96+
schema: SchemaAdapter | pa.Schema | None = None,
9797
) -> tuple[dict[str, BIDSValue], dict[str, Any]]:
9898
"""Validate BIDS entities against a schema.
9999
100100
Validates the type and allowed values of each entity against the BIDS schema.
101101
102102
Args:
103103
entities: dict mapping BIDS keys to unvalidated entities.
104-
schema: A `BIDSSchema`, a `pa.Schema` (e.g. inside a worker process), or
104+
schema: A `SchemaAdapter`, a `pa.Schema` (e.g. inside a worker process), or
105105
None to use the module-level default.
106106
107107
Returns:
@@ -110,15 +110,14 @@ def validate_bids_entities(
110110
mapping of any leftover entity mappings that didn't match a known entity or
111111
failed validation.
112112
"""
113-
entity_schema, name_entity_map = BIDSSchema.prepare(schema).lookups()
113+
schema_adapter = SchemaAdapter.load(schema)
114114

115115
valid_entities: dict[str, BIDSValue] = {}
116-
extra_entities: dict[str, Any] = {}
116+
extra_entities: dict[str, str] = {}
117117

118118
for name, value in entities.items():
119-
if name in name_entity_map:
120-
entity = name_entity_map[name]
121-
cfg = entity_schema[entity]
119+
if entity := schema_adapter.name_entity_map.get(name):
120+
cfg = schema_adapter.entity_schema[entity]
122121
typ = _BIDS_FORMAT_PY_TYPE_MAP[cfg["format"]]
123122

124123
# Cast to target type.

bids2table/_indexing.py

Lines changed: 13 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -25,7 +25,7 @@
2525
)
2626
from ._logging import setup_logger
2727
from ._pathlib import CloudPath, PathT, as_path, cloudpathlib_is_available
28-
from ._schema import BIDSSchema
28+
from ._schema import SchemaAdapter
2929

3030
_BIDS_SUBJECT_DIR_PATTERN = re.compile(r"sub-[a-zA-Z0-9]+")
3131

@@ -91,15 +91,15 @@
9191

9292

9393
def get_arrow_schema(
94-
schema: BIDSSchema | pa.Schema | None = None,
94+
schema: SchemaAdapter | pa.Schema | None = None,
9595
) -> pa.Schema:
9696
"""Get Arrow schema of the BIDS dataset index.
9797
9898
Args:
99-
schema: A `BIDSSchema`, a `pa.Schema`, or None to use the module-level
99+
schema: A `SchemaAdapter`, a `pa.Schema`, or None to use the module-level
100100
default BIDS schema.
101101
"""
102-
bids_schema = BIDSSchema.prepare(schema)
102+
bids_schema = SchemaAdapter.load(schema)
103103
entity_schema = bids_schema.arrow_schema
104104
index_fields = {
105105
name: pa.field(name, cfg["dtype"], metadata=cfg["metadata"])
@@ -120,12 +120,12 @@ def get_arrow_schema(
120120

121121

122122
def get_column_names(
123-
schema: BIDSSchema | pa.Schema | None = None,
123+
schema: SchemaAdapter | pa.Schema | None = None,
124124
) -> enum.StrEnum:
125125
"""Get an enum of the BIDS index columns.
126126
127127
Args:
128-
schema: A `BIDSSchema`, a `pa.Schema`, or None to use the module-level
128+
schema: A `SchemaAdapter`, a `pa.Schema`, or None to use the module-level
129129
default BIDS schema.
130130
"""
131131
arrow_schema = get_arrow_schema(schema=schema)
@@ -210,7 +210,7 @@ def index_dataset(
210210
chunksize: int = 32,
211211
executor_cls: type[Executor] = ProcessPoolExecutor,
212212
show_progress: bool = False,
213-
schema: BIDSSchema | pa.Schema | Namespace | str | Path | None = None,
213+
schema: SchemaAdapter | pa.Schema | Namespace | str | Path | None = None,
214214
) -> pa.Table:
215215
"""Index a BIDS dataset.
216216
@@ -226,7 +226,7 @@ def index_dataset(
226226
`ProcessPoolExecutor` when `max_workers > 0`.
227227
executor_cls: Executor class to use for parallel indexing.
228228
show_progress: Show progress bar.
229-
schema: A `BIDSSchema`, `pa.Schema`, `Namespace`, path/URL, or None to use
229+
schema: A `SchemaAdapter`, `pa.Schema`, `Namespace`, path/URL, or None to use
230230
the module-level default. Per-call schema overrides propagate to worker
231231
processes.
232232
@@ -235,7 +235,7 @@ def index_dataset(
235235
"""
236236
root = as_path(root)
237237

238-
bids_schema = BIDSSchema.prepare(schema)
238+
bids_schema = SchemaAdapter.load(schema)
239239
entity_arrow_schema = bids_schema.arrow_schema
240240
full_schema = get_arrow_schema(schema=bids_schema)
241241

@@ -278,7 +278,7 @@ def batch_index_dataset(
278278
max_workers: int | None = 0,
279279
executor_cls: type[Executor] = ProcessPoolExecutor,
280280
show_progress: bool = False,
281-
schema: BIDSSchema | pa.Schema | Namespace | str | Path | None = None,
281+
schema: SchemaAdapter | pa.Schema | Namespace | str | Path | None = None,
282282
) -> Generator[pa.Table, None, None]:
283283
"""Index a batch of BIDS datasets.
284284
@@ -290,13 +290,13 @@ def batch_index_dataset(
290290
See `concurrent.futures.ProcessPoolExecutor` for details.
291291
executor_cls: Executor class to use for parallel indexing.
292292
show_progress: Show progress bar.
293-
schema: A `BIDSSchema`, `pa.Schema`, `Namespace`, path/URL, or None to use
293+
schema: A `SchemaAdapter`, `pa.Schema`, `Namespace`, path/URL, or None to use
294294
the module-level default.
295295
296296
Yields:
297297
An Arrow table index for each BIDS dataset.
298298
"""
299-
bids_schema = BIDSSchema.prepare(schema)
299+
bids_schema = SchemaAdapter.load(schema)
300300
entity_arrow_schema = bids_schema.arrow_schema
301301
func = partial(_batch_index_func, schema=entity_arrow_schema)
302302

@@ -421,7 +421,7 @@ def _index_bids_subject_dir(
421421
422422
Args:
423423
path: BIDS subject directory.
424-
schema: BIDS entity Arrow schema (i.e. ``BIDSSchema.arrow_schema``).
424+
schema: BIDS entity Arrow schema (i.e. ``SchemaAdapter.arrow_schema``).
425425
Pass-through to validation; the full index schema is derived from
426426
it. None uses the module-level default.
427427
dataset: Dataset identifier; computed if not given.

bids2table/_schema.py

Lines changed: 18 additions & 22 deletions
Original file line number | Diff line number | Diff line change
@@ -105,37 +105,38 @@ def _entity_lookups_from_arrow(
105105

106106

107107
@dataclass(frozen=True)
108-
class BIDSSchema:
108+
class SchemaAdapter:
109109
"""Encapsulates a BIDS schema and its derived Arrow representation.
110110
111-
Use `BIDSSchema.from_path`, `from_namespace`, `from_arrow`, or `prepare`
112-
rather than constructing directly.
111+
Use `SchemaAdapter.load` rather than constructing directly.
113112
"""
114113

115114
arrow_schema: pa.Schema
116-
_entity_schema: dict[str, dict[str, Any]] = field(repr=False)
117-
_name_entity_map: dict[str, str] = field(repr=False)
118-
bids_schema: Namespace | None = field(default=None, repr=False)
115+
entity_schema: dict[str, dict[str, Any]] = field(repr=False)
116+
name_entity_map: dict[str, str] = field(repr=False)
119117

120118
@classmethod
121-
def prepare(
122-
cls, schema: "BIDSSchema | pa.Schema | Namespace | str | Path | None"
123-
) -> "BIDSSchema":
119+
def load(
120+
cls,
121+
schema: SchemaAdapter | pa.Schema | Namespace | str | Path | None = None,
122+
) -> SchemaAdapter:
124123
"""Polymorphic constructor.
125124
126-
- `BIDSSchema` -> returned unchanged
127-
- `pa.Schema` -> via `from_arrow`
128-
- `Namespace` -> via `from_namespace`
129-
- `str` / `Path` / `None` -> via `from_path`
125+
Existing SchemaAdapters and pyarrow Schemas are passed through with minimal processing.
126+
127+
Paths and `None` are passed directly to `bidsschematools.schema.load_schema` for loading,
128+
and the resulting `Namespace` is queried to extract the components used by bids2table.
129+
130+
A pre-loaded `Namespace` is also accepted, for callers that may want to modify a schema.
130131
"""
131132
if isinstance(schema, cls):
132133
return schema
133134
elif isinstance(schema, pa.Schema):
134135
entity_schema, name_entity_map = _entity_lookups_from_arrow(schema)
135136
return cls(
136137
arrow_schema=schema,
137-
_entity_schema=entity_schema,
138-
_name_entity_map=name_entity_map,
138+
entity_schema=entity_schema,
139+
name_entity_map=name_entity_map,
139140
)
140141

141142
ns: Namespace = (
@@ -157,11 +158,6 @@ def prepare(
157158
)
158159
return cls(
159160
arrow_schema=arrow_schema,
160-
_entity_schema=entity_schema,
161-
_name_entity_map=name_entity_map,
162-
bids_schema=ns,
161+
entity_schema=entity_schema,
162+
name_entity_map=name_entity_map,
163163
)
164-
165-
def lookups(self) -> tuple[dict[str, dict[str, Any]], dict[str, str]]:
166-
"""Return `(entity_schema, name_entity_map)` for validation."""
167-
return self._entity_schema, self._name_entity_map

tests/test_entities.py

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -9,7 +9,7 @@
99
parse_bids_entities,
1010
validate_bids_entities,
1111
)
12-
from bids2table._schema import BIDSSchema
12+
from bids2table._schema import SchemaAdapter
1313

1414

1515
class ExampleCase(NamedTuple):
@@ -104,14 +104,14 @@ def test_format_bids_path(path: str):
104104

105105

106106
def test_validate_bids_entities_accepts_explicit_bids_schema():
107-
schema = BIDSSchema.prepare(None)
107+
schema = SchemaAdapter.load()
108108
valid, extra = validate_bids_entities({"sub": "01", "task": "rest"}, schema=schema)
109109
assert valid == {"sub": "01", "task": "rest"}
110110
assert extra == {}
111111

112112

113113
def test_validate_bids_entities_accepts_arrow_schema():
114-
schema = BIDSSchema.prepare(None)
114+
schema = SchemaAdapter.load()
115115
valid, extra = validate_bids_entities(
116116
{"sub": "01", "task": "rest"}, schema=schema.arrow_schema
117117
)
@@ -126,7 +126,7 @@ def test_validate_bids_entities_default_schema_unchanged():
126126

127127

128128
def test_validate_bids_entities_arrow_and_bids_schema_paths_agree():
129-
schema = BIDSSchema.prepare(None)
129+
schema = SchemaAdapter.load()
130130
entities = {"sub": "01", "task": "rest", "run": "1"}
131131
bids_result = validate_bids_entities(entities, schema=schema)
132132
arrow_result = validate_bids_entities(entities, schema=schema.arrow_schema)

tests/test_indexing.py

Lines changed: 12 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -9,7 +9,7 @@
99
import bids2table._indexing as indexing
1010
from bids2table._indexing import get_arrow_schema, get_column_names
1111
from bids2table._pathlib import cloudpathlib_is_available
12-
from bids2table._schema import BIDSSchema
12+
from bids2table._schema import SchemaAdapter
1313

1414
BIDS_EXAMPLES = Path(__file__).parents[1] / "bids-examples"
1515

@@ -254,27 +254,27 @@ def test_h_fmt(num: int, expected: str):
254254

255255

256256
def test_get_arrow_schema_with_explicit_schema():
257-
s = BIDSSchema.prepare(None)
257+
s = SchemaAdapter.load()
258258
arrow = get_arrow_schema(schema=s)
259259
assert "sub" in {f.name for f in arrow}
260260
assert "dataset" in {f.name for f in arrow}
261261

262262

263263
def test_get_column_names_with_explicit_schema():
264-
s = BIDSSchema.prepare(None)
264+
s = SchemaAdapter.load()
265265
cols = get_column_names(schema=s)
266266
assert "sub" in [c.value for c in cols]
267267

268268

269269
def test_get_arrow_schema_accepts_pa_schema():
270-
s = BIDSSchema.prepare(None)
270+
s = SchemaAdapter.load()
271271
arrow = get_arrow_schema(schema=s.arrow_schema)
272272
assert "sub" in {f.name for f in arrow}
273273
assert "dataset" in {f.name for f in arrow}
274274

275275

276276
def test_index_dataset_with_explicit_schema():
277-
s = BIDSSchema.prepare(None)
277+
s = SchemaAdapter.load()
278278
table = indexing.index_dataset(BIDS_EXAMPLES / "ds102", schema=s)
279279
assert table.num_rows > 0
280280
assert "sub" in table.schema.names
@@ -284,10 +284,10 @@ def test_index_dataset_workers_honor_explicit_schema():
284284
"""Regression: workers must use the schema passed to index_dataset, not
285285
re-import the module default.
286286
"""
287-
base = BIDSSchema.prepare(None)
287+
base = SchemaAdapter.load()
288288
base_md = {k.decode(): v.decode() for k, v in base.arrow_schema.metadata.items()}
289289
tagged_arrow = base.arrow_schema.with_metadata({**base_md, "test_marker": "tagged"})
290-
tagged = BIDSSchema.prepare(tagged_arrow)
290+
tagged = SchemaAdapter.load(tagged_arrow)
291291

292292
table = indexing.index_dataset(
293293
BIDS_EXAMPLES / "ds102", schema=tagged, max_workers=2
@@ -296,7 +296,7 @@ def test_index_dataset_workers_honor_explicit_schema():
296296

297297

298298
def test_batch_index_dataset_with_explicit_schema():
299-
s = BIDSSchema.prepare(None)
299+
s = SchemaAdapter.load()
300300
roots = [p.parent for p in BIDS_EXAMPLES.glob("*/dataset_description.json")][:2]
301301
tables = list(indexing.batch_index_dataset(roots, schema=s))
302302
assert len(tables) == len(roots)
@@ -305,15 +305,15 @@ def test_batch_index_dataset_with_explicit_schema():
305305

306306

307307
def test_two_schemas_one_process_produce_distinct_metadata():
308-
"""Two BIDSSchema instances yield index tables with distinguishable schemas.
308+
"""Two SchemaAdapter instances yield index tables with distinguishable schemas.
309309
310-
Distinguish via a custom marker injected into one BIDSSchema's arrow_schema
310+
Distinguish via a custom marker injected into one SchemaAdapter's arrow_schema
311311
metadata. The non-marked schema must not pick up the marker.
312312
"""
313-
base = BIDSSchema.prepare(None)
313+
base = SchemaAdapter.load()
314314
base_md = {k.decode(): v.decode() for k, v in base.arrow_schema.metadata.items()}
315315
tagged_arrow = base.arrow_schema.with_metadata({**base_md, "test_marker": "tagged"})
316-
tagged = BIDSSchema.prepare(tagged_arrow)
316+
tagged = SchemaAdapter.load(tagged_arrow)
317317

318318
dataset_root = BIDS_EXAMPLES / "ds102"
319319

0 commit comments

Comments
 (0)