|
3 | 3 | Uses the BIDS schema for validation. |
4 | 4 | """ |
5 | 5 |
|
6 | | -import json |
7 | 6 | import re |
8 | 7 | from functools import lru_cache |
9 | 8 | from pathlib import Path |
10 | 9 | from typing import Any |
11 | 10 |
|
12 | | -import bidsschematools.schema |
13 | 11 | import pyarrow as pa |
14 | | -from bidsschematools.types import Namespace |
15 | 12 |
|
16 | 13 | from ._logging import setup_logger |
17 | 14 | from ._schema import ( |
|
23 | 20 |
|
24 | 21 | BIDSValue = str | int |
25 | 22 |
|
26 | | -# Global BIDS schema namespace. |
27 | | -_BIDS_SCHEMA: Namespace |
28 | | -# Map of entity names to schema metadata. |
29 | | -_BIDS_ENTITY_SCHEMA: dict[str, dict[str, Any]] |
30 | | -# Map of BIDS short names (e.g. 'sub') to long entities ('subject'). |
31 | | -_BIDS_NAME_ENTITY_MAP: dict[str, str] |
32 | | - |
33 | | -# BIDS schema in Arrow format |
34 | | -_BIDS_ENTITY_ARROW_SCHEMA: pa.Schema |
35 | | - |
36 | | -# "Special" entities that are part of the BIDS file name spec but not in the BIDS schema |
37 | | -# (bc they don't follow the '{key}-{value}' format). |
38 | | -_BIDS_SPECIAL_ENTITY_SCHEMA = { |
39 | | - "datatype": { |
40 | | - "name": "datatype", |
41 | | - "display_name": "Data type", |
42 | | - "description": "A functional group of different types of data.", |
43 | | - "type": "string", |
44 | | - "format": "special", |
45 | | - }, |
46 | | - "suffix": { |
47 | | - "name": "suffix", |
48 | | - "display_name": "Suffix", |
49 | | - "description": "Final part of file name after final '_' and before extension.", |
50 | | - "type": "string", |
51 | | - "format": "special", |
52 | | - }, |
53 | | - "extension": { |
54 | | - "name": "ext", |
55 | | - "display_name": "File extension", |
56 | | - "description": "Full file extension after the left-most period.", |
57 | | - "type": "string", |
58 | | - "format": "special", |
59 | | - }, |
60 | | -} |
61 | | - |
62 | | -_BIDS_FORMAT_ARROW_DTYPE_MAP = { |
63 | | - "index": pa.int32(), |
64 | | - "label": pa.string(), |
65 | | - "special": pa.string(), |
66 | | -} |
67 | | - |
68 | 23 | _BIDS_FORMAT_PY_TYPE_MAP = { |
69 | 24 | "index": int, |
70 | 25 | "label": str, |
|
80 | 35 | _logger = setup_logger(__package__) |
81 | 36 |
|
82 | 37 |
|
83 | | -def set_bids_schema(path: str | Path | None = None) -> None: |
84 | | - """Set the BIDS schema.""" |
85 | | - global _BIDS_SCHEMA, _BIDS_ENTITY_SCHEMA, _BIDS_NAME_ENTITY_MAP |
86 | | - global _BIDS_ENTITY_ARROW_SCHEMA |
87 | | - |
88 | | - schema = bidsschematools.schema.load_schema(path) |
89 | | - entity_schema = { |
90 | | - entity: schema.objects.entities[entity].to_dict() |
91 | | - for entity in schema.rules.entities |
92 | | - } |
93 | | - # Also include special extra entities (datatype, suffix, extension). |
94 | | - entity_schema.update(_BIDS_SPECIAL_ENTITY_SCHEMA) |
95 | | - name_entity_map = {cfg["name"]: entity for entity, cfg in entity_schema.items()} |
96 | | - |
97 | | - _BIDS_SCHEMA = schema |
98 | | - _BIDS_ENTITY_SCHEMA = entity_schema |
99 | | - _BIDS_NAME_ENTITY_MAP = name_entity_map |
100 | | - |
101 | | - _BIDS_ENTITY_ARROW_SCHEMA = _bids_entity_arrow_schema( |
102 | | - entity_schema, |
103 | | - bids_version=schema["bids_version"], |
104 | | - schema_version=schema["schema_version"], |
105 | | - ) |
106 | | - |
107 | | - |
108 | | -def _bids_entity_arrow_schema( |
109 | | - entity_schema: dict[str, dict[str, Any]], |
110 | | - bids_version: str, |
111 | | - schema_version: str, |
112 | | -) -> pa.Schema: |
113 | | - """Create Arrow schema from BIDS entity schema.""" |
114 | | - fields = [] |
115 | | - for entity, cfg in entity_schema.items(): |
116 | | - # Use short entity name (e.g. sub) as the field name. |
117 | | - name = cfg["name"] |
118 | | - dtype = _BIDS_FORMAT_ARROW_DTYPE_MAP[cfg["format"]] |
119 | | - # Insert full entity name (e.g. subject) into metadata. |
120 | | - metadata = {"entity": entity} |
121 | | - metadata.update( |
122 | | - {k: v if isinstance(v, str) else json.dumps(v) for k, v in cfg.items()} |
123 | | - ) |
124 | | - |
125 | | - field = pa.field(name, dtype, metadata=metadata) |
126 | | - fields.append(field) |
127 | | - |
128 | | - metadata = {"bids_version": bids_version, "schema_version": schema_version} |
129 | | - arrow_schema = pa.schema(fields, metadata=metadata) |
130 | | - return arrow_schema |
131 | | - |
132 | | - |
133 | | -def get_bids_schema() -> Namespace: |
134 | | - """Get the current BIDS schema.""" |
135 | | - return _BIDS_SCHEMA |
136 | | - |
137 | | - |
138 | | -def get_bids_entity_arrow_schema() -> pa.Schema: |
139 | | - """Get the current BIDS entity schema in Arrow format.""" |
140 | | - return _BIDS_ENTITY_ARROW_SCHEMA |
141 | | - |
142 | | - |
143 | 38 | def parse_bids_entities(path: str | Path) -> dict[str, str]: |
144 | 39 | """Parse entities from BIDS file path. |
145 | 40 |
|
@@ -317,7 +212,3 @@ def format_bids_path(entities: dict[str, Any], int_format: str = "%d") -> Path: |
317 | 212 | path = f"ses-{ses}" / path |
318 | 213 | path = f"sub-{entities['sub']}" / path |
319 | 214 | return path |
320 | | - |
321 | | - |
322 | | -# Initialize the default BIDS schema. |
323 | | -set_bids_schema() |
0 commit comments