|
4 | 4 | This script converts LinkML YAML files to JSON Schema format that is compatible |
5 | 5 | with Synapse's JSON Schema service. It handles flattening, version conversion, |
6 | 6 | and Synapse-specific formatting requirements. |
| 7 | +
|
| 8 | +Key transformations: |
| 9 | +- Flattens JSON schemas (removes $ref and $defs) |
| 10 | +- Converts schema version to Draft-07 (Synapse requirement) |
| 11 | +- Fixes additionalProperties (converts boolean to {}) |
| 12 | +- Cleans union types (removes null from type arrays) |
| 13 | +- Fixes boolean pattern checks (converts pattern: "^true$" to const: true for boolean fields) |
| 14 | +- Removes unsupported fields ($defs, metamodel_version, version) |
| 15 | +
|
| 16 | +Note on boolean pattern fixes: |
| 17 | +LinkML rules with pattern: "^true$" or "^false$" for boolean fields are converted |
| 18 | +to const: true or const: false because JSON Schema's pattern keyword only applies |
| 19 | +to strings, not booleans. This ensures conditional requirements work correctly |
| 20 | +for boolean fields in the generated JSON Schema. |
7 | 21 | """ |
8 | 22 | from pathlib import Path |
9 | 23 | from typing import Any, Union |
@@ -243,6 +257,78 @@ def recursive_clean_union(obj, visited=None): |
243 | 257 | return schema_data |
244 | 258 |
|
245 | 259 |
|
def fix_boolean_patterns(schema_data: dict) -> dict:
    """Convert pattern checks for boolean fields to const checks.

    LinkML rules with pattern: "^true$" or "^false$" for boolean fields
    need to be converted to const: true or const: false in JSON Schema,
    since JSON Schema's `pattern` keyword only validates strings and is
    ignored for boolean values.

    Example transformation (inside an "if" clause):
    - Before: {"HAS_SLIDE_LABEL": {"pattern": "^true$"}}
    - After:  {"HAS_SLIDE_LABEL": {"const": true}}

    Only "if" clauses are rewritten, and only for properties whose
    definition in the top-level "properties" of the schema declares
    type "boolean" (either as a string, or as a type list whose only
    non-"null" member is "boolean"). String fields with patterns, and
    any pattern other than "^true$" / "^false$", are left unchanged.

    Malformed entries (non-dict subschemas or property definitions) are
    skipped individually, so one unexpected value does not prevent the
    remaining conditions from being converted.

    Args:
        schema_data: The JSON Schema as a dict. It is modified in place.

    Returns:
        The same ``schema_data`` object, after modification.
    """
    pattern_to_const = {"^true$": True, "^false$": False}

    # Property types are looked up in the top-level "properties" only.
    top_level = schema_data.get("properties")
    properties = top_level if isinstance(top_level, dict) else {}

    def is_boolean_property(prop_name) -> bool:
        """Return True when the top-level definition declares a boolean."""
        prop_def = properties.get(prop_name)
        if not isinstance(prop_def, dict):
            return False
        declared = prop_def.get("type")
        if isinstance(declared, list):
            # e.g. ["boolean"] or ["boolean", "null"]; mixed unions such as
            # ["boolean", "string"] are excluded because pattern is still
            # meaningful for the string case.
            return [t for t in declared if t != "null"] == ["boolean"]
        return declared == "boolean"

    def convert_if_clause(if_clause) -> None:
        """Rewrite boolean pattern conditions of a single "if" clause."""
        if not isinstance(if_clause, dict):
            return
        conditions = if_clause.get("properties")
        if not isinstance(conditions, dict):
            return
        for prop_name, prop_schema in conditions.items():
            # Boolean subschemas (true/false) and other non-dict values
            # carry no pattern; skip them instead of aborting the clause.
            if not isinstance(prop_schema, dict):
                continue
            pattern = prop_schema.get("pattern")
            if not isinstance(pattern, str) or pattern not in pattern_to_const:
                continue
            if is_boolean_property(prop_name):
                prop_schema["const"] = pattern_to_const[pattern]
                del prop_schema["pattern"]

    # Iterative walk over every nested dict/list (this covers allOf, then,
    # else, ...). An explicit stack avoids hitting the recursion limit on
    # deeply nested schemas; `seen` guards against shared or cyclic
    # references. The conversions are independent, so visit order is
    # irrelevant.
    pending = [schema_data]
    seen = set()
    while pending:
        node = pending.pop()
        if not isinstance(node, (dict, list)):
            continue
        node_id = id(node)
        if node_id in seen:
            continue
        seen.add(node_id)
        if isinstance(node, dict):
            convert_if_clause(node.get("if"))
            pending.extend(node.values())
        else:
            pending.extend(node)

    print("Fixed boolean pattern checks to use const")
    return schema_data
| 330 | + |
| 331 | + |
246 | 332 | def get_args(): |
247 | 333 | """Set up command-line interface and get arguments.""" |
248 | 334 | parser = argparse.ArgumentParser( |
@@ -309,6 +395,7 @@ def main(): |
309 | 395 | schema_data = fix_schema_version(schema_data) |
310 | 396 | schema_data = fix_additional_properties(schema_data) |
311 | 397 | schema_data = clean_union_types(schema_data) |
| 398 | + schema_data = fix_boolean_patterns(schema_data) |
312 | 399 | schema_data = remove_unsupported_fields(schema_data) |
313 | 400 |
|
314 | 401 | # 4. Write final result to output file |
|
0 commit comments