Skip to content
Merged
Show file tree
Hide file tree
Changes from 28 commits
Commits
Show all changes
34 commits
Select commit Hold shift + click to select a range
a123fdc
add datatypes to data model
andrewelamb Jun 17, 2025
a70a85f
add date and url as validation rules for JSON Schemas
andrewelamb Jun 17, 2025
7fa9cd2
ran pre-commit
andrewelamb Jun 17, 2025
979b8d7
reordered constant file
andrewelamb Jun 17, 2025
f8f95f4
fix bug with misnamed variable
andrewelamb Jun 17, 2025
9c488eb
cleaned up variable names
andrewelamb Jun 18, 2025
9a81a16
some cleanup
andrewelamb Jun 18, 2025
207504f
clear up docstring
andrewelamb Jun 18, 2025
42a9cc7
remove bool validation rule
andrewelamb Jun 18, 2025
e28c660
clear up docstring
andrewelamb Jun 18, 2025
45f5042
clear up docstring
andrewelamb Jun 18, 2025
ab6a80e
change _VALIDATION_RULES to dict
andrewelamb Jun 18, 2025
e195000
redo how type is determined
andrewelamb Jun 18, 2025
400ec39
removed is_type_rule attribute
andrewelamb Jun 18, 2025
48eade8
fixed docstring
andrewelamb Jun 18, 2025
d82003e
refactor
andrewelamb Jun 19, 2025
9b4759b
updated columntype datamodels
andrewelamb Jun 20, 2025
138627c
rename module
andrewelamb Jun 20, 2025
e8b174b
added links to JSON Schema docs
andrewelamb Jun 20, 2025
ee50f0c
ran pre-commit
andrewelamb Jun 20, 2025
db89419
fix missing incompatible rules
andrewelamb Jun 20, 2025
abe58f0
refactored how validation rule inputs are checked
andrewelamb Jun 20, 2025
bc57779
refactored how validation rule inputs are checked
andrewelamb Jun 20, 2025
a2da38d
ran pre-commit
andrewelamb Jun 20, 2025
718acba
Merge branch 'develop' into SCHEMATIC-295
andrewelamb Jun 26, 2025
33b3406
various suggestions
andrewelamb Jun 26, 2025
c9fd510
linglings suggestions
andrewelamb Jul 1, 2025
d85817b
fix tests
andrewelamb Jul 1, 2025
dce5ce3
giannas suggestions
andrewelamb Jul 1, 2025
f031961
run pre commit
andrewelamb Jul 1, 2025
4aa3c87
linglings suggestions
andrewelamb Jul 2, 2025
aabae47
Update schematic/schemas/json_schema_validation_rule_functions.py
andrewelamb Jul 2, 2025
226423d
Update schematic/schemas/json_schema_validation_rule_functions.py
andrewelamb Jul 2, 2025
f26bf8b
Update schematic/schemas/json_schema_validation_rule_functions.py
andrewelamb Jul 2, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 13 additions & 13 deletions schematic/schemas/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,12 @@
from enum import Enum


class ValidationRule(Enum):
"""Validation rules that are used to create JSON Schema"""
class ValidationRuleName(Enum):
"""Names of validation rules that are used to create JSON Schema"""

LIST = "list"
DATE = "date"
URL = "url"
REGEX = "regex"
IN_RANGE = "inRange"
STR = "str"
Expand All @@ -16,26 +19,23 @@ class ValidationRule(Enum):


class JSONSchemaType(Enum):
    """The JSON Schema types that are currently supported"""

    # Each value is the literal string written to the "type" keyword of a JSON Schema
    STRING = "string"
    NUMBER = "number"
    INTEGER = "integer"
    BOOLEAN = "boolean"


class JSONSchemaFormat(Enum):
    """The JSON Schema formats that are currently supported"""

    # Each value is the literal string written to the "format" keyword of a JSON Schema
    DATE = "date"
    URI = "uri"


class RegexModule(Enum):
    """The modules that are allowed for the regex validation rule"""

    # The pattern may be found anywhere in the string
    SEARCH = "search"
    # Like search, but the pattern must be found at the beginning of the string
    MATCH = "match"


# Lookup table from the name of each type validation rule to the name of the
# JSON Schema type it is translated to
TYPE_RULES = {
    rule.value: json_schema_type.value
    for rule, json_schema_type in (
        (ValidationRule.STR, JSONSchemaType.STRING),
        (ValidationRule.NUM, JSONSchemaType.NUMBER),
        (ValidationRule.FLOAT, JSONSchemaType.NUMBER),
        (ValidationRule.INT, JSONSchemaType.INTEGER),
        (ValidationRule.BOOL, JSONSchemaType.BOOLEAN),
    )
}
263 changes: 98 additions & 165 deletions schematic/schemas/create_json_schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,14 +15,20 @@

from schematic.schemas.data_model_graph import DataModelGraphExplorer
from schematic.utils.schema_utils import get_json_schema_log_file_path
from schematic.utils.validate_utils import rule_in_rule_list
from schematic.utils.io_utils import export_json
from schematic.schemas.constants import (
ValidationRule,
JSONSchemaType,
RegexModule,
TYPE_RULES,
from schematic.schemas.json_schema_validation_rule_functions import (
ValidationRuleName,
filter_unused_inputted_rules,
check_for_conflicting_inputted_rules,
check_for_duplicate_inputted_rules,
get_in_range_parameters_from_inputted_rule,
get_regex_parameters_from_inputted_rule,
get_js_type_from_inputted_rules,
get_rule_from_inputted_rules,
get_validation_rule_names_from_inputted_rules,
get_names_from_inputted_rules,
)
from schematic.schemas.constants import JSONSchemaType, JSONSchemaFormat


logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -142,11 +148,12 @@ class Node: # pylint: disable=too-many-instance-attributes
is_required: Whether or not this node is required
dependencies: This nodes dependencies
description: This nodes description, gotten from the comment in the data model
type: The type of the property (inferred from validation_rules)
is_array: Whether or not the property is an array (inferred from validation_rules)
type: The type of the property (inferred from validation_rules)
format: The format of the property (inferred from validation_rules)
minimum: The minimum value of the property (if numeric) (inferred from validation_rules)
maximum: The maximum value of the property (if numeric) (inferred from validation_rules)
pattern: The regex pattern of the property
pattern: The regex pattern of the property (inferred from validation_rules)
"""

name: str
Expand All @@ -158,8 +165,9 @@ class Node: # pylint: disable=too-many-instance-attributes
is_required: bool = field(init=False)
dependencies: list[str] = field(init=False)
description: str = field(init=False)
type: Optional[str] = field(init=False)
is_array: bool = field(init=False)
type: Optional[JSONSchemaType] = field(init=False)
format: Optional[JSONSchemaFormat] = field(init=False)
minimum: Optional[float] = field(init=False)
maximum: Optional[float] = field(init=False)
pattern: Optional[str] = field(init=False)
Expand Down Expand Up @@ -191,8 +199,9 @@ def __post_init__(self) -> None:
)

(
self.type,
self.is_array,
self.type,
self.format,
self.minimum,
self.maximum,
self.pattern,
Expand All @@ -201,168 +210,91 @@ def __post_init__(self) -> None:

def _get_validation_rule_based_fields(
validation_rules: list[str],
) -> tuple[Optional[str], bool, Optional[float], Optional[float], Optional[str]]:
) -> tuple[
bool,
Optional[JSONSchemaType],
Optional[JSONSchemaFormat],
Optional[float],
Optional[float],
Optional[str],
]:
"""
Gets the fields for the Node class that are based on the validation rules

Args:
validation_rules: A list of validation rules

Raises:
ValueError: If both the inRange and regex rule are present
ValueError: If the inRange rule and a type validation rule other than 'int' or 'num'
are present
ValueError: If the regex rule and a type validation rule other than 'str' are present

Returns:
A tuple containing the type, is_array, minimum, maximum, and pattern fields for
a Node object
"""
prop_type: Optional[str] = None
is_array = False
minimum: Optional[float] = None
maximum: Optional[float] = None
pattern: Optional[str] = None

if validation_rules:
if rule_in_rule_list("list", validation_rules):
is_array = True

type_rule = _get_type_rule_from_rule_list(validation_rules)
if type_rule:
prop_type = TYPE_RULES.get(type_rule)

regex_rule = _get_rule_from_rule_list(ValidationRule.REGEX, validation_rules)
range_rule = _get_rule_from_rule_list(ValidationRule.IN_RANGE, validation_rules)
if range_rule and regex_rule:
raise ValueError(
"regex and inRange rules are incompatible: ", validation_rules
)

if range_rule:
if prop_type not in [
JSONSchemaType.NUMBER.value,
JSONSchemaType.INTEGER.value,
None,
]:
raise ValueError(
"Validation rules must be either 'int' or 'num' when using the inRange rule"
)
prop_type = prop_type or JSONSchemaType.NUMBER.value
minimum, maximum = _get_range_from_in_range_rule(range_rule)

if regex_rule:
if prop_type not in (None, JSONSchemaType.STRING.value):
raise ValueError("Type must be 'string' when using a regex rule")
prop_type = JSONSchemaType.STRING.value
pattern = _get_pattern_from_regex_rule(regex_rule)

return (
prop_type,
is_array,
minimum,
maximum,
pattern,
)


def _get_range_from_in_range_rule(
    rule: str,
) -> tuple[Optional[float], Optional[float]]:
    """
    Returns the min and max from an inRange rule if they exist

    The first whitespace-separated token of the rule is the rule name, the second
    (if present) is the minimum and the third (if present) is the maximum.
    Negative and decimal bounds such as "-5" or "0.5" are supported.

    Arguments:
        rule: The inRange rule

    Returns:
        The min and max from the rule. Each one is None when the parameter is missing
        or is not a finite number.
    """
    # Splitting on any run of whitespace keeps repeated spaces from shifting the parameters
    parameters = rule.split()
    range_min = _parse_range_bound(parameters[1]) if len(parameters) > 1 else None
    range_max = _parse_range_bound(parameters[2]) if len(parameters) > 2 else None
    return (range_min, range_max)


def _parse_range_bound(parameter: str) -> Optional[float]:
    """
    Converts one inRange parameter to a float

    Arguments:
        parameter: A single parameter from an inRange rule

    Returns:
        The parameter as a float, or None if it is not a finite number
    """
    import math  # local import so this block does not depend on the module's import list

    # str.isnumeric() is not a safe guard for float(): it rejects "-5" and "0.5" and
    # accepts characters such as "²" that float() cannot parse, so try the conversion
    try:
        bound = float(parameter)
    except ValueError:
        return None
    # "nan" and "inf" parse as floats but are not usable as a JSON Schema minimum/maximum
    if not math.isfinite(bound):
        return None
    return bound
JSON Schema docs:


def _get_pattern_from_regex_rule(rule: str) -> Optional[str]:
"""Gets the pattern from the regex rule
Array: https://json-schema.org/understanding-json-schema/reference/array
Types: https://json-schema.org/understanding-json-schema/reference/type#type-specific-keywords
Format: https://json-schema.org/understanding-json-schema/reference/type#format
Pattern: https://json-schema.org/understanding-json-schema/reference/string#regexp
Min/max: https://json-schema.org/understanding-json-schema/reference/numeric#range

Arguments:
rule: The full regex rule
validation_rules: A list of input validation rules

Returns:
If the module parameter is search or match, and the pattern parameter exists
the pattern is returned
Otherwise None
"""
parameters = rule.split(" ")
if len(parameters) != 3:
return None
_, module, pattern = parameters
# Do not translate other modules
if module not in [item.value for item in RegexModule]:
return None
# Match is just search but only at the beginning of the string
if module == RegexModule.MATCH.value and not pattern.startswith("^"):
return f"^{pattern}"
return pattern


def _get_type_rule_from_rule_list(rule_list: list[str]) -> Optional[str]:
"""
Returns the type rule from a list of rules if there is only one
Returns None if there are no type rules

Arguments:
rule_list: A list of validation rules
A tuple containing fields for a Node object:
- js_is_array: Whether or not the Node should be an array in JSON Schema
- js_type: The JSON Schema type
- js_format: The JSON Schema format
- js_minimum: If the type is numeric the JSON Schema minimum
- js_maximum: If the type is numeric the JSON Schema maximum
- js_pattern: If the type is string the JSON Schema pattern
"""
js_is_array = False
js_type = None
js_format = None
js_minimum = None
js_maximum = None
js_pattern = None

Raises:
ValueError: When more than one type rule is found

Returns:
The type rule if one is found, or None
"""
rule_list = [rule.split(" ")[0] for rule in rule_list]
rule_list = [rule for rule in rule_list if rule in TYPE_RULES]
if len(rule_list) > 1:
raise ValueError(
"Found more than one type rule in validation rules: ", rule_list
if validation_rules:
validation_rules = filter_unused_inputted_rules(validation_rules)
validation_rule_name_strings = get_names_from_inputted_rules(validation_rules)
check_for_duplicate_inputted_rules(validation_rule_name_strings)
check_for_conflicting_inputted_rules(validation_rule_name_strings)
validation_rule_names = get_validation_rule_names_from_inputted_rules(
validation_rules
)
if len(rule_list) == 0:
return None
return rule_list[0]

js_is_array = ValidationRuleName.LIST in validation_rule_names

def _get_rule_from_rule_list(
rule: ValidationRule, rule_list: list[str]
) -> Optional[str]:
"""
Returns the a rule from a list of rules if there is only one
js_type = get_js_type_from_inputted_rules(validation_rules)

Arguments:
rule: A ValidationRule enum
rule_list: A list of validation rules
if ValidationRuleName.URL in validation_rule_names:
js_format = JSONSchemaFormat.URI
elif ValidationRuleName.DATE in validation_rule_names:
js_format = JSONSchemaFormat.DATE
else:
js_format = None

Raises:
ValueError: When more than one of the rule is found
in_range_rule = get_rule_from_inputted_rules(
ValidationRuleName.IN_RANGE, validation_rules
)
if in_range_rule:
js_minimum, js_maximum = get_in_range_parameters_from_inputted_rule(
in_range_rule
)
else:
js_minimum = None
js_maximum = None

Returns:
The rule if one is found, otherwise None is returned
"""
rule_value = rule.value
rule_list = [rule for rule in rule_list if rule.startswith(rule_value)]
if len(rule_list) > 1:
msg = (
f"Found more than one '{rule_value}' rule in validation rules: {rule_list}"
regex_rule = get_rule_from_inputted_rules(
ValidationRuleName.REGEX, validation_rules
)
raise ValueError(msg)
if len(rule_list) == 0:
return None
return rule_list[0]
if regex_rule:
js_pattern = get_regex_parameters_from_inputted_rule(regex_rule)
else:
js_pattern = None

return (
js_is_array,
js_type,
js_format,
js_minimum,
js_maximum,
js_pattern,
)


@dataclass
Expand Down Expand Up @@ -911,7 +843,7 @@ def _create_array_property(node: Node) -> Property:

items: Items = {}
if node.type:
items["type"] = node.type
items["type"] = node.type.value
_set_type_specific_keywords(items, node)

array_type_dict: TypeDict = {"type": "array", "title": "array"}
Expand Down Expand Up @@ -984,10 +916,10 @@ def _create_simple_property(node: Node) -> Property:

if node.type:
if node.is_required:
prop["type"] = node.type
prop["type"] = node.type.value
else:
prop["oneOf"] = [
{"type": node.type, "title": node.type},
{"type": node.type.value, "title": node.type.value},
{"type": "null", "title": "null"},
]
elif node.is_required:
Expand All @@ -1005,9 +937,10 @@ def _set_type_specific_keywords(schema: dict[str, Any], node: Node) -> None:
schema: The schema to set keywords on
node (Node): The node the corresponds to the property which is being set in the JSON Schema
"""
if node.minimum is not None:
schema["minimum"] = node.minimum
if node.maximum is not None:
schema["maximum"] = node.maximum
if node.pattern is not None:
schema["pattern"] = node.pattern
for attr in ["minimum", "maximum", "pattern"]:
value = getattr(node, attr)
if value is not None:
schema[attr] = value

if node.format is not None:
schema["format"] = node.format.value
Loading
Loading