Skip to content

Commit 19ba49f

Browse files
andrewelamb and Copilot authored
[SCHEMATIC-295] Add ability for JSON Schemas to have format:date and format:uri based on date and url validation rules (#1620)
* add datatypes to data model * add date and url as validation rules for JSON Schemas * ran pre-commit * reordered constant file * fix bug with misnamed variable * cleaned up variable names * some cleanup * clear up docstring * remove bool validation rule * clear up docstring * clear up docstring * change _VALIDATION_RULES to dict * redo how type is determined * removed is_type_rule attribute * fixed docstring * refactor * updated columntype datamodels * rename module * added links to JSON Schema docs * ran pre-commit * fix missing incompatible rules * refactored how validation rule inputs are checked * refactored how validation rule inputs are checked * ran pre-commit * various suggestions * lingling's suggestions * fix tests * gianna's suggestions * run pre commit * lingling's suggestions * Update schematic/schemas/json_schema_validation_rule_functions.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> * Update schematic/schemas/json_schema_validation_rule_functions.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> * Update schematic/schemas/json_schema_validation_rule_functions.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --------- Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
1 parent 5caa36d commit 19ba49f

13 files changed

+1453
-523
lines changed

schematic/schemas/constants.py

Lines changed: 13 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -3,9 +3,12 @@
33
from enum import Enum
44

55

6-
class ValidationRule(Enum):
7-
"""Validation rules that are used to create JSON Schema"""
6+
class ValidationRuleName(Enum):
7+
"""Names of validation rules that are used to create JSON Schema"""
88

9+
LIST = "list"
10+
DATE = "date"
11+
URL = "url"
912
REGEX = "regex"
1013
IN_RANGE = "inRange"
1114
STR = "str"
@@ -16,26 +19,23 @@ class ValidationRule(Enum):
1619

1720

1821
class JSONSchemaType(Enum):
19-
"""This enum is allowed values type values for a JSON Schema in a data model"""
22+
"""This enum is the currently supported JSON Schema types"""
2023

2124
STRING = "string"
2225
NUMBER = "number"
2326
INTEGER = "integer"
2427
BOOLEAN = "boolean"
2528

2629

30+
class JSONSchemaFormat(Enum):
31+
"""This enum is the currently supported JSON Schema formats"""
32+
33+
DATE = "date"
34+
URI = "uri"
35+
36+
2737
class RegexModule(Enum):
2838
"""This enum are allowed modules for the regex validation rule"""
2939

3040
SEARCH = "search"
3141
MATCH = "match"
32-
33-
34-
# A dict where the keys are type validation rules, and the values are their JSON Schema equivalent
35-
TYPE_RULES = {
36-
ValidationRule.STR.value: JSONSchemaType.STRING.value,
37-
ValidationRule.NUM.value: JSONSchemaType.NUMBER.value,
38-
ValidationRule.FLOAT.value: JSONSchemaType.NUMBER.value,
39-
ValidationRule.INT.value: JSONSchemaType.INTEGER.value,
40-
ValidationRule.BOOL.value: JSONSchemaType.BOOLEAN.value,
41-
}

schematic/schemas/create_json_schema.py

Lines changed: 92 additions & 163 deletions
Original file line number | Diff line number | Diff line change
@@ -15,13 +15,22 @@
1515

1616
from schematic.schemas.data_model_graph import DataModelGraphExplorer
1717
from schematic.utils.schema_utils import get_json_schema_log_file_path
18-
from schematic.utils.validate_utils import rule_in_rule_list
1918
from schematic.utils.io_utils import export_json
19+
from schematic.schemas.json_schema_validation_rule_functions import (
20+
filter_unused_inputted_rules,
21+
check_for_conflicting_inputted_rules,
22+
check_for_duplicate_inputted_rules,
23+
get_in_range_parameters_from_inputted_rule,
24+
get_regex_parameters_from_inputted_rule,
25+
get_js_type_from_inputted_rules,
26+
get_rule_from_inputted_rules,
27+
get_validation_rule_names_from_inputted_rules,
28+
get_names_from_inputted_rules,
29+
)
2030
from schematic.schemas.constants import (
21-
ValidationRule,
2231
JSONSchemaType,
23-
RegexModule,
24-
TYPE_RULES,
32+
JSONSchemaFormat,
33+
ValidationRuleName,
2534
)
2635

2736

@@ -142,11 +151,12 @@ class Node: # pylint: disable=too-many-instance-attributes
142151
is_required: Whether or not this node is required
143152
dependencies: This nodes dependencies
144153
description: This nodes description, gotten from the comment in the data model
145-
type: The type of the property (inferred from validation_rules)
146154
is_array: Whether or not the property is an array (inferred from validation_rules)
155+
type: The type of the property (inferred from validation_rules)
156+
format: The format of the property (inferred from validation_rules)
147157
minimum: The minimum value of the property (if numeric) (inferred from validation_rules)
148158
maximum: The maximum value of the property (if numeric) (inferred from validation_rules)
149-
pattern: The regex pattern of the property
159+
pattern: The regex pattern of the property (inferred from validation_rules)
150160
"""
151161

152162
name: str
@@ -158,8 +168,9 @@ class Node: # pylint: disable=too-many-instance-attributes
158168
is_required: bool = field(init=False)
159169
dependencies: list[str] = field(init=False)
160170
description: str = field(init=False)
161-
type: Optional[str] = field(init=False)
162171
is_array: bool = field(init=False)
172+
type: Optional[JSONSchemaType] = field(init=False)
173+
format: Optional[JSONSchemaFormat] = field(init=False)
163174
minimum: Optional[float] = field(init=False)
164175
maximum: Optional[float] = field(init=False)
165176
pattern: Optional[str] = field(init=False)
@@ -191,8 +202,9 @@ def __post_init__(self) -> None:
191202
)
192203

193204
(
194-
self.type,
195205
self.is_array,
206+
self.type,
207+
self.format,
196208
self.minimum,
197209
self.maximum,
198210
self.pattern,
@@ -201,168 +213,84 @@ def __post_init__(self) -> None:
201213

202214
def _get_validation_rule_based_fields(
203215
validation_rules: list[str],
204-
) -> tuple[Optional[str], bool, Optional[float], Optional[float], Optional[str]]:
216+
) -> tuple[
217+
bool,
218+
Optional[JSONSchemaType],
219+
Optional[JSONSchemaFormat],
220+
Optional[float],
221+
Optional[float],
222+
Optional[str],
223+
]:
205224
"""
206225
Gets the fields for the Node class that are based on the validation rules
207226
208-
Args:
209-
validation_rules: A list of validation rules
210-
211-
Raises:
212-
ValueError: If both the inRange and regex rule are present
213-
ValueError: If the inRange rule and a type validation rule other than 'int' or 'num'
214-
are present
215-
ValueError: If the regex rule and a type validation rule other than 'str' are present
216-
217-
Returns:
218-
A tuple containing the type, is_array, minimum, maximum, and pattern fields for
219-
a Node object
220-
"""
221-
prop_type: Optional[str] = None
222-
is_array = False
223-
minimum: Optional[float] = None
224-
maximum: Optional[float] = None
225-
pattern: Optional[str] = None
226-
227-
if validation_rules:
228-
if rule_in_rule_list("list", validation_rules):
229-
is_array = True
230-
231-
type_rule = _get_type_rule_from_rule_list(validation_rules)
232-
if type_rule:
233-
prop_type = TYPE_RULES.get(type_rule)
234-
235-
regex_rule = _get_rule_from_rule_list(ValidationRule.REGEX, validation_rules)
236-
range_rule = _get_rule_from_rule_list(ValidationRule.IN_RANGE, validation_rules)
237-
if range_rule and regex_rule:
238-
raise ValueError(
239-
"regex and inRange rules are incompatible: ", validation_rules
240-
)
241-
242-
if range_rule:
243-
if prop_type not in [
244-
JSONSchemaType.NUMBER.value,
245-
JSONSchemaType.INTEGER.value,
246-
None,
247-
]:
248-
raise ValueError(
249-
"Validation rules must be either 'int' or 'num' when using the inRange rule"
250-
)
251-
prop_type = prop_type or JSONSchemaType.NUMBER.value
252-
minimum, maximum = _get_range_from_in_range_rule(range_rule)
253-
254-
if regex_rule:
255-
if prop_type not in (None, JSONSchemaType.STRING.value):
256-
raise ValueError("Type must be 'string' when using a regex rule")
257-
prop_type = JSONSchemaType.STRING.value
258-
pattern = _get_pattern_from_regex_rule(regex_rule)
259-
260-
return (
261-
prop_type,
262-
is_array,
263-
minimum,
264-
maximum,
265-
pattern,
266-
)
267-
268-
269-
def _get_range_from_in_range_rule(
270-
rule: str,
271-
) -> tuple[Optional[float], Optional[float]]:
272-
"""
273-
Returns the min and max from an inRange rule if they exist
274-
275-
Arguments:
276-
rule: The inRange rule
277-
278-
Returns:
279-
The min and max from the rule
280-
"""
281-
range_min: Optional[float] = None
282-
range_max: Optional[float] = None
283-
parameters = rule.split(" ")
284-
if len(parameters) > 1 and parameters[1].isnumeric():
285-
range_min = float(parameters[1])
286-
if len(parameters) > 2 and parameters[2].isnumeric():
287-
range_max = float(parameters[2])
288-
return (range_min, range_max)
227+
JSON Schema docs:
289228
290-
291-
def _get_pattern_from_regex_rule(rule: str) -> Optional[str]:
292-
"""Gets the pattern from the regex rule
229+
Array: https://json-schema.org/understanding-json-schema/reference/array
230+
Types: https://json-schema.org/understanding-json-schema/reference/type#type-specific-keywords
231+
Format: https://json-schema.org/understanding-json-schema/reference/type#format
232+
Pattern: https://json-schema.org/understanding-json-schema/reference/string#regexp
233+
Min/max: https://json-schema.org/understanding-json-schema/reference/numeric#range
293234
294235
Arguments:
295-
rule: The full regex rule
236+
validation_rules: A list of input validation rules
296237
297238
Returns:
298-
If the module parameter is search or match, and the pattern parameter exists
299-
the pattern is returned
300-
Otherwise None
301-
"""
302-
parameters = rule.split(" ")
303-
if len(parameters) != 3:
304-
return None
305-
_, module, pattern = parameters
306-
# Do not translate other modules
307-
if module not in [item.value for item in RegexModule]:
308-
return None
309-
# Match is just search but only at the beginning of the string
310-
if module == RegexModule.MATCH.value and not pattern.startswith("^"):
311-
return f"^{pattern}"
312-
return pattern
313-
314-
315-
def _get_type_rule_from_rule_list(rule_list: list[str]) -> Optional[str]:
316-
"""
317-
Returns the type rule from a list of rules if there is only one
318-
Returns None if there are no type rules
239+
A tuple containing fields for a Node object:
240+
- js_is_array: Whether or not the Node should be an array in JSON Schema
241+
- js_type: The JSON Schema type
242+
- js_format: The JSON Schema format
243+
- js_minimum: If the type is numeric the JSON Schema minimum
244+
- js_maximum: If the type is numeric the JSON Schema maximum
245+
- js_pattern: If the type is string the JSON Schema pattern
246+
"""
247+
js_is_array = False
248+
js_type = None
249+
js_format = None
250+
js_minimum = None
251+
js_maximum = None
252+
js_pattern = None
319253

320-
Arguments:
321-
rule_list: A list of validation rules
322-
323-
Raises:
324-
ValueError: When more than one type rule is found
325-
326-
Returns:
327-
The type rule if one is found, or None
328-
"""
329-
rule_list = [rule.split(" ")[0] for rule in rule_list]
330-
rule_list = [rule for rule in rule_list if rule in TYPE_RULES]
331-
if len(rule_list) > 1:
332-
raise ValueError(
333-
"Found more than one type rule in validation rules: ", rule_list
254+
if validation_rules:
255+
validation_rules = filter_unused_inputted_rules(validation_rules)
256+
validation_rule_name_strings = get_names_from_inputted_rules(validation_rules)
257+
check_for_duplicate_inputted_rules(validation_rule_name_strings)
258+
check_for_conflicting_inputted_rules(validation_rule_name_strings)
259+
validation_rule_names = get_validation_rule_names_from_inputted_rules(
260+
validation_rules
334261
)
335-
if len(rule_list) == 0:
336-
return None
337-
return rule_list[0]
338262

263+
js_is_array = ValidationRuleName.LIST in validation_rule_names
339264

340-
def _get_rule_from_rule_list(
341-
rule: ValidationRule, rule_list: list[str]
342-
) -> Optional[str]:
343-
"""
344-
Returns the a rule from a list of rules if there is only one
265+
js_type = get_js_type_from_inputted_rules(validation_rules)
345266

346-
Arguments:
347-
rule: A ValidationRule enum
348-
rule_list: A list of validation rules
267+
if ValidationRuleName.URL in validation_rule_names:
268+
js_format = JSONSchemaFormat.URI
269+
elif ValidationRuleName.DATE in validation_rule_names:
270+
js_format = JSONSchemaFormat.DATE
349271

350-
Raises:
351-
ValueError: When more than one of the rule is found
272+
in_range_rule = get_rule_from_inputted_rules(
273+
ValidationRuleName.IN_RANGE, validation_rules
274+
)
275+
if in_range_rule:
276+
js_minimum, js_maximum = get_in_range_parameters_from_inputted_rule(
277+
in_range_rule
278+
)
352279

353-
Returns:
354-
The rule if one is found, otherwise None is returned
355-
"""
356-
rule_value = rule.value
357-
rule_list = [rule for rule in rule_list if rule.startswith(rule_value)]
358-
if len(rule_list) > 1:
359-
msg = (
360-
f"Found more than one '{rule_value}' rule in validation rules: {rule_list}"
280+
regex_rule = get_rule_from_inputted_rules(
281+
ValidationRuleName.REGEX, validation_rules
361282
)
362-
raise ValueError(msg)
363-
if len(rule_list) == 0:
364-
return None
365-
return rule_list[0]
283+
if regex_rule:
284+
js_pattern = get_regex_parameters_from_inputted_rule(regex_rule)
285+
286+
return (
287+
js_is_array,
288+
js_type,
289+
js_format,
290+
js_minimum,
291+
js_maximum,
292+
js_pattern,
293+
)
366294

367295

368296
@dataclass
@@ -911,7 +839,7 @@ def _create_array_property(node: Node) -> Property:
911839

912840
items: Items = {}
913841
if node.type:
914-
items["type"] = node.type
842+
items["type"] = node.type.value
915843
_set_type_specific_keywords(items, node)
916844

917845
array_type_dict: TypeDict = {"type": "array", "title": "array"}
@@ -984,10 +912,10 @@ def _create_simple_property(node: Node) -> Property:
984912

985913
if node.type:
986914
if node.is_required:
987-
prop["type"] = node.type
915+
prop["type"] = node.type.value
988916
else:
989917
prop["oneOf"] = [
990-
{"type": node.type, "title": node.type},
918+
{"type": node.type.value, "title": node.type.value},
991919
{"type": "null", "title": "null"},
992920
]
993921
elif node.is_required:
@@ -1005,9 +933,10 @@ def _set_type_specific_keywords(schema: dict[str, Any], node: Node) -> None:
1005933
schema: The schema to set keywords on
1006934
node (Node): The node the corresponds to the property which is being set in the JSON Schema
1007935
"""
1008-
if node.minimum is not None:
1009-
schema["minimum"] = node.minimum
1010-
if node.maximum is not None:
1011-
schema["maximum"] = node.maximum
1012-
if node.pattern is not None:
1013-
schema["pattern"] = node.pattern
936+
for attr in ["minimum", "maximum", "pattern"]:
937+
value = getattr(node, attr)
938+
if value is not None:
939+
schema[attr] = value
940+
941+
if node.format is not None:
942+
schema["format"] = node.format.value

0 commit comments

Comments
 (0)