Skip to content

Commit 8756d75

Browse files
committed
convert to Decimal with proper precision instead
1 parent 70135c7 commit 8756d75

2 files changed

Lines changed: 48 additions & 18 deletions

File tree

src/data_designer/engine/processing/gsonschema/validators.py

Lines changed: 28 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,9 @@
22
# SPDX-License-Identifier: Apache-2.0
33

44
import logging
5+
import re
56
from copy import deepcopy
7+
from decimal import ROUND_HALF_UP, Decimal
68
from typing import Any, overload
79

810
from jsonschema import Draft202012Validator, ValidationError, validators
@@ -70,17 +72,30 @@ def extend_jsonschema_validator_with_pruning(validator):
7072
return validators.extend(validator, {"additionalProperties": prune_additional_properties})
7173

7274

73-
def _has_number_string_anyof(schema: dict) -> bool:
74-
"""Check if schema has anyOf with both number and string (Pydantic Decimal pattern)."""
75+
def _get_decimal_info_from_anyof(schema: dict) -> tuple[bool, int | None]:
76+
"""Check if schema is a Decimal anyOf and extract decimal places.
77+
78+
Returns (is_decimal, decimal_places); decimal_places is None when is_decimal is False or when the string pattern carries no decimal-places limit.
79+
"""
7580
any_of = schema.get("anyOf")
7681
if not isinstance(any_of, list):
77-
return False
78-
types = {item.get("type") for item in any_of}
79-
return "number" in types and "string" in types
82+
return False, None
83+
84+
has_number = any(item.get("type") == "number" for item in any_of)
85+
if not has_number:
86+
return False, None
87+
88+
for item in any_of:
89+
if item.get("type") == "string" and "pattern" in item:
90+
match = re.search(r"\\d\{0,(\d+)\}", item["pattern"])
91+
if match:
92+
return True, int(match.group(1))
93+
return True, None # Decimal without precision constraint
94+
return False, None
8095

8196

8297
def normalize_decimal_fields(obj: DataObjectT, schema: JSONSchemaT) -> DataObjectT:
83-
"""Convert numeric values to strings for Decimal-like anyOf fields."""
98+
"""Convert Decimal-like anyOf fields to floats, rounded half-up to the schema's decimal places when a limit is present."""
8499
if not isinstance(obj, dict):
85100
return obj
86101

@@ -97,8 +112,13 @@ def normalize_decimal_fields(obj: DataObjectT, schema: JSONSchemaT) -> DataObjec
97112
obj[key] = normalize_decimal_fields(value, schema)
98113
elif isinstance(value, list):
99114
obj[key] = [normalize_decimal_fields(v, schema) if isinstance(v, dict) else v for v in value]
100-
elif isinstance(value, (int, float)) and not isinstance(value, bool) and _has_number_string_anyof(field_schema):
101-
obj[key] = str(value)
115+
elif isinstance(value, (int, float, str)) and not isinstance(value, bool):
116+
is_decimal, decimal_places = _get_decimal_info_from_anyof(field_schema)
117+
if is_decimal:
118+
d = Decimal(str(value))
119+
if decimal_places is not None:
120+
d = d.quantize(Decimal(f"0.{'0' * decimal_places}"), rounding=ROUND_HALF_UP)
121+
obj[key] = float(d)
102122

103123
return obj
104124

tests/engine/processing/gsonschema/test_validators.py

Lines changed: 20 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -199,21 +199,31 @@ def test_invalid_data_type():
199199

200200

201201
def test_normalize_decimal_anyof_fields() -> None:
202-
"""Test that Decimal-like anyOf fields (number|string) are normalized to strings."""
202+
"""Test that Decimal-like anyOf fields are normalized to floats with proper precision."""
203203
schema = {
204204
"type": "object",
205205
"properties": {
206206
"name": {"type": "string"},
207-
"price": {"anyOf": [{"type": "number"}, {"type": "string"}]},
207+
"price": {
208+
"anyOf": [
209+
{"type": "number"},
210+
{"type": "string", "pattern": r"^(?!^[-+.]*$)[+-]?0*\d*\.?\d{0,2}0*$"},
211+
]
212+
},
208213
},
209214
}
210215

211-
# Numeric value should be converted to string
212-
result1 = validate({"name": "Widget", "price": 189.99}, schema)
213-
assert result1["price"] == "189.99"
214-
assert isinstance(result1["price"], str)
216+
# Numeric value with extra precision should be rounded to 2 decimal places
217+
result1 = validate({"name": "Widget", "price": 189.999}, schema)
218+
assert result1["price"] == 190.0
219+
assert isinstance(result1["price"], float)
220+
221+
# Numeric value already within the allowed precision should come back unchanged as a float
222+
result2 = validate({"name": "Gadget", "price": 50.5}, schema)
223+
assert result2["price"] == 50.5
224+
assert isinstance(result2["price"], float)
215225

216-
# String value should remain a string
217-
result2 = validate({"name": "Gadget", "price": "249.99"}, schema)
218-
assert result2["price"] == "249.99"
219-
assert isinstance(result2["price"], str)
226+
# String value should be converted to float
227+
result3 = validate({"name": "Gizmo", "price": "249.99"}, schema)
228+
assert result3["price"] == 249.99
229+
assert isinstance(result3["price"], float)

0 commit comments

Comments
 (0)