Skip to content

Commit 5cead91

Browse files
committed
JsonParser: Allow floats in documents
1 parent ab1618a commit 5cead91

File tree

2 files changed

+40
-15
lines changed

2 files changed

+40
-15
lines changed

py_partiql_parser/_internal/json_parser.py

Lines changed: 19 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,13 @@
88
NEW_LINE = "\n"
99

1010

11+
def int_or_float(val: str) -> float:
    """
    Convert the textual form of a number into a Python number.

    Values containing a fraction (".") or an exponent ("e"/"E") are parsed
    with float(); everything else is parsed with int(), so whole numbers keep
    their integer type.

    :param val: the number as it appears in the document, e.g. "1", "-1.4"
    :return: an int for whole numbers, a float otherwise
    :raises ValueError: if val is not a valid int/float literal
    """
    # The exponent check matters: json.dumps(1e20) emits "1e+20", which has
    # no "." and would make int() raise ValueError.
    if any(marker in val for marker in ".eE"):
        return float(val)
    return int(val)
13+
14+
def is_numeric(char: str) -> bool:
    """
    Tell whether the given text looks like (the start of) a number.

    Returns True when the text starts with a minus sign, or when it consists
    solely of decimal digits. Note that only the leading character is checked
    for the sign; what follows the "-" is not validated here.

    :param char: a single character, or a longer piece of text
    :return: True if the text starts with "-" or is made up of decimal digits
    """
    # isdecimal() rather than isnumeric(): the latter also accepts characters
    # such as "²" or "½", which int()/float() cannot convert.
    return char.startswith("-") or char.isdecimal()
16+
17+
1118
class JsonParser:
1219
"""
1320
Input can be a multiple documents, separated by a new-line (\n) characters
@@ -18,7 +25,7 @@ class JsonParser:
1825
def parse(original: str) -> Iterator[Any]:
1926
if not (original.startswith("{") or original.startswith("[")):
2027
# Doesn't look like JSON - let's return as a variable
21-
yield original if original.isnumeric() else Variable(original)
28+
yield original if is_numeric(original) else Variable(original)
2229
tokenizer = ClauseTokenizer(original)
2330
while tokenizer.current() is not None:
2431
result = JsonParser._get_next_document(original, tokenizer)
@@ -93,7 +100,7 @@ def _get_next_document(
93100
level -= 1
94101
# End of a variable/number
95102
if section == "INT_VALUE":
96-
result[dict_key] = int(current_phrase)
103+
result[dict_key] = int_or_float(current_phrase)
97104
elif current_phrase.lower() in ["true", "false"]:
98105
result[dict_key] = current_phrase.lower() == "true"
99106
else:
@@ -106,7 +113,7 @@ def _get_next_document(
106113
tokenizer.revert()
107114
elif c in [","] and section in ["VAR_VALUE", "INT_VALUE"]:
108115
if section == "INT_VALUE":
109-
result[dict_key] = int(current_phrase)
116+
result[dict_key] = int_or_float(current_phrase)
110117
elif current_phrase.lower() in ["true", "false"]:
111118
result[dict_key] = current_phrase.lower() == "true"
112119
else:
@@ -126,10 +133,7 @@ def _get_next_document(
126133
if section == "KEY_TO_VALUE":
127134
# We found a value directly after the key, unquoted
128135
# That means it's either a variable, or a number
129-
if c.isnumeric():
130-
section = "INT_VALUE"
131-
else:
132-
section = "VAR_VALUE"
136+
section = "INT_VALUE" if is_numeric(c) else "VAR_VALUE"
133137
if section in ["DICT_KEY", "DICT_VAL", "INT_VALUE", "VAR_VALUE"]:
134138
current_phrase += c
135139
return result
@@ -162,17 +166,17 @@ def _parse_list(original: str, tokenizer: ClauseTokenizer) -> List[Any]:
162166
section = None
163167
elif c == "]" and not section:
164168
return result
165-
elif c == "]" and section == "VAR_VALUE":
166-
if current_phrase.isnumeric():
167-
result.append(int(current_phrase))
169+
elif c == "]" and section in ["INT_VALUE", "VAR_VALUE"]:
170+
if section == "INT_VALUE":
171+
result.append(int_or_float(current_phrase))
168172
elif current_phrase.lower() in ["true", "false"]:
169173
result.append(current_phrase.lower() == "true")
170174
else:
171175
result.append(Variable(current_phrase))
172176
return result
173-
elif c == "," and section == "VAR_VALUE":
174-
if current_phrase.isnumeric():
175-
result.append(int(current_phrase))
177+
elif c == "," and section in ["INT_VALUE", "VAR_VALUE"]:
178+
if section == "INT_VALUE":
179+
result.append(int_or_float(current_phrase))
176180
elif current_phrase.lower() in ["true", "false"]:
177181
result.append(current_phrase.lower() == "true")
178182
else:
@@ -184,8 +188,8 @@ def _parse_list(original: str, tokenizer: ClauseTokenizer) -> List[Any]:
184188
tokenizer.skip_white_space()
185189
elif not section:
186190
current_phrase += c
187-
section = "VAR_VALUE"
188-
elif section in ["VALUE", "VAR_VALUE"]:
191+
section = "INT_VALUE" if is_numeric(c) else "VAR_VALUE"
192+
elif section in ["VALUE", "INT_VALUE", "VAR_VALUE"]:
189193
current_phrase += c
190194
return result
191195

tests/test_json_parser.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -137,6 +137,27 @@ def test_multiline_bool_parser() -> None:
137137
assert list(JsonParser.parse(combined)) == [obj1, obj2]
138138

139139

140+
@pytest.mark.parametrize(
    "source",
    [
        # Top-level objects holding an int or a float
        {"sth": 1},
        {"sth": 1, "other": "y"},
        {"sth": 1.7},
        {"sth": 1.7, "other": "y"},
        # Numbers nested inside lists of objects
        [{"staff": [{"name": "J M", "age": 75}]}],
        [{"staff": [{"name": "J M", "age": 75.2}]}],
        # Lists mixing numbers and strings, including negative values
        {"sth": [1, 1.7]},
        {"sth": ["asdf", 1.7]},
        {"sth": ["asdf", 1]},
        {"sth": [1, "asdf"]},
        {"sth": [-1, "asdf"]},
        {"sth": [-1.4, "asdf"]},
    ],
)
def test_numbers(source: Any) -> None:  # type: ignore[misc]
    """
    Serialise each sample with json.dumps and verify that parsing the
    resulting text yields exactly one document equal to the sample.
    """
    serialised = json.dumps(source)
    parsed = list(JsonParser.parse(serialised))
    assert parsed == [source]
159+
160+
140161
@pytest.mark.parametrize("nr_of_docs", [1, 25, 2500])
141162
def test_large_object(nr_of_docs: int) -> None:
142163
data = "".join(

0 commit comments

Comments
 (0)