Skip to content

Commit f51994a

Browse files
committed
Reject NaN/inf numeric results instead of returning them as valid
_validate_response checks isinstance(value, (int, float, np.int64)) for type 'number', but NaN and inf are floats and pass that check, so they are wrapped in a NumberResponse and returned as the answer. These values almost always come from an aggregation over an empty result (e.g. df['sales'].mean() when a filter matched zero rows), so returning them is a silent wrong answer. This commit adds a finite-number check that raises InvalidOutputValueMismatch for NaN/inf, plus a regression test (no LLM required) that fails on main and passes with the fix.
1 parent bbbb771 commit f51994a

2 files changed

Lines changed: 41 additions & 0 deletions

File tree

pandasai/core/response/parser.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,14 @@ def _validate_response(self, result: dict):
4343
raise InvalidOutputValueMismatch(
4444
"Invalid output: Expected a numeric value for result type 'number', but received a non-numeric value."
4545
)
46+
# NaN / inf are floats, so they pass the isinstance check above and would be
47+
# returned as a valid number. They almost always come from an aggregation over
48+
# empty data (e.g. df["x"].mean() on a zero-row result) - reject instead of
49+
# silently returning NaN as the answer.
50+
if isinstance(result["value"], float) and not np.isfinite(result["value"]):
51+
raise InvalidOutputValueMismatch(
52+
"Invalid output: Numeric result is NaN or infinite (likely an aggregation over empty data)."
53+
)
4654
elif result["type"] == "string":
4755
if not isinstance(result["value"], str):
4856
raise InvalidOutputValueMismatch(

tests/test_nan_number_rejected.py

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
"""Regression test: a NaN/inf numeric result must be rejected, not returned as valid.
2+
3+
NaN and inf are floats, so they pass the isinstance(value, (int, float, np.int64))
4+
check in _validate_response and would be wrapped in a NumberResponse and returned as
5+
the answer. They almost always come from an aggregation over an empty result
6+
(e.g. df["sales"].mean() when a WHERE clause matched zero rows).
7+
8+
with the fix -> PASS (raises InvalidOutputValueMismatch)
9+
without it -> FAIL (returns NumberResponse(nan) silently)
10+
"""
11+
import numpy as np
12+
import pytest
13+
14+
from pandasai.core.response.parser import ResponseParser
15+
from pandasai.exceptions import InvalidOutputValueMismatch
16+
17+
18+
def test_nan_number_rejected():
19+
parser = ResponseParser()
20+
with pytest.raises(InvalidOutputValueMismatch, match="NaN"):
21+
parser.parse({"type": "number", "value": float("nan")})
22+
23+
24+
def test_inf_number_rejected():
25+
parser = ResponseParser()
26+
with pytest.raises(InvalidOutputValueMismatch, match="NaN"):
27+
parser.parse({"type": "number", "value": float("inf")})
28+
29+
30+
def test_normal_number_still_ok():
31+
parser = ResponseParser()
32+
resp = parser.parse({"type": "number", "value": 42})
33+
assert resp.value == 42

0 commit comments

Comments
 (0)