diff --git a/mathparse/mathparse.py b/mathparse/mathparse.py index a71f8a2..9e572b5 100644 --- a/mathparse/mathparse.py +++ b/mathparse/mathparse.py @@ -400,13 +400,15 @@ def preprocess_unary_operators(tokens: list) -> list: * At the beginning of the expression * After an opening parenthesis '(' * After a binary operator `(+, -, *, /, ^)` + * After a unary function `(sqrt, log, neg)` """ if not tokens: return tokens processed_tokens = [] - binary_operators = mathwords.BINARY_OPERATORS | {'('} + # Tokens after which a minus sign is treated as unary (negative) + unary_contexts = mathwords.BINARY_OPERATORS | {'('} for i, token in enumerate(tokens): if token == '-': @@ -418,8 +420,9 @@ def preprocess_unary_operators(tokens: list) -> list: is_unary_minus = True elif i > 0: prev_token = tokens[i - 1] - # A unary minus after opening parenthesis or binary operators - if prev_token in binary_operators: + # A unary minus after opening parenthesis, binary operators, + # or unary functions + if prev_token in unary_contexts or is_unary(prev_token): is_unary_minus = True if is_unary_minus: @@ -539,14 +542,20 @@ def evaluate_postfix(tokens: list) -> Union[int, float, str, Decimal]: elif token == '.': # Treat decimal points as a binary operator that combines the # integer and fractional part of two numbers - # Example: 53 . 25 = 53.25 + # Example: 53 . 25 = 53.25, -3 . 5 = -3.5 if b == 0: total = Decimal(a) else: # Count the digits in b to determine the divisor digits = len(str(int(b))) divisor = 10 ** digits - total = a + (b / divisor) + fractional_part = b / divisor + # Handle negative numbers correctly: -3 . 
5 should be -3.5, + # not -2.5 + if a < 0: + total = a - fractional_part + else: + total = a + fractional_part else: raise PostfixTokenEvaluationException( 'Unknown token "{}"'.format(token) @@ -612,8 +621,22 @@ def tokenize(string: str, language: str = None, escape: str = '___') -> list: string = string.replace(spaced_phrase, phrase) # Binary operators must have space around them to be tokenized properly + # Special handling for minus sign: preserve leading negatives for operator in mathwords.BINARY_OPERATORS: - string = string.replace(operator, f' {operator} ') + if operator == '-': + # For minus sign, use a pattern that only spaces it when it's + # clearly a binary operator (after digits or closing parenthesis). + # This preserves leading negatives like "-3" and distinguishes + # between: + # - "What is -3 + 3" --> "-3 + 3" (minus is part of number) + # - "5 - 3" --> "5 - 3" (minus is binary operator, needs spacing) + # - "math - 4" --> "- 4" (minus after letters is not a binary + # operator) + # Only add spaces around minus when preceded by a digit or closing + # parenthesis + string = re.sub(r'([\d)])\s*-\s*', r'\1 - ', string) + else: + string = string.replace(operator, f' {operator} ') # Parenthesis must have space around them to be tokenized properly string = string.replace('(', ' ( ') @@ -752,11 +775,46 @@ def extract_expression(dirty_string: str, language: str) -> str: start_index = 0 end_index = len(tokens) - for part in tokens: + # Find the start of the mathematical expression + # Skip over non-mathematical tokens AND isolated binary operators + for i, part in enumerate(tokens): if is_symbol(part) or is_word(part, language): - break - else: - start_index += 1 + # A potential start was found, so check if it's a standalone binary + # operator. 
Binary operators (except '(') are only valid at the + # start if they are unary (such as a leading minus for a negative + # number) + if part in mathwords.BINARY_OPERATORS and part != '(': + # For a binary operator to be the start of an expression, it + # must be: + # 1. At the very beginning (position 0) - could be unary minus + # 2. OR all previous tokens were non-mathematical - also could + # be unary + # If there were math tokens before it, it's likely a + # separator + + # Check if all previous tokens are non-mathematical + all_prev_non_math = True + for j in range(i): + if is_symbol(tokens[j]) or is_word(tokens[j], language): + all_prev_non_math = False + break + + # Only start at this operator if every earlier token is + # non-math AND it is followed by a mathematical token + if all_prev_non_math and i + 1 < len(tokens): + next_token = tokens[i + 1] + if ( + is_int(next_token) or is_float(next_token) or + is_constant(next_token) or is_unary(next_token) or + next_token == '(' or is_word(next_token, language) + ): + start_index = i + break + else: + # Any other symbol or word (e.g. an opening parenthesis) is a + # valid start index + start_index = i + break for part in reversed(tokens): if is_symbol(part) or is_word(part, language): diff --git a/tests/test_prefix_unary_operations.py b/tests/test_prefix_unary_operations.py new file mode 100644 index 0000000..2104389 --- /dev/null +++ b/tests/test_prefix_unary_operations.py @@ -0,0 +1,243 @@ +from unittest import TestCase +from mathparse import mathparse + + +class NegativeNumberTestCase(TestCase): + """ + Test cases for basic negative number operations. 
+ """ + + def test_leading_negative_with_addition(self): + """ + Test: -3 + 3 should equal 0 + """ + result = mathparse.parse('-3 + 3') + self.assertEqual(result, 0) + + def test_leading_negative_with_subtraction(self): + """ + Test: -3 - 5 should equal -8 + """ + result = mathparse.parse('-3 - 5') + self.assertEqual(result, -8) + + def test_leading_negative_with_multiplication(self): + """ + Test: -10 * 2 should equal -20 + """ + result = mathparse.parse('-10 * 2') + self.assertEqual(result, -20) + + def test_negative_in_parentheses(self): + """ + Test: (-3) should equal -3 + """ + result = mathparse.parse('(-3)') + self.assertEqual(result, -3) + + def test_negative_in_parentheses_with_addition(self): + """ + Test: (-3) + 5 should equal 2 + """ + result = mathparse.parse('(-3) + 5') + self.assertEqual(result, 2) + + def test_addition_with_negative_in_parentheses(self): + """ + Test: 5 + (-3) should equal 2 + """ + result = mathparse.parse('5 + (-3)') + self.assertEqual(result, 2) + + def test_two_negatives_in_parentheses(self): + """ + Test: (-3) * (-2) should equal 6 + """ + result = mathparse.parse('(-3) * (-2)') + self.assertEqual(result, 6) + + def test_multiplication_by_negative(self): + """ + Test: 3 * -2 should equal -6 + """ + result = mathparse.parse('3 * -2') + self.assertEqual(result, -6) + + def test_subtraction_of_negative(self): + """ + Test: 3--2 (no spaces) should equal 5 (subtracting a negative) + """ + result = mathparse.parse('3--2') + self.assertEqual(result, 5) + + def test_two_separate_negatives_with_addition(self): + """ + Test: -3 + -5 should equal -8 + """ + result = mathparse.parse('-3 + -5') + self.assertEqual(result, -8) + + def test_complex_expression_with_leading_negative(self): + """ + Test: (-3 + 5) * 2 should equal 4 + """ + result = mathparse.parse('(-3 + 5) * 2') + self.assertEqual(result, 4) + + def test_negative_after_operator_in_expression(self): + """ + Test: 2 * (-3 + 5) should equal 4 + """ + result = mathparse.parse('2 * (-3 + 5)') 
+ self.assertEqual(result, 4) + + def test_negative_of_expression(self): + """ + Test: -(3 + 5) should equal -8 + """ + result = mathparse.parse('-(3 + 5)') + self.assertEqual(result, -8) + + def test_negative_decimal(self): + """ + Test: -3.5 should equal -3.5 (parsed as -3 . 5) + """ + result = mathparse.parse('-3.5') + self.assertEqual(float(result), -3.5) + + def test_negative_decimal_in_expression(self): + """ + Test: -10.25 + 5 should work correctly + """ + result = mathparse.parse('-10.25 + 5') + # Parses as (-10) . 25 + 5 = -10.25 + 5 = -5.25 + self.assertEqual(float(result), -5.25) + + def test_positive_decimal(self): + """ + Test: 3.5 should equal 3.5 + """ + result = mathparse.parse('3.5') + self.assertEqual(float(result), 3.5) + + def test_negative_decimal_with_multiple_digits(self): + """ + Test: -53.25 should equal -53.25 + """ + result = mathparse.parse('-53.25') + self.assertEqual(float(result), -53.25) + + def test_negative_decimal_in_multiplication(self): + """ + Test: -2.5 * 4 should equal -10.0 + """ + result = mathparse.parse('-2.5 * 4') + self.assertEqual(float(result), -10.0) + + +class NegativeWithUnaryFunctionsTestCase(TestCase): + """ + Test cases for negative numbers with unary functions. + """ + + def test_sqrt_with_negative_causes_math_error(self): + """ + Test: sqrt -16 should parse correctly but give a math domain error. + + The tokenizer keeps -16 as a single token '-16', which is correctly + identified as an integer and passed to sqrt, causing the expected + math domain error. + """ + # Verify tokenization + tokens = mathparse.tokenize('sqrt -16') + self.assertEqual(tokens, ['sqrt', '-16']) + + # Verify it causes the expected math error + with self.assertRaises(ValueError) as context: + mathparse.parse('sqrt -16') + self.assertIn("math domain error", str(context.exception).lower()) + + def test_log_with_negative_causes_math_error(self): + """ + Test: log -10 should parse correctly but give a math domain error. 
+ """ + # Verify tokenization + tokens = mathparse.tokenize('log -10') + self.assertEqual(tokens, ['log', '-10']) + + # Verify it causes the expected math error + with self.assertRaises(ValueError) as context: + mathparse.parse('log -10') + self.assertIn("math domain error", str(context.exception).lower()) + + def test_sqrt_with_positive_after_operator(self): + """ + Test: 3 + sqrt 16 should equal 7.0 + """ + result = mathparse.parse('3 + sqrt 16') + self.assertEqual(result, 7.0) + + def test_multiple_negatives_with_operations(self): + """ + Test: -2 * -3 + -1 should equal 5 + """ + result = mathparse.parse('-2 * -3 + -1') + self.assertEqual(result, 5) + + +class ExtractExpressionNegativeTestCase(TestCase): + """ + Test cases for extract_expression with negative numbers. + """ + + def test_extract_leading_negative(self): + """ + Test: 'What is -3 + 3?' should extract '-3 + 3' + """ + result = mathparse.extract_expression( + 'What is -3 + 3?', language='ENG' + ) + self.assertEqual(result, '-3 + 3') + + def test_extract_negative_with_multiplication(self): + """ + Test: 'Calculate -5 * 2' should extract '-5 * 2' + """ + result = mathparse.extract_expression( + 'Calculate -5 * 2', language='ENG' + ) + self.assertEqual(result, '-5 * 2') + + def test_extract_binary_subtraction(self): + """ + Test: 'The answer to 10 - 5 is' should extract '10 - 5' + """ + result = mathparse.extract_expression( + 'The answer to 10 - 5 is', language='ENG' + ) + self.assertEqual(result, '10 - 5') + + def test_extract_pure_negative_expression(self): + """ + Test: '-3 + 3' should extract '-3 + 3' (with no extra text) + """ + result = mathparse.extract_expression('-3 + 3', language='ENG') + self.assertEqual(result, '-3 + 3') + + def test_extract_pure_subtraction(self): + """ + Test: '5 - 3' should extract '5 - 3' (with no extra text) + """ + result = mathparse.extract_expression('5 - 3', language='ENG') + self.assertEqual(result, '5 - 3') + + def 
test_extract_negative_in_parentheses_with_words(self): + """ + Test: Extract expression with negative in parentheses from sentence + """ + result = mathparse.extract_expression( + 'Compute (-3) + 5', language='ENG' + ) + # NOTE: Spaces are currently added, but ideally these will be removed + # in the future + self.assertEqual(result, '( -3 ) + 5') diff --git a/tests/test_utils.py b/tests/test_utils.py index 6b3c402..8d26f01 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -81,6 +81,18 @@ def test_extract_expression(self): self.assertEqual(result, '3 + 3') + def test_extract_expression_with_negative(self): + result = mathparse.extract_expression('-3 + 3', language='ENG') + + self.assertEqual(result, '-3 + 3') + + def test_extract_expression_with_words(self): + result = mathparse.extract_expression( + 'three plus three', language='ENG' + ) + + self.assertEqual(result, 'three plus three') + def test_ignore_punctuation(self): result = mathparse.extract_expression('3?', language='ENG')