gunthercox · gunthercox · Oct 11, 2025 · Oct 11, 2025 · Oct 11, 2025 · Copilot
diff --git a/mathparse/mathparse.py b/mathparse/mathparse.py
@@ -400,13 +400,15 @@ def preprocess_unary_operators(tokens: list) -> list:
     * At the beginning of the expression
     * After an opening parenthesis '('
     * After a binary operator `(+, -, *, /, ^)`
+    * After a unary function `(sqrt, log, neg)`
     """
     if not tokens:
         return tokens
 
     processed_tokens = []
 
-    binary_operators = mathwords.BINARY_OPERATORS | {'('}
+    # A minus sign directly after one of these tokens is unary (negation),
+    # not subtraction
+    unary_contexts = mathwords.BINARY_OPERATORS | {'('}
 
     for i, token in enumerate(tokens):
         if token == '-':
@@ -418,8 +420,9 @@ def preprocess_unary_operators(tokens: list) -> list:
                 is_unary_minus = True
             elif i > 0:
                 prev_token = tokens[i - 1]
-                # A unary minus after opening parenthesis or binary operators
-                if prev_token in binary_operators:
+                # A unary minus after opening parenthesis, binary operators,
+                # or unary functions
+                if prev_token in unary_contexts or is_unary(prev_token):
                     is_unary_minus = True
 
             if is_unary_minus:
@@ -539,14 +542,20 @@ def evaluate_postfix(tokens: list) -> Union[int, float, str, Decimal]:
             elif token == '.':
                 # Treat decimal points as a binary operator that combines the
                 # integer and fractional part of two numbers
-                # Example: 53 . 25 = 53.25
+                # Example: 53 . 25 = 53.25, -3 . 5 = -3.5
                 if b == 0:
                     total = Decimal(a)
                 else:
                     # Count the digits in b to determine the divisor
                     digits = len(str(int(b)))
                     divisor = 10 ** digits
-                    total = a + (b / divisor)
+                    fractional_part = b / divisor
+                    # Handle negative numbers correctly: -3 . 5 should be -3.5,
+                    # not -2.5
+                    if a < 0:
+                        total = a - fractional_part
+                    else:
+                        total = a + fractional_part
             else:
                 raise PostfixTokenEvaluationException(
                     'Unknown token "{}"'.format(token)
@@ -612,8 +621,22 @@ def tokenize(string: str, language: str = None, escape: str = '___') -> list:
                 string = string.replace(spaced_phrase, phrase)
 
     # Binary operators must have space around them to be tokenized properly
+    # Special handling for minus sign: preserve leading negatives
     for operator in mathwords.BINARY_OPERATORS:
-        string = string.replace(operator, f' {operator} ')
+        if operator == '-':
+            # For minus sign, use a pattern that only spaces it when it's
+            # clearly a binary operator (after digits or closing parenthesis).
+            # This preserves leading negatives like "-3" and distinguishes
+            # between:
+            # - "What is -3 + 3" --> "-3 + 3" (minus is part of number)
+            # - "5 - 3" --> "5 - 3" (minus is binary operator, needs spacing)
+            # - "math - 4" --> "- 4" (minus after letters is not a binary
+            #   operator)
+            # Only add spaces around minus when preceded by a digit or closing
+            # parenthesis
+            string = re.sub(r'([\d)])\s*-\s*', r'\1 - ', string)
+        else:
+            string = string.replace(operator, f' {operator} ')
 
     # Parenthesis must have space around them to be tokenized properly
     string = string.replace('(', ' ( ')
@@ -752,11 +775,46 @@ def extract_expression(dirty_string: str, language: str) -> str:
     start_index = 0
     end_index = len(tokens)
 
-    for part in tokens:
+    # Find the start of the mathematical expression
+    # Skip over non-mathematical tokens AND isolated binary operators
+    for i, part in enumerate(tokens):
         if is_symbol(part) or is_word(part, language):
-            break
-        else:
-            start_index += 1
+            # A potential start was found, so check if it's a standalone binary
+            # operator. Binary operators (except '(') are only valid at the
+            # start if they are unary (such as a leading minus for a negative
+            # number)
+            if part in mathwords.BINARY_OPERATORS and part != '(':
+                # For a binary operator to be the start of an expression, it
+                # must be:
+                # 1. At the very beginning (position 0) - could be unary minus
+                # 2. OR all previous tokens were non-mathematical - also could
+                #    be unary
+                # If a mathematical token already appeared before it, it is
+                # not treated as the start of the expression
+
+                # Check if all previous tokens are non-mathematical
+                all_prev_non_math = True
+                for j in range(i):
+                    if is_symbol(tokens[j]) or is_word(tokens[j], language):
+                        all_prev_non_math = False
+                        break
+
+                # Only start at this operator when no mathematical token
+                # precedes it (always true at index 0) AND the next token is
+                # mathematical
+                if all_prev_non_math and i + 1 < len(tokens):
+                    next_token = tokens[i + 1]
+                    if (
+                        is_int(next_token) or is_float(next_token) or
+                        is_constant(next_token) or is_unary(next_token) or
+                        next_token == '(' or is_word(next_token, language)
+                    ):
+                        start_index = i
+                        break
+            else:
+                # Start indexes can be an opening parenthesis, or a non-binary
+                # operator
+                start_index = i
+                break
 
     for part in reversed(tokens):
         if is_symbol(part) or is_word(part, language):

diff --git a/tests/test_prefix_unary_operations.py b/tests/test_prefix_unary_operations.py
@@ -0,0 +1,243 @@
+from unittest import TestCase
+from mathparse import mathparse
+
+
+class NegativeNumberTestCase(TestCase):
+    """
+    Test cases for basic negative number operations.
+    """
+
+    def test_leading_negative_with_addition(self):
+        """
+        Test: -3 + 3 should equal 0
+        """
+        result = mathparse.parse('-3 + 3')
+        self.assertEqual(result, 0)
+
+    def test_leading_negative_with_subtraction(self):
+        """
+        Test: -3 - 5 should equal -8
+        """
+        result = mathparse.parse('-3 - 5')
+        self.assertEqual(result, -8)
+
+    def test_leading_negative_with_multiplication(self):
+        """
+        Test: -10 * 2 should equal -20
+        """
+        result = mathparse.parse('-10 * 2')
+        self.assertEqual(result, -20)
+
+    def test_negative_in_parentheses(self):
+        """
+        Test: (-3) should equal -3
+        """
+        result = mathparse.parse('(-3)')
+        self.assertEqual(result, -3)
+
+    def test_negative_in_parentheses_with_addition(self):
+        """
+        Test: (-3) + 5 should equal 2
+        """
+        result = mathparse.parse('(-3) + 5')
+        self.assertEqual(result, 2)
+
+    def test_addition_with_negative_in_parentheses(self):
+        """
+        Test: 5 + (-3) should equal 2
+        """
+        result = mathparse.parse('5 + (-3)')
+        self.assertEqual(result, 2)
+
+    def test_two_negatives_in_parentheses(self):
+        """
+        Test: (-3) * (-2) should equal 6
+        """
+        result = mathparse.parse('(-3) * (-2)')
+        self.assertEqual(result, 6)
+
+    def test_multiplication_by_negative(self):
+        """
+        Test: 3 * -2 should equal -6
+        """
+        result = mathparse.parse('3 * -2')
+        self.assertEqual(result, -6)
+
+    def test_subtraction_of_negative(self):
+        """
+        Test: 3--2 (no spaces) should equal 5 (subtracting a negative)
+        """
+        result = mathparse.parse('3--2')
+        self.assertEqual(result, 5)
+
+    def test_two_separate_negatives_with_addition(self):
+        """
+        Test: -3 + -5 should equal -8
+        """
+        result = mathparse.parse('-3 + -5')
+        self.assertEqual(result, -8)
+
+    def test_complex_expression_with_leading_negative(self):
+        """
+        Test: (-3 + 5) * 2 should equal 4
+        """
+        result = mathparse.parse('(-3 + 5) * 2')
+        self.assertEqual(result, 4)
+
+    def test_negative_after_operator_in_expression(self):
+        """
+        Test: 2 * (-3 + 5) should equal 4
+        """
+        result = mathparse.parse('2 * (-3 + 5)')
+        self.assertEqual(result, 4)
+
+    def test_negative_of_expression(self):
+        """
+        Test: -(3 + 5) should equal -8
+        """
+        result = mathparse.parse('-(3 + 5)')
+        self.assertEqual(result, -8)
+
+    def test_negative_decimal(self):
+        """
+        Test: -3.5 should equal -3.5 (parsed as a single token '-3.5')
+        """
+        result = mathparse.parse('-3.5')
+        self.assertEqual(float(result), -3.5)
+
+    def test_negative_decimal_in_expression(self):
+        """
+        Test: -10.25 + 5 should work correctly
+        """
+        result = mathparse.parse('-10.25 + 5')
+        # Parses as ['-10.25', '+', '5'] = -10.25 + 5 = -5.25
+        self.assertEqual(float(result), -5.25)
+
+    def test_positive_decimal(self):
+        """
+        Test: 3.5 should equal 3.5
+        """
+        result = mathparse.parse('3.5')
+        self.assertEqual(float(result), 3.5)
+
+    def test_negative_decimal_with_multiple_digits(self):
+        """
+        Test: -53.25 should equal -53.25
+        """
+        result = mathparse.parse('-53.25')
+        self.assertEqual(float(result), -53.25)
+
+    def test_negative_decimal_in_multiplication(self):
+        """
+        Test: -2.5 * 4 should equal -10.0
+        """
+        result = mathparse.parse('-2.5 * 4')
+        self.assertEqual(float(result), -10.0)
+
+
+class NegativeWithUnaryFunctionsTestCase(TestCase):
+    """
+    Test cases for negative numbers with unary functions.
+    """
+
+    def test_sqrt_with_negative_causes_math_error(self):
+        """
+        Test: sqrt -16 should parse correctly but give a math domain error.
+
+        The tokenizer keeps -16 as a single token '-16', which is correctly
+        identified as an integer and passed to sqrt, causing the expected
+        math domain error.
+        """
+        # Verify tokenization
+        tokens = mathparse.tokenize('sqrt -16')
+        self.assertEqual(tokens, ['sqrt', '-16'])
+
+        # Verify it causes the expected math error
+        with self.assertRaises(ValueError) as context:
+            mathparse.parse('sqrt -16')
+        self.assertIn("math domain error", str(context.exception).lower())
+
+    def test_log_with_negative_causes_math_error(self):
+        """
+        Test: log -10 should parse correctly but give a math domain error.
+        """
+        # Verify tokenization
+        tokens = mathparse.tokenize('log -10')
+        self.assertEqual(tokens, ['log', '-10'])
+
+        # Verify it causes the expected math error
+        with self.assertRaises(ValueError) as context:
+            mathparse.parse('log -10')
+        self.assertIn("math domain error", str(context.exception).lower())
+
+    def test_sqrt_with_positive_after_operator(self):
+        """
+        Test: 3 + sqrt 16 should equal 7.0
+        """
+        result = mathparse.parse('3 + sqrt 16')
+        self.assertEqual(result, 7.0)
+
+    def test_multiple_negatives_with_operations(self):
+        """
+        Test: -2 * -3 + -1 should equal 5
+        """
+        result = mathparse.parse('-2 * -3 + -1')
+        self.assertEqual(result, 5)
+
+
+class ExtractExpressionNegativeTestCase(TestCase):
+    """
+    Test cases for extract_expression with negative numbers.
+    """
+
+    def test_extract_leading_negative(self):
+        """
+        Test: 'What is -3 + 3?' should extract '-3 + 3'
+        """
+        result = mathparse.extract_expression(
+            'What is -3 + 3?', language='ENG'
+        )
+        self.assertEqual(result, '-3 + 3')
+
+    def test_extract_negative_with_multiplication(self):
+        """
+        Test: 'Calculate -5 * 2' should extract '-5 * 2'
+        """
+        result = mathparse.extract_expression(
+            'Calculate -5 * 2', language='ENG'
+        )
+        self.assertEqual(result, '-5 * 2')
+
+    def test_extract_binary_subtraction(self):
+        """
+        Test: 'The answer to 10 - 5 is' should extract '10 - 5'
+        """
+        result = mathparse.extract_expression(
+            'The answer to 10 - 5 is', language='ENG'
+        )
+        self.assertEqual(result, '10 - 5')
+
+    def test_extract_pure_negative_expression(self):
+        """
+        Test: '-3 + 3' should extract '-3 + 3' (with no extra text)
+        """
+        result = mathparse.extract_expression('-3 + 3', language='ENG')
+        self.assertEqual(result, '-3 + 3')
+
+    def test_extract_pure_subtraction(self):
+        """
+        Test: '5 - 3' should extract '5 - 3' (with no extra text)
+        """
+        result = mathparse.extract_expression('5 - 3', language='ENG')
+        self.assertEqual(result, '5 - 3')
+
+    def test_extract_negative_in_parentheses_with_words(self):
+        """
+        Test: Extract expression with negative in parentheses from sentence
+        """
+        result = mathparse.extract_expression(
+            'Compute (-3) + 5', language='ENG'
+        )
+        # NOTE: Spaces are currently added, but ideally these will be removed
+        # in the future (TODO: link the tracking issue once one exists)
+        self.assertEqual(result, '( -3 ) + 5')
diff --git a/tests/test_utils.py b/tests/test_utils.py
@@ -81,6 +81,18 @@ def test_extract_expression(self):
 
         self.assertEqual(result, '3 + 3')
 
+    def test_extract_expression_with_negative(self):
+        result = mathparse.extract_expression('-3 + 3', language='ENG')
+
+        self.assertEqual(result, '-3 + 3')
+
+    def test_extract_expression_with_words(self):
+        result = mathparse.extract_expression(
+            'three plus three', language='ENG'
+        )
+
+        self.assertEqual(result, 'three plus three')
+
     def test_ignore_punctuation(self):
         result = mathparse.extract_expression('3?', language='ENG')