Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
78 changes: 68 additions & 10 deletions mathparse/mathparse.py
Original file line number Diff line number Diff line change
Expand Up @@ -400,13 +400,15 @@ def preprocess_unary_operators(tokens: list) -> list:
* At the beginning of the expression
* After an opening parenthesis '('
* After a binary operator `(+, -, *, /, ^)`
* After a unary function `(sqrt, log, neg)`
"""
if not tokens:
return tokens

processed_tokens = []

binary_operators = mathwords.BINARY_OPERATORS | {'('}
# Tokens after which a following minus sign is treated as unary (negation)
unary_contexts = mathwords.BINARY_OPERATORS | {'('}

for i, token in enumerate(tokens):
if token == '-':
Expand All @@ -418,8 +420,9 @@ def preprocess_unary_operators(tokens: list) -> list:
is_unary_minus = True
elif i > 0:
prev_token = tokens[i - 1]
# A unary minus after opening parenthesis or binary operators
if prev_token in binary_operators:
# A unary minus after opening parenthesis, binary operators,
# or unary functions
if prev_token in unary_contexts or is_unary(prev_token):
is_unary_minus = True

if is_unary_minus:
Expand Down Expand Up @@ -539,14 +542,20 @@ def evaluate_postfix(tokens: list) -> Union[int, float, str, Decimal]:
elif token == '.':
# Treat decimal points as a binary operator that combines the
# integer and fractional part of two numbers
# Example: 53 . 25 = 53.25
# Example: 53 . 25 = 53.25, -3 . 5 = -3.5
if b == 0:
total = Decimal(a)
else:
# Count the digits in b to determine the divisor
digits = len(str(int(b)))
divisor = 10 ** digits
total = a + (b / divisor)
fractional_part = b / divisor
# Handle negative numbers correctly: -3 . 5 should be -3.5,
# not -2.5
if a < 0:
total = a - fractional_part
else:
total = a + fractional_part
else:
raise PostfixTokenEvaluationException(
'Unknown token "{}"'.format(token)
Expand Down Expand Up @@ -612,8 +621,22 @@ def tokenize(string: str, language: str = None, escape: str = '___') -> list:
string = string.replace(spaced_phrase, phrase)

# Binary operators must have space around them to be tokenized properly
# Special handling for minus sign: preserve leading negatives
for operator in mathwords.BINARY_OPERATORS:
string = string.replace(operator, f' {operator} ')
if operator == '-':
# For minus sign, use a pattern that only spaces it when it's
# clearly a binary operator (after digits or closing parenthesis).
# This preserves leading negatives like "-3" and distinguishes
# between:
# - "What is -3 + 3" --> "-3 + 3" (minus is part of number)
# - "5 - 3" --> "5 - 3" (minus is binary operator, needs spacing)
# - "math - 4" --> "- 4" (minus after letters is not a binary
# operator)
# Only add spaces around minus when preceded by a digit or closing
# parenthesis
string = re.sub(r'([\d)])\s*-\s*', r'\1 - ', string)
else:
string = string.replace(operator, f' {operator} ')

# Parenthesis must have space around them to be tokenized properly
string = string.replace('(', ' ( ')
Expand Down Expand Up @@ -752,11 +775,46 @@ def extract_expression(dirty_string: str, language: str) -> str:
start_index = 0
end_index = len(tokens)

for part in tokens:
# Find the start of the mathematical expression
# Skip over non-mathematical tokens AND isolated binary operators
for i, part in enumerate(tokens):
if is_symbol(part) or is_word(part, language):
break
else:
start_index += 1
# A potential start was found, so check if it's a standalone binary
# operator. Binary operators (except '(') are only valid at the
# start if they are unary (such as a leading minus for a negative
# number)
if part in mathwords.BINARY_OPERATORS and part != '(':
# For a binary operator to be the start of an expression, it
# must be:
# 1. At the very beginning (position 0) - could be unary minus
# 2. OR all previous tokens were non-mathematical - also could
# be unary
# If there were non-math tokens before it, it's likely a
# separator

# Check if all previous tokens are non-mathematical
all_prev_non_math = True
for j in range(i):
if is_symbol(tokens[j]) or is_word(tokens[j], language):
all_prev_non_math = False
break

# Only treat this operator as the start when no mathematical token
# precedes it (trivially true at position 0) AND the token right
# after it is mathematical
if all_prev_non_math and i + 1 < len(tokens):
next_token = tokens[i + 1]
if (
is_int(next_token) or is_float(next_token) or
is_constant(next_token) or is_unary(next_token) or
next_token == '(' or is_word(next_token, language)
):
start_index = i
break
else:
# Start indexes can be an opening parenthesis, or a non-binary
# operator
start_index = i
break

for part in reversed(tokens):
if is_symbol(part) or is_word(part, language):
Expand Down
243 changes: 243 additions & 0 deletions tests/test_prefix_unary_operations.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,243 @@
from unittest import TestCase
from mathparse import mathparse


class NegativeNumberTestCase(TestCase):
    """
    Test cases for basic negative number operations.

    Every test passes a literal expression string to ``mathparse.parse``
    and compares the returned value with the expected number.
    """

    def test_leading_negative_with_addition(self):
        """
        Test: -3 + 3 should equal 0
        """
        result = mathparse.parse('-3 + 3')
        self.assertEqual(result, 0)

    def test_leading_negative_with_subtraction(self):
        """
        Test: -3 - 5 should equal -8
        """
        result = mathparse.parse('-3 - 5')
        self.assertEqual(result, -8)

    def test_leading_negative_with_multiplication(self):
        """
        Test: -10 * 2 should equal -20
        """
        result = mathparse.parse('-10 * 2')
        self.assertEqual(result, -20)

    def test_negative_in_parentheses(self):
        """
        Test: (-3) should equal -3
        """
        result = mathparse.parse('(-3)')
        self.assertEqual(result, -3)

    def test_negative_in_parentheses_with_addition(self):
        """
        Test: (-3) + 5 should equal 2
        """
        result = mathparse.parse('(-3) + 5')
        self.assertEqual(result, 2)

    def test_addition_with_negative_in_parentheses(self):
        """
        Test: 5 + (-3) should equal 2
        """
        result = mathparse.parse('5 + (-3)')
        self.assertEqual(result, 2)

    def test_two_negatives_in_parentheses(self):
        """
        Test: (-3) * (-2) should equal 6
        """
        result = mathparse.parse('(-3) * (-2)')
        self.assertEqual(result, 6)

    def test_multiplication_by_negative(self):
        """
        Test: 3 * -2 should equal -6
        """
        result = mathparse.parse('3 * -2')
        self.assertEqual(result, -6)

    def test_subtraction_of_negative(self):
        """
        Test: 3 - -2 should equal 5 (subtracting a negative)
        """
        # The input is deliberately written without spaces so that the
        # two adjacent minus signs must be told apart by the parser.
        result = mathparse.parse('3--2')
        self.assertEqual(result, 5)

    def test_two_separate_negatives_with_addition(self):
        """
        Test: -3 + -5 should equal -8
        """
        result = mathparse.parse('-3 + -5')
        self.assertEqual(result, -8)

    def test_complex_expression_with_leading_negative(self):
        """
        Test: (-3 + 5) * 2 should equal 4
        """
        result = mathparse.parse('(-3 + 5) * 2')
        self.assertEqual(result, 4)

    def test_negative_after_operator_in_expression(self):
        """
        Test: 2 * (-3 + 5) should equal 4
        """
        result = mathparse.parse('2 * (-3 + 5)')
        self.assertEqual(result, 4)

    def test_negative_of_expression(self):
        """
        Test: -(3 + 5) should equal -8
        """
        result = mathparse.parse('-(3 + 5)')
        self.assertEqual(result, -8)

    def test_negative_decimal(self):
        """
        Test: -3.5 should equal -3.5
        """
        # NOTE(review): whether the tokenizer emits '-3', '.', '5' or a
        # single '-3.5' token is disputed in review; confirm against
        # tokenize() before documenting the parse here.
        result = mathparse.parse('-3.5')
        # Compare as float so the check does not depend on the numeric
        # type that parse() returns for decimals.
        self.assertEqual(float(result), -3.5)

    def test_negative_decimal_in_expression(self):
        """
        Test: -10.25 + 5 should equal -5.25
        """
        result = mathparse.parse('-10.25 + 5')
        # NOTE(review): the token split for '-10.25' is disputed in
        # review (separate '.' operator vs. one '-10.25' token); only
        # the arithmetic result is asserted here.
        self.assertEqual(float(result), -5.25)

    def test_positive_decimal(self):
        """
        Test: 3.5 should equal 3.5
        """
        result = mathparse.parse('3.5')
        self.assertEqual(float(result), 3.5)

    def test_negative_decimal_with_multiple_digits(self):
        """
        Test: -53.25 should equal -53.25
        """
        result = mathparse.parse('-53.25')
        self.assertEqual(float(result), -53.25)

    def test_negative_decimal_in_multiplication(self):
        """
        Test: -2.5 * 4 should equal -10.0
        """
        result = mathparse.parse('-2.5 * 4')
        self.assertEqual(float(result), -10.0)


class NegativeWithUnaryFunctionsTestCase(TestCase):
    """
    Test cases for negative numbers with unary functions.
    """

    def test_sqrt_with_negative_causes_math_error(self):
        """
        Test: sqrt -16 should tokenize as ['sqrt', '-16'] and then fail
        with a math domain error when evaluated.
        """
        # The negative operand must stay attached to its sign
        tokenized = mathparse.tokenize('sqrt -16')
        self.assertEqual(tokenized, ['sqrt', '-16'])

        # Evaluating the expression must raise ValueError
        with self.assertRaises(ValueError) as raised:
            mathparse.parse('sqrt -16')

        # NOTE(review): the message text presumably comes from Python's
        # math module; verify it is stable on all supported versions.
        message = str(raised.exception).lower()
        self.assertIn("math domain error", message)

    def test_log_with_negative_causes_math_error(self):
        """
        Test: log -10 should tokenize as ['log', '-10'] and then fail
        with a math domain error when evaluated.
        """
        # The negative operand must stay attached to its sign
        tokenized = mathparse.tokenize('log -10')
        self.assertEqual(tokenized, ['log', '-10'])

        # Evaluating the expression must raise ValueError
        with self.assertRaises(ValueError) as raised:
            mathparse.parse('log -10')

        # NOTE(review): the message text presumably comes from Python's
        # math module; verify it is stable on all supported versions.
        message = str(raised.exception).lower()
        self.assertIn("math domain error", message)

    def test_sqrt_with_positive_after_operator(self):
        """
        Test: 3 + sqrt 16 should equal 7.0
        """
        self.assertEqual(mathparse.parse('3 + sqrt 16'), 7.0)

    def test_multiple_negatives_with_operations(self):
        """
        Test: -2 * -3 + -1 should equal 5
        """
        self.assertEqual(mathparse.parse('-2 * -3 + -1'), 5)


class ExtractExpressionNegativeTestCase(TestCase):
    """
    Test cases for extract_expression with negative numbers.

    Every test passes a string (with or without surrounding words) to
    ``mathparse.extract_expression`` using the 'ENG' language and checks
    the exact expression string that comes back.
    """

    def test_extract_leading_negative(self):
        """
        Test: 'What is -3 + 3?' should extract '-3 + 3'
        """
        result = mathparse.extract_expression(
            'What is -3 + 3?', language='ENG'
        )
        self.assertEqual(result, '-3 + 3')

    def test_extract_negative_with_multiplication(self):
        """
        Test: 'Calculate -5 * 2' should extract '-5 * 2'
        """
        result = mathparse.extract_expression(
            'Calculate -5 * 2', language='ENG'
        )
        self.assertEqual(result, '-5 * 2')

    def test_extract_binary_subtraction(self):
        """
        Test: 'The answer to 10 - 5 is' should extract '10 - 5'
        """
        result = mathparse.extract_expression(
            'The answer to 10 - 5 is', language='ENG'
        )
        self.assertEqual(result, '10 - 5')

    def test_extract_pure_negative_expression(self):
        """
        Test: '-3 + 3' should extract '-3 + 3' (with no extra text)
        """
        result = mathparse.extract_expression('-3 + 3', language='ENG')
        self.assertEqual(result, '-3 + 3')

    def test_extract_pure_subtraction(self):
        """
        Test: '5 - 3' should extract '5 - 3' (with no extra text)
        """
        result = mathparse.extract_expression('5 - 3', language='ENG')
        self.assertEqual(result, '5 - 3')

    def test_extract_negative_in_parentheses_with_words(self):
        """
        Test: Extract expression with negative in parentheses from sentence
        """
        result = mathparse.extract_expression(
            'Compute (-3) + 5', language='ENG'
        )
        # NOTE: Spaces are currently added around the parentheses, but
        # ideally these will be removed in the future. The expected value
        # below pins the current (padded) output.
        # NOTE(review): no tracking issue is referenced for this cleanup.
        self.assertEqual(result, '( -3 ) + 5')
12 changes: 12 additions & 0 deletions tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,18 @@ def test_extract_expression(self):

self.assertEqual(result, '3 + 3')

def test_extract_expression_with_negative(self):
    """A bare expression with a leading negative is returned unchanged."""
    extracted = mathparse.extract_expression('-3 + 3', language='ENG')
    self.assertEqual(extracted, '-3 + 3')

def test_extract_expression_with_words(self):
    """An expression written entirely in words is returned unchanged."""
    phrase = 'three plus three'
    extracted = mathparse.extract_expression(phrase, language='ENG')
    self.assertEqual(extracted, 'three plus three')

def test_ignore_punctuation(self):
result = mathparse.extract_expression('3?', language='ENG')

Expand Down