Skip to content

Commit 402c1f2

Browse files
replace comma-decimal handling with param
Alternate decimal points now specified with function parameter
1 parent 246855d commit 402c1f2

File tree

2 files changed

+31
-12
lines changed

2 files changed

+31
-12
lines changed

lingua_franca/parse.py

+24-8
Original file line numberDiff line numberDiff line change
@@ -80,17 +80,20 @@ def match_one(query, choices):
8080
return best
8181

8282

83-
def normalize_decimals(text):
83+
def normalize_decimals(text, decimal):
8484
"""
85-
Replace decimal commas with decimal periods so Python can floatify them
85+
Replace 'decimal' with decimal periods so Python can floatify them
8686
"""
87-
sanitize_decimals = re.compile(r"\b\d+,{1}\d+\b")
87+
regex = r"\b\d+" + decimal + r"{1}\d+\b"
88+
sanitize_decimals = re.compile(regex)
8889
for _, match in enumerate(re.finditer(sanitize_decimals, text)):
89-
text = text.replace(match.group(0), match.group(0).replace(',', '.'))
90+
text = text.replace(match.group(
91+
0), match.group(0).replace(decimal, '.'))
9092
return text
9193

9294

93-
def extract_numbers(text, short_scale=True, ordinals=False, lang=None):
95+
def extract_numbers(text, short_scale=True, ordinals=False, lang=None,
96+
decimal='.'):
9497
"""
9598
Takes in a string and extracts a list of numbers.
9699
@@ -102,10 +105,16 @@ def extract_numbers(text, short_scale=True, ordinals=False, lang=None):
102105
See https://en.wikipedia.org/wiki/Names_of_large_numbers
103106
ordinals (bool): consider ordinal numbers, e.g. third=3 instead of 1/3
104107
lang (str): the BCP-47 code for the language to use, None uses default
108+
decimal (str): character to use as decimal point. defaults to '.'
105109
Returns:
106110
list: list of extracted numbers as floats, or empty list if none found
111+
Note:
112+
will always extract numbers formatted with a decimal dot/full stop,
113+
such as '3.5', even if 'decimal' is specified.
107114
"""
108-
text = normalize_decimals(text)
115+
if decimal != '.':
116+
text = normalize_decimals(text, decimal)
117+
109118
lang_code = get_primary_lang_code(lang)
110119
if lang_code == "en":
111120
return extract_numbers_en(text, short_scale, ordinals)
@@ -125,7 +134,8 @@ def extract_numbers(text, short_scale=True, ordinals=False, lang=None):
125134
return []
126135

127136

128-
def extract_number(text, short_scale=True, ordinals=False, lang=None):
137+
def extract_number(text, short_scale=True, ordinals=False, lang=None,
138+
decimal='.'):
129139
"""Takes in a string and extracts a number.
130140
131141
Args:
@@ -136,11 +146,17 @@ def extract_number(text, short_scale=True, ordinals=False, lang=None):
136146
See https://en.wikipedia.org/wiki/Names_of_large_numbers
137147
ordinals (bool): consider ordinal numbers, e.g. third=3 instead of 1/3
138148
lang (str): the BCP-47 code for the language to use, None uses default
149+
decimal (str): character to use as decimal point. defaults to '.'
139150
Returns:
140151
(int, float or False): The number extracted or False if the input
141152
text contains no numbers
153+
Note:
154+
will always extract numbers formatted with a decimal dot/full stop,
155+
such as '3.5', even if 'decimal' is specified.
142156
"""
143-
text = normalize_decimals(text)
157+
if decimal != '.':
158+
text = normalize_decimals(text, decimal)
159+
144160
lang_code = get_primary_lang_code(lang)
145161
if lang_code == "en":
146162
return extractnumber_en(text, short_scale=short_scale,

test/test_parse.py

+7-4
Original file line numberDiff line numberDiff line change
@@ -125,8 +125,9 @@ def test_extract_number(self):
125125
short_scale=False), 1e-12)
126126

127127
# Test decimal normalization
128-
self.assertEqual(extract_number("4,4"), 4.4)
129-
self.assertEqual(extract_number("we have 3,5 kilometers to go"), 3.5)
128+
self.assertEqual(extract_number("4,4", decimal=','), 4.4)
129+
self.assertEqual(extract_number("we have 3,5 kilometers to go",
130+
decimal=','), 3.5)
130131

131132
# TODO handle this case
132133
# self.assertEqual(
@@ -708,9 +709,11 @@ def test_multiple_numbers(self):
708709
self.assertEqual(extract_numbers("this is a seven eight nine and a"
709710
" half test"),
710711
[7.0, 8.0, 9.5])
711-
self.assertEqual(extract_numbers("this is a seven eight 9,5 test"),
712+
self.assertEqual(extract_numbers("this is a seven eight 9,5 test",
713+
decimal=','),
712714
[7.0, 8.0, 9.5])
713-
self.assertEqual(extract_numbers("this is a 7,0 8.0 9,6 test"), [7.0, 8.0, 9.6])
715+
self.assertEqual(extract_numbers("this is a 7,0 8.0 9,6 test",
716+
decimal=','), [7.0, 8.0, 9.6])
714717

715718
def test_contractions(self):
716719
self.assertEqual(normalize("ain't"), "is not")

0 commit comments

Comments
 (0)