@@ -80,17 +80,20 @@ def match_one(query, choices):
80
80
return best
81
81
82
82
83
def normalize_decimals(text, decimal):
    """
    Replace the 'decimal' separator with a decimal period so Python can
    floatify the numbers found in text.

    Only a separator sitting directly between two runs of digits that form
    a standalone token (word boundary on both sides) is rewritten, e.g.
    with decimal=',' the text '3,5 apples' becomes '3.5 apples' while
    'a1,2b' and 'yes, 5' are left untouched.

    Args:
        text (str): the string to normalize
        decimal (str): the decimal separator used in text, e.g. ','

    Returns:
        str: text with matching decimal separators replaced by '.'
    """
    # An empty separator cannot mark a decimal position; without this guard
    # the pattern below would split plain integers such as '12' into '1.2'.
    if not decimal:
        return text

    # Escape the separator so regex metacharacters ('|', '.', '*', ...) are
    # matched literally and multi-character separators are matched whole.
    sanitize_decimals = re.compile(
        r"\b(\d+)" + re.escape(decimal) + r"(\d+)\b")

    # Substitute in place at each match position, so identical substrings
    # in contexts the pattern rejected are not rewritten as a side effect.
    return sanitize_decimals.sub(r"\1.\2", text)
91
93
92
94
93
- def extract_numbers (text , short_scale = True , ordinals = False , lang = None ):
95
+ def extract_numbers (text , short_scale = True , ordinals = False , lang = None ,
96
+ decimal = '.' ):
94
97
"""
95
98
Takes in a string and extracts a list of numbers.
96
99
@@ -102,10 +105,16 @@ def extract_numbers(text, short_scale=True, ordinals=False, lang=None):
102
105
See https://en.wikipedia.org/wiki/Names_of_large_numbers
103
106
ordinals (bool): consider ordinal numbers, e.g. third=3 instead of 1/3
104
107
lang (str): the BCP-47 code for the language to use, None uses default
108
+ decimal (str): character to use as decimal point. defaults to '.'
105
109
Returns:
106
110
list: list of extracted numbers as floats, or empty list if none found
111
+ Note:
112
+ will always extract numbers formatted with a decimal dot/full stop,
113
+ such as '3.5', even if 'decimal' is specified.
107
114
"""
108
- text = normalize_decimals (text )
115
+ if decimal != '.' :
116
+ text = normalize_decimals (text , decimal )
117
+
109
118
lang_code = get_primary_lang_code (lang )
110
119
if lang_code == "en" :
111
120
return extract_numbers_en (text , short_scale , ordinals )
@@ -125,7 +134,8 @@ def extract_numbers(text, short_scale=True, ordinals=False, lang=None):
125
134
return []
126
135
127
136
128
- def extract_number (text , short_scale = True , ordinals = False , lang = None ):
137
+ def extract_number (text , short_scale = True , ordinals = False , lang = None ,
138
+ decimal = '.' ):
129
139
"""Takes in a string and extracts a number.
130
140
131
141
Args:
@@ -136,11 +146,17 @@ def extract_number(text, short_scale=True, ordinals=False, lang=None):
136
146
See https://en.wikipedia.org/wiki/Names_of_large_numbers
137
147
ordinals (bool): consider ordinal numbers, e.g. third=3 instead of 1/3
138
148
lang (str): the BCP-47 code for the language to use, None uses default
149
+ decimal (str): character to use as decimal point. defaults to '.'
139
150
Returns:
140
151
(int, float or False): The number extracted or False if the input
141
152
text contains no numbers
153
+ Note:
154
+ will always extract numbers formatted with a decimal dot/full stop,
155
+ such as '3.5', even if 'decimal' is specified.
142
156
"""
143
- text = normalize_decimals (text )
157
+ if decimal != '.' :
158
+ text = normalize_decimals (text , decimal )
159
+
144
160
lang_code = get_primary_lang_code (lang )
145
161
if lang_code == "en" :
146
162
return extractnumber_en (text , short_scale = short_scale ,
0 commit comments