1
1
# -*- coding: utf-8 -*-
2
2
import re
3
3
import string
4
- from typing import Callable , Optional , Pattern , List , Tuple
4
+ from typing import Callable , Match , Optional , Pattern , List , Tuple
5
5
from decimal import Decimal , InvalidOperation
6
6
7
7
import attr
@@ -36,11 +36,11 @@ def fromstring(cls, price: Optional[str],
36
36
``price`` string, it could be **preferred** over a value extracted
37
37
from ``currency_hint`` string.
38
38
"""
39
- amount_text = extract_price_text (price ) if price is not None else None
39
+ currency , source = _extract_currency_symbol (price , currency_hint )
40
+ amount_text = extract_price_text (price , currency if source == price else None ) if price is not None else None
40
41
amount_num = parse_number (amount_text ) if amount_text is not None else None
41
- currency = extract_currency_symbol (price , currency_hint )
42
42
if currency is not None :
43
- currency = currency .strip ()
43
+ currency = currency .group ( 0 ). strip ()
44
44
return Price (
45
45
amount = amount_num ,
46
46
currency = currency ,
@@ -120,11 +120,12 @@ def or_regex(symbols: List[str]) -> Pattern:
120
120
_search_unsafe_currency = or_regex (OTHER_CURRENCY_SYMBOLS ).search
121
121
122
122
123
- def extract_currency_symbol (price : Optional [str ],
124
- currency_hint : Optional [str ]) -> Optional [str ]:
123
+ def _extract_currency_symbol (price : Optional [str ],
124
+ currency_hint : Optional [str ]) -> Optional [str ]:
125
125
"""
126
- Guess currency symbol from extracted price and currency strings.
127
- Return an empty string if symbol is not found.
126
+ Guess the currency symbol from extracted price and currency strings.
127
+ Return a (`match object`_, source_string) tuple with the symbol found and
128
+ the string where it was found, or (None, None) if no symbol is found.
128
129
"""
129
130
methods : List [Tuple [Callable , Optional [str ]]] = [
130
131
(_search_safe_currency , price ),
@@ -142,17 +143,32 @@ def extract_currency_symbol(price: Optional[str],
142
143
for meth , attr in methods :
143
144
m = meth (attr ) if attr else None
144
145
if m :
145
- return m .group (0 )
146
+ return m , attr
147
+
148
+ return None , None
146
149
150
+
151
def extract_currency_symbol(price: Optional[str],
                            currency_hint: Optional[str]) -> Optional[str]:
    """
    Guess the currency symbol from the extracted price and currency strings.

    The search itself is delegated to ``_extract_currency_symbol``; only the
    matched text is exposed here. Returns the symbol exactly as it appears
    in the string where it was found, or ``None`` if no symbol is found.
    """
    # The helper also reports which string the symbol came from; this
    # public wrapper does not need that information.
    found, _source = _extract_currency_symbol(price, currency_hint)
    return found.group(0) if found else None
148
161
149
162
150
- def extract_price_text (price : str ) -> Optional [str ]:
163
+ def extract_price_text (price : str , currency_match : Optional [ Match ] = None ) -> Optional [str ]:
151
164
"""
152
165
Extract text of a price from a string which contains price and
153
- maybe some other text. If multiple price-looking substrings are present,
154
- the first is returned (FIXME: it is better to return a number
155
- which is near a currency symbol).
166
+ maybe some other text.
167
+
168
+ If a match object of the currency within the `price` string is provided,
169
+ amounts before or after the matched currency substring are prioritized.
170
+ Otherwise, if multiple price-looking substrings are present, the first is
171
+ returned.
156
172
157
173
>>> extract_price_text("price: $12.99")
158
174
'12.99'
@@ -189,16 +205,39 @@ def extract_price_text(price: str) -> Optional[str]:
189
205
""" , price , re .VERBOSE )
190
206
if m :
191
207
return m .group (0 ).replace (' ' , '' )
208
+
209
def number_from_match(m):
    """
    Return the number text captured in group 1 of ``m``, cleaned up.

    Separators (``,`` and ``.``) and whitespace are trimmed from both ends
    in a single pass. Stripping the separators first and the whitespace
    afterwards leaves a separator behind whenever it is followed by
    whitespace (``"1,235, "`` would become ``"1,235,"``); trimming both
    character sets together removes it regardless of their order.
    Separators and whitespace inside the number are left untouched.
    """
    return re.sub(r'^[\s.,]+|[\s.,]+$', '', m.group(1))
211
+
212
+ if currency_match is not None :
213
+
214
+ m = re .search (r"""
215
+ (\d[\d\s.,]*) # number, probably with thousand separators
216
+ \s*$ # only match right before the currency symbol
217
+ """ , price [:currency_match .start (0 )], re .VERBOSE )
218
+ if m :
219
+ return number_from_match (m )
220
+
221
+ m = re .search (r"""
222
+ ^\s* # only match right after the currency symbol
223
+ (\d[\d\s.,]*) # number, probably with thousand separators
224
+ \s* # skip whitespace
225
+ (?:[^%\d]|$) # capture next symbol - it shouldn't be %
226
+ """ , price [currency_match .end (0 ):], re .VERBOSE )
227
+ if m :
228
+ return number_from_match (m )
229
+
192
230
m = re .search (r"""
193
231
(\d[\d\s.,]*) # number, probably with thousand separators
194
232
\s* # skip whitespace
195
233
(?:[^%\d]|$) # capture next symbol - it shouldn't be %
196
234
""" , price , re .VERBOSE )
197
-
198
235
if m :
199
- return m .group (1 ).strip (',.' ).strip ()
236
+ return number_from_match (m )
237
+
200
238
if 'free' in price .lower ():
201
239
return '0'
240
+
202
241
return None
203
242
204
243
0 commit comments