1
1
# -*- coding: utf-8 -*-
2
2
import re
3
3
import string
4
- from typing import Callable , Optional , Pattern , List , Tuple
4
+ from typing import Callable , Match , Optional , Pattern , List , Tuple
5
5
from decimal import Decimal , InvalidOperation
6
6
7
7
import attr
@@ -36,11 +36,11 @@ def fromstring(cls, price: Optional[str],
36
36
``price`` string, it could be **preferred** over a value extracted
37
37
from ``currency_hint`` string.
38
38
"""
39
- amount_text = extract_price_text (price ) if price is not None else None
39
+ currency , source = _extract_currency_symbol (price , currency_hint )
40
+ amount_text = extract_price_text (price , currency if source == price else None ) if price is not None else None
40
41
amount_num = parse_number (amount_text ) if amount_text is not None else None
41
- currency = extract_currency_symbol (price , currency_hint )
42
42
if currency is not None :
43
- currency = currency .strip ()
43
+ currency = currency .group ( 0 ). strip ()
44
44
return Price (
45
45
amount = amount_num ,
46
46
currency = currency ,
@@ -120,11 +120,11 @@ def or_regex(symbols: List[str]) -> Pattern:
120
120
_search_unsafe_currency = or_regex (OTHER_CURRENCY_SYMBOLS ).search
121
121
122
122
123
- def extract_currency_symbol (price : Optional [str ],
124
- currency_hint : Optional [str ]) -> Optional [str ]:
123
+ def _extract_currency_symbol (price : Optional [str ], currency_hint : Optional [str ]) -> Tuple [Optional [Match ], Optional [str ]]:
125
124
"""
126
- Guess currency symbol from extracted price and currency strings.
127
- Return an empty string if symbol is not found.
125
+ Guess the currency symbol from extracted price and currency strings.
126
+ Return a (`match object`_, source_string) tuple with the symbol found and
127
+ the string where it was found, or (None, None) if no symbol is found.
128
128
"""
129
129
methods : List [Tuple [Callable , Optional [str ]]] = [
130
130
(_search_safe_currency , price ),
@@ -142,17 +142,32 @@ def extract_currency_symbol(price: Optional[str],
142
142
for meth , attr in methods :
143
143
m = meth (attr ) if attr else None
144
144
if m :
145
- return m .group (0 )
145
+ return m , attr
146
+
147
+ return None , None
146
148
149
+
150
def extract_currency_symbol(price: Optional[str],
                            currency_hint: Optional[str]) -> Optional[str]:
    """
    Guess currency symbol from extracted price and currency strings.

    This is the string-returning public entry point; the actual lookup is
    delegated to ``_extract_currency_symbol``, which yields a
    ``(match, source_string)`` pair. Only the match is used here, the
    source string is discarded.

    :param price: text the price was extracted from, or None.
    :param currency_hint: text expected to contain the currency, or None.
    :return: the symbol exactly as it was matched (no stripping is done
        here), or None if no symbol is found.
    """
    match, _ = _extract_currency_symbol(price, currency_hint)
    if match:
        return match.group(0)
    return None
148
160
149
161
150
- def extract_price_text (price : str ) -> Optional [str ]:
162
+ def extract_price_text (price : str , currency_match : Optional [ Match ] = None ) -> Optional [str ]:
151
163
"""
152
164
Extract text of a price from a string which contains price and
153
- maybe some other text. If multiple price-looking substrings are present,
154
- the first is returned (FIXME: it is better to return a number
155
- which is near a currency symbol).
165
+ maybe some other text.
166
+
167
+ If a match object of the currency within the `price` string is provided,
168
+ amounts before or after the matched currency substring are prioritized.
169
+ Otherwise, if multiple price-looking substrings are present, the first is
170
+ returned.
156
171
157
172
>>> extract_price_text("price: $12.99")
158
173
'12.99'
@@ -189,16 +204,39 @@ def extract_price_text(price: str) -> Optional[str]:
189
204
""" , price , re .VERBOSE )
190
205
if m :
191
206
return m .group (0 ).replace (' ' , '' )
207
+
208
def number_from_match(m):
    """
    Return the number text captured in group 1 of match ``m``.

    Leading/trailing ``,`` and ``.`` are removed first, then surrounding
    whitespace.
    """
    # NOTE: the order matters - punctuation is stripped before whitespace,
    # so a separator that is followed by trailing whitespace is kept.
    return m.group(1).strip(',.').strip()
210
+
211
+ if currency_match is not None :
212
+
213
+ m = re .search (r"""
214
+ (\d[\d\s.,]*) # number, probably with thousand separators
215
+ \s*$ # only match right before the currency symbol
216
+ """ , price [:currency_match .start (0 )], re .VERBOSE )
217
+ if m :
218
+ return number_from_match (m )
219
+
220
+ m = re .search (r"""
221
+ ^\s* # only match right after the currency symbol
222
+ (\d[\d\s.,]*) # number, probably with thousand separators
223
+ \s* # skip whitespace
224
+ (?:[^%\d]|$) # capture next symbol - it shouldn't be %
225
+ """ , price [currency_match .end (0 ):], re .VERBOSE )
226
+ if m :
227
+ return number_from_match (m )
228
+
192
229
m = re .search (r"""
193
230
(\d[\d\s.,]*) # number, probably with thousand separators
194
231
\s* # skip whitespace
195
232
(?:[^%\d]|$) # capture next symbol - it shouldn't be %
196
233
""" , price , re .VERBOSE )
197
-
198
234
if m :
199
- return m .group (1 ).strip (',.' ).strip ()
235
+ return number_from_match (m )
236
+
200
237
if 'free' in price .lower ():
201
238
return '0'
239
+
202
240
return None
203
241
204
242
0 commit comments