Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Negative price amount #33

Open
wants to merge 4 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
61 changes: 49 additions & 12 deletions price_parser/parser.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# -*- coding: utf-8 -*-
import re
import string
from collections import namedtuple
from typing import Callable, Optional, Pattern, List, Tuple
from decimal import Decimal, InvalidOperation

Expand Down Expand Up @@ -37,18 +38,21 @@ def fromstring(cls, price: Optional[str],
``price`` string, it could be **preferred** over a value extracted
from ``currency_hint`` string.
"""
amount_text = extract_price_text(price) if price is not None else None
amount_num = (
parse_number(amount_text, decimal_separator)
if amount_text is not None else None
)
currency = extract_currency_symbol(price, currency_hint)
if currency is not None:
currency = currency.strip()

price_amount = _extract_price_amount(price, currency)

amount_num = (
parse_number(price_amount.text, decimal_separator, price_amount.negative)
if price_amount.text is not None else None
)

return Price(
amount=amount_num,
currency=currency,
amount_text=amount_text,
amount_text=price_amount.text,
)


Expand Down Expand Up @@ -189,6 +193,34 @@ def extract_price_text(price: str) -> Optional[str]:
>>> extract_price_text("50")
'50'
"""
price_amount = _extract_price_amount(price, currency)
return price_amount.text


def _extract_price_amount(price: str, currency: Optional[str] = "") -> Optional[str]:
"""
Extract from a string the text of a price and a flag indicating
if this is a string of a negative price.
"""

PriceAmount = namedtuple('PriceAmount', ['text', 'negative'])

if price is None:
return PriceAmount(text=None, negative=False)

negative_regexes = [
r"-\s*?\d[\d.,\d]*",
r"\d[\d.,\d]*\d-",
]
if currency is not None:
negative_regexes.append(r"-{}\d[\d.,\d]*".format(re.escape(currency)))
negative_amount_search = re.search(
r"({})(?:[^%\d]|$)".format("|".join(negative_regexes)),
price,
re.VERBOSE
)
negative_amount = bool(negative_amount_search)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So we are determining if the amount is negative or positive before we have extracted the amount. I think this may be a problem, for example once #5 is merged, where - 2 items at 24,00 € could parse the price as -24€ (I did not check, though).

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You broke my tests! :-(
You are right. This scenario will fail doing that way. However I don't see how to identify it after extracting the amount. I will include it to the tests and try to solve it.


if price.count('€') == 1:
m = re.search(r"""
[\d\s.,]*?\d # number, probably with thousand separators
Expand All @@ -197,18 +229,21 @@ def extract_price_text(price: str) -> Optional[str]:
(?:$|[^\d]) # something which is not a digit
""", price, re.VERBOSE)
if m:
return m.group(0).replace(' ', '')
return PriceAmount(text=m.group(0).replace(' ', ''), negative=negative_amount)

m = re.search(r"""
(\d[\d\s.,]*) # number, probably with thousand separators
\s*? # skip whitespace
(?:[^%\d]|$) # capture next symbol - it shouldn't be %
""", price, re.VERBOSE)

if m:
return m.group(1).strip(',.').strip()
return PriceAmount(text=m.group(1).strip(',.').strip(), negative=negative_amount)

if 'free' in price.lower():
return '0'
return None
return PriceAmount(text="0", negative=negative_amount)

return PriceAmount(text=None, negative=negative_amount)


# NOTE: Keep supported separators in sync with parse_number()
Expand Down Expand Up @@ -244,7 +279,8 @@ def get_decimal_separator(price: str) -> Optional[str]:


def parse_number(num: str,
decimal_separator: Optional[str] = None) -> Optional[Decimal]:
decimal_separator: Optional[str] = None,
is_negative: Optional[bool] = False) -> Optional[Decimal]:
""" Parse a string with a number to a Decimal, guessing its format:
decimal separator, thousand separator. Return None if parsing fails.

Expand Down Expand Up @@ -294,6 +330,7 @@ def parse_number(num: str,
assert decimal_separator == '€'
num = num.replace('.', '').replace(',', '').replace('€', '.')
try:
return Decimal(num)
multiplier = -1 if is_negative else 1
return multiplier * Decimal(num)
except InvalidOperation:
return None
23 changes: 23 additions & 0 deletions tests/test_price_parsing.py
Original file line number Diff line number Diff line change
Expand Up @@ -1980,6 +1980,28 @@ def __eq__(self, other):
]


PRICE_PARSING_NEGATIVE_PRICE = [
Example(None, 'R$ -2,00',
'R$', '2,00', -2.0),
Example(None, '- 1.400,00',
None, '1.400,00', -1400),
Example(None, '-3,95',
None, '3,95', -3.95),
Example(None, '-1.649,69',
None, '1.649,69', -1649.69),
Example(None, '-£127.54',
"£", '127.54', -127.54),
Example(None, '-R$127.54',
"R$", '127.54', -127.54),
Example(None, '-127,54 €',
"€", '127,54', -127.54),
Example(None, 'kr-127,54',
"kr", '127,54', -127.54),
Example(None, '€ 127,54-',
"€", '127,54', -127.54),
]


@pytest.mark.parametrize(
["example"],
[[e] for e in PRICE_PARSING_EXAMPLES_BUGS_CAUGHT] +
Expand All @@ -1990,6 +2012,7 @@ def __eq__(self, other):
[[e] for e in PRICE_PARSING_EXAMPLES_NO_PRICE] +
[[e] for e in PRICE_PARSING_EXAMPLES_NO_CURRENCY] +
[[e] for e in PRICE_PARSING_DECIMAL_SEPARATOR_EXAMPLES] +
[[e] for e in PRICE_PARSING_NEGATIVE_PRICE] +
[pytest.param(e, marks=pytest.mark.xfail())
for e in PRICE_PARSING_EXAMPLES_XFAIL]
)
Expand Down