Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
34 commits
Select commit Hold shift + click to select a range
cebefee
bump version on setup.py
rafpyprog Jul 5, 2019
108059a
add badges to readme
rafpyprog Jul 5, 2019
0c5da47
Merge pull request #10 from rafpyprog/v2.0
rafpyprog Jul 5, 2019
e3e9a6c
Updates README.rst
rafpyprog Jul 5, 2019
07f0348
Merge branch 'master' into develop
rafpyprog Jul 5, 2019
b45db6e
Fix bug when time series search returned more than one result pages
rafpyprog Jul 15, 2019
be8a156
Fix date parsing when only year is present
rafpyprog Jul 15, 2019
aeb47c9
Merge branch 'master' into develop
rafpyprog Jul 15, 2019
e4ce043
Fix date parsing when only year is present and enforce string inputs…
rafpyprog Jul 15, 2019
0b578bb
Merge branch 'master' into develop
rafpyprog Jul 15, 2019
dda2633
fixes #28
rafpyprog Dec 10, 2019
654215c
fixes #28
rafpyprog Dec 10, 2019
79f9603
conclude merge
opardal Nov 9, 2020
341276d
updating...
opardal Nov 9, 2020
18be96a
Merge pull request #4 from opardal/rafpyprog-develop
opardal Nov 9, 2020
e5ce858
Deixando os branches develop iguais
opardal Nov 11, 2020
0160d94
Incluidas novas funcoes de data em common.py
opardal Nov 11, 2020
2c5e8f6
Alterado modo strict da funcao time_serie
opardal Nov 11, 2020
b28fc59
Testes para funcao search corrigidos
opardal Nov 11, 2020
c087f18
Disponibiliza a funcao apply_strict_range no init
opardal Nov 11, 2020
9149f00
Removida funcao get_data_with_strict_range do api.py
opardal Nov 11, 2020
c274993
Teste api.py corrigido
opardal Nov 11, 2020
6485467
Criacao da funcao get_series_code e atualizacao da funcao metadata
opardal Nov 11, 2020
7b1dbb6
correcao de formatacao
opardal Nov 11, 2020
2d3f254
Corrigindo formato das datas no dicionario de metadata
opardal Nov 11, 2020
05b6956
Separada a funcionalidade strict em novos arquivos
opardal Nov 12, 2020
ccb1e6d
Update pytest.ini
opardal Nov 12, 2020
a8b1aa4
strict included in pytest
opardal Jan 5, 2021
2c52f50
metadata function now accepts lists and tuples
opardal Jan 6, 2021
e53fc43
optional end implemented
opardal Jan 6, 2021
03b0cab
simple refactoring
opardal Jan 7, 2021
61bd050
Merge branch 'new_date_handling' of https://github.com/opardal/pySGS …
opardal Jan 7, 2021
db292b3
tests updates
opardal Jan 7, 2021
e43c4cd
pytest.ini fixed
opardal Jan 7, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions pytest.ini
Original file line number Diff line number Diff line change
Expand Up @@ -10,3 +10,4 @@ markers =
mypy
search
ts
strict
1 change: 1 addition & 0 deletions sgs/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,3 +12,4 @@
from .ts import time_serie
from .metadata import metadata
from .search import search_ts
from .strict import constrain
43 changes: 3 additions & 40 deletions sgs/api.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,10 @@
import functools
from typing import Union, List, Dict
from typing import List

import pandas as pd
import requests
from retrying import retry

from .common import LRU_CACHE_SIZE, MAX_ATTEMPT_NUMBER, to_datetime
from .common import LRU_CACHE_SIZE, MAX_ATTEMPT_NUMBER


@retry(stop_max_attempt_number=MAX_ATTEMPT_NUMBER)
Expand All @@ -14,6 +13,7 @@ def get_data(ts_code: int, begin: str, end: str) -> List:
"""
Requests time series data from the SGS API in json format.
"""
end = begin if end is None else end

url = (
"http://api.bcb.gov.br/dados/serie/bcdata.sgs.{}"
Expand All @@ -22,40 +22,3 @@ def get_data(ts_code: int, begin: str, end: str) -> List:
request_url = url.format(ts_code, begin, end)
response = requests.get(request_url)
return response.json()

def get_data_with_strict_range(ts_code: int, begin: str, end: str) -> List:
    """
    Request time series data from the SGS API, enforcing the start of the range.

    The SGS API's default behaviour is to return the last stored value when
    the selected date range has no data. That case shows up as a first record
    dated before the requested start date, so this function returns an empty
    list instead of records outside the selected range.

    :param ts_code: time serie code.
    :param begin: start date (DD/MM/YYYY).
    :param end: end date (DD/MM/YYYY).

    :return: Data in json format or an empty list
    :rtype: list
    """
    data = get_data(ts_code, begin, end)

    # An empty response has no first record to inspect; indexing it would
    # raise IndexError, so hand back an empty result straight away.
    if not data:
        return []

    first_record_date = to_datetime(data[0]["data"], "pt")
    period_start_date = to_datetime(begin, "pt")

    try:
        is_out_of_range = first_record_date < period_start_date  # type: ignore
    except TypeError:
        # The two values could not be ordered (presumably one of them was
        # left unparsed by to_datetime -- verify against that helper).
        print("ERROR: Serie " + str(ts_code) + " - Please, use 'DD/MM/YYYY' format for date strings.")
        return []

    if is_out_of_range:
        print("WARNING: Serie " + str(ts_code) + " - There is no data for the requested period, but there's previous data.")
        return []

    return data
50 changes: 35 additions & 15 deletions sgs/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,45 +2,65 @@
Shared functions.
"""
from datetime import datetime
from typing import Union, List, Tuple
import locale
import re
from typing import Union
import pandas as pd
import os


LRU_CACHE_SIZE = 32
MAX_ATTEMPT_NUMBER = 5


def to_datetime(date_string: str, language: str) -> Union[datetime, str]:
""" Converts a date string to a datetime object """
locales = {"pt": "pt_BR.utf-8", "en": "en_US.utf-8"}
def to_datetime(date_string: str, language: str) -> datetime:

""" correct problem with locale in Windows platform """
if os.name == 'nt':
locales = {"pt": "Portuguese_Brazil.1252", "en": "Portuguese_Brazil.1252"}

else:
locales = {"pt": "pt_BR.utf-8", "en": "en_US.utf-8"}

locale.setlocale(locale.LC_TIME, locales[language])

dd_mm_aaaa = "%d/%m/%Y"
mmm_aaaa = "%b/%Y"
aaaa = "%Y"

formats = [dd_mm_aaaa, mmm_aaaa]
formats = [dd_mm_aaaa, mmm_aaaa, aaaa]

for fmt in formats:
try:
date = datetime.strptime(date_string, fmt)
if fmt == aaaa:
date = date.replace(day=31, month=12)
break
except ValueError:
continue
else:
yyyy = "[0-9]{4}"
if re.match(yyyy, date_string):
year = int(date_string)
month = 12
day = 31
date = datetime(year, month, day)
else:
return date_string # returns original value if cant parse
raise ValueError
return date


def to_datetime_string(date_string: str, language: str, strformat: str = "%Y-%m-%d") -> str:
    """
    Parse ``date_string`` with ``to_datetime`` and render it using ``strformat``.

    :param date_string: date text to convert.
    :param language: language key forwarded to ``to_datetime``.
    :param strformat: ``strftime`` pattern used for the output.

    :return: the formatted date, or ``date_string`` unchanged when a
        ``ValueError`` is raised while parsing or formatting.
    :rtype: str
    """
    try:
        return to_datetime(date_string, language).strftime(strformat)
    except ValueError:
        # Fall back to the caller's original text.
        return date_string


def get_series_codes(code_input: Union[int, List, Tuple, pd.DataFrame, pd.Series]) -> List:
    """
    Normalise the accepted ways of naming time series into a list of codes.

    :param code_input: a single integer code, an iterable of codes
        (list, tuple, ...), a ``pd.Series`` (its ``name`` is the code) or a
        ``pd.DataFrame`` (its column labels are the codes).

    :return: a new list of codes; the caller's object is never returned as is.
    :rtype: list
    """
    # Local import keeps this function self-contained (no new module import).
    from numbers import Integral

    # Integral also matches integer scalars that are not ``int`` subclasses
    # but are registered with the numbers ABCs (e.g. NumPy integers).
    if isinstance(code_input, Integral):
        return [code_input]
    if isinstance(code_input, pd.Series):
        return [code_input.name]
    if isinstance(code_input, pd.DataFrame):
        return list(code_input.columns)

    # Lists, tuples and any other iterable: always materialise a fresh list so
    # the declared return type holds and the input is not aliased.
    return list(code_input)
9 changes: 4 additions & 5 deletions sgs/dataframe.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,17 @@
"""
Dataframe
"""
from typing import Dict, List, Tuple, Union
from typing import Dict, List, Tuple, Union, Optional

import pandas as pd

from . import api
from . import search
from .ts import time_serie
from .common import get_series_codes


def dataframe(ts_codes: Union[int, List, Tuple], start: str, end: str, strict: bool = False) -> pd.DataFrame:
def dataframe(ts_codes: Union[int, List, Tuple], start: str, end: Optional[str] = None, strict: bool = False) -> pd.DataFrame:
"""
Creates a dataframe from a list of time serie codes.

Expand All @@ -36,11 +37,9 @@ def dataframe(ts_codes: Union[int, List, Tuple], start: str, end: str, strict: b
2018-01-05 0.026444 NaN

"""
if isinstance(ts_codes, int):
ts_codes = [ts_codes]

series = []
for code in ts_codes:
for code in get_series_codes(ts_codes):
ts = time_serie(code, start, end, strict)
series.append(ts)

Expand Down
24 changes: 10 additions & 14 deletions sgs/metadata.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,10 @@
from typing import Optional, Dict, List, Union

import pandas as pd

from typing import List, Tuple, Union
from .search import search_ts
from .common import get_series_codes


def metadata(ts_code: Union[int, pd.DataFrame], language: str = "en") -> Optional[List]:
def metadata(ts_code: Union[int, List, Tuple, pd.DataFrame, pd.Series], language: str = "en") -> List:
"""Request metadata about a time serie or all time series in a pandas dataframe.

:param ts_code: time serie code or pandas dataframe with time series as columns.
Expand All @@ -27,14 +26,11 @@ def metadata(ts_code: Union[int, pd.DataFrame], language: str = "en") -> Optiona
'last_value': Timestamp('2019-05-01 00:00:00'), 'source': 'FGV'}]
"""
info = []
if isinstance(ts_code, pd.core.frame.DataFrame):
for col in ts_code.columns:
col_info = search_ts(col, language)
if col_info is not None:
info.append(col_info[0])
else:
info.append(None)
else:
col_info = search_ts(ts_code, language)
info.append(col_info)
for ts in get_series_codes(ts_code):
try:
metadata = search_ts(ts, language)
info.extend(metadata)
except NameError:
info.append(None)

return info
6 changes: 3 additions & 3 deletions sgs/search.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from retrying import retry
import pandas as pd

from .common import LRU_CACHE_SIZE, MAX_ATTEMPT_NUMBER, to_datetime
from .common import LRU_CACHE_SIZE, MAX_ATTEMPT_NUMBER, to_datetime_string


@unique
Expand Down Expand Up @@ -78,8 +78,8 @@ def parse_search_response(response, language: str) -> Optional[list]:

try:
df = pd.read_html(HTML, attrs={"id": "tabelaSeries"}, flavor="html5lib", skiprows=1)[0]
df[START] = df[START].map(lambda x: to_datetime(str(x), language))
df[LAST] = df[LAST].map(lambda x: to_datetime(str(x), language))
df[START] = df[START].map(lambda x: to_datetime_string(str(x), language, "%Y-%m-%d %H:%M:%S"))
df[LAST] = df[LAST].map(lambda x: to_datetime_string(str(x), language, "%Y-%m-%d %H:%M:%S"))
col_names = {
cols["code"]: "code",
cols["name"]: "name",
Expand Down
36 changes: 36 additions & 0 deletions sgs/strict.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
import pandas as pd
from typing import Union, Optional
from .common import to_datetime, get_series_codes


def constrain(data: Union[pd.DataFrame, pd.Series], start: str, end: Optional[str] = None) -> Union[pd.DataFrame, pd.Series]:
    """
    Keep only the rows of ``data`` whose index lies between ``start`` and ``end``.

    The SGS API's default behaviour is to return the last stored value when
    the selected date range has no data; this function enforces the range
    the user asked for.

    :param data: time_serie or dataframe to be filtered.
    :param start: start date (DD/MM/YYYY).
    :param end: end date (DD/MM/YYYY); when omitted, ``start`` is used.

    :return: the filtered time_serie or dataframe (empty when the dates
        cannot be parsed or nothing falls inside the range).
    :rtype: pd.Series or pd.DataFrame
    """
    if end is None:
        end = start

    try:
        lower_bound = to_datetime(start, "pt")
        upper_bound = to_datetime(end, "pt")
        inside_range = data.index.to_series().between(lower_bound, upper_bound)
        strict_data = data[inside_range]
        nothing_left = strict_data.empty or data.empty
    except ValueError:
        # Same labels/columns as the input, but with every row removed.
        strict_data = data.drop(data.index)
        print("ERROR: Please, use 'DD/MM/YYYY' format for date strings.")
        return strict_data

    if nothing_left:
        series = ','.join(map(str, get_series_codes(data)))
        print("WARNING: Serie(s) %s - There is no data for the requested period." % series)

    return strict_data
23 changes: 12 additions & 11 deletions sgs/ts.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,17 @@
"""
Time Serie manipulation
"""
from typing import Dict, List, Optional

import numpy as np
import pandas as pd

from typing import Optional
from . import api
from . import search
from .strict import constrain
from .common import to_datetime


def time_serie(ts_code: int, start: str, end: str, strict: bool = False) -> pd.Series:
def time_serie(ts_code: int, start: str, end: Optional[str] = None, strict: bool = False) -> pd.Series:
"""
Request a time serie data.

Expand All @@ -34,18 +34,19 @@ def time_serie(ts_code: int, start: str, end: str, strict: bool = False) -> pd.S
2018-01-05 0.026444
2018-01-08 0.026444
"""

if strict:
ts_data = api.get_data_with_strict_range(ts_code, start, end)
else:
ts_data = api.get_data(ts_code, start, end)


values = []
index = []
for i in ts_data:
for i in api.get_data(ts_code, start, end):
values.append(i["valor"])
index.append(to_datetime(i["data"], "pt"))

# Transform empty strings in null values
values = [np.nan if value == "" else value for value in values]
return pd.Series(values, index, name=ts_code, dtype=np.float)

ts = pd.Series(values, index, name=ts_code, dtype=np.float)

if strict:
ts = constrain(ts, start, end)

return ts
9 changes: 0 additions & 9 deletions tests/test_api.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
import pytest

from sgs import api
import pandas as pd


@pytest.mark.api
Expand All @@ -10,11 +9,3 @@ def test_get_data():
data = api.get_data(4, "02/01/2018", "31/01/2018")
assert isinstance(data, list)
assert len(data) == NUMBER_OF_LINES

@pytest.mark.api
def test_get_data_with_strict_range():
NUMBER_OF_LINES = 0
data = api.get_data_with_strict_range(20577, "17/08/2019", "18/08/2019")
assert isinstance(data, list)
assert len(data) == NUMBER_OF_LINES

34 changes: 32 additions & 2 deletions tests/test_common.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
from datetime import datetime

import pytest
from sgs.common import to_datetime

from sgs.common import to_datetime, to_datetime_string, get_series_codes
from sgs.dataframe import dataframe
from sgs.ts import time_serie

@pytest.mark.common
@pytest.mark.parametrize("language", ["pt", "en"])
Expand All @@ -26,3 +27,32 @@ def test_to_datetime_aaaaa():
expected = datetime(day=31, month=12, year=2018)
date_string = '2018'
assert to_datetime(date_string, 'pt') == expected

@pytest.mark.common
@pytest.mark.parametrize(
    "input_str, expected",
    [
        ("01/05/2020", "2020-05-01"),
        ("mai/2020", "2020-05-01"),
        ("2020", "2020-12-31"),
    ],
)
def test_to_datetime_string_pt(input_str, expected):
    """Full-date, month/year and year-only inputs in 'pt' use the default output format."""
    result = to_datetime_string(input_str, 'pt')
    assert result == expected

@pytest.mark.common
def test_to_datetime_string_en():
    """An English month/year string is rendered with the default output format."""
    result = to_datetime_string('may/2020', 'en')
    assert result == "2020-05-01"

@pytest.mark.common
def test_to_datetime_string_full_format():
    """A caller-supplied strftime pattern replaces the default output format."""
    output_format = "%Y-%m-%d %H:%M:%S"
    result = to_datetime_string('31/12/2020', 'pt', output_format)
    assert result == "2020-12-31 00:00:00"

# NOTE: the dataframe(...) and time_serie(...) arguments below are evaluated
# when this module is imported/collected, not when the test runs.
@pytest.mark.common
@pytest.mark.parametrize(
    "input_code",
    [
        12,
        [12],
        (12,),  # trailing comma required: `(12)` is just the int 12, not a tuple
        dataframe(12, '01/01/2020', '01/02/2020'),
        time_serie(12, '01/01/2020', '01/02/2020'),
    ],
)
def test_get_series_codes(input_code):
    """Every supported input kind (int, list, tuple, DataFrame, Series) yields ``[12]``."""
    codes = get_series_codes(input_code)
    expected = [12]
    assert codes == expected
    assert isinstance(codes, list)
Loading