Skip to content

Commit

Permalink
Feature/oecd 2023 release (#138)
Browse files: browse the repository at this point in the history
* closes #132

* Updated URLs
  • Loading branch information
jaimeoliver1 authored Apr 11, 2024
1 parent 9449717 commit 9de5936
Show file tree
Hide file tree
Showing 2 changed files with 45 additions and 22 deletions.
55 changes: 37 additions & 18 deletions pymrio/tools/iodownloader.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,13 @@
"2010-2014": "https://stats.oecd.org/wbos/fileview2.aspx?IDFile=2c2f499f-5703-4034-9457-2f7518e8f2fc",
"2015-2018": "https://stats.oecd.org/wbos/fileview2.aspx?IDFile=59a3d7f2-3f23-40d5-95ca-48da84c0f861",
},
"v2023": {
"1995-2000": "http://stats.oecd.org/wbos/fileview2.aspx?IDFile=d26ad811-5b58-4f0c-a4e3-06a1469e475c",
"2001-2005": "http://stats.oecd.org/wbos/fileview2.aspx?IDFile=7cb93dae-e491-4cfd-ac67-889eb7016a4a",
"2006-2010": "http://stats.oecd.org/wbos/fileview2.aspx?IDFile=ea165bfb-3a85-4e0a-afee-6ba8e6c16052",
"2011-2015": "http://stats.oecd.org/wbos/fileview2.aspx?IDFile=1f791bc6-befb-45c5-8b34-668d08a1702a",
"2016-2020": "http://stats.oecd.org/wbos/fileview2.aspx?IDFile=d1ab2315-298c-4e93-9a81-c6f2273139fe",
},
},
}

Expand Down Expand Up @@ -198,7 +205,7 @@ def _download_urls(


def download_oecd(
storage_folder, version="v2021", years=None, overwrite_existing=False
storage_folder, version="v2023", years=None, overwrite_existing=False
):
"""Downloads the OECD ICIO tables
Expand Down Expand Up @@ -242,37 +249,35 @@ def download_oecd(
os.makedirs(storage_folder, exist_ok=True)

if type(version) is int:
version = str(version)

if ("8" in version) or ("4" in version):
version = "v2018"
elif ("3" in version) or ("6" in version):
version = "v2016"
elif "21" in version:
version = "v2021"
else:
raise ValueError("Version not understood")
version = "v" + str(version)

v2021_years = ["1995-1999", "2000-2004", "2005-2009", "2010-2014", "2015-2018"]
if not version in ("v2016", "v2018", "v2021", "v2023"):
raise ValueError("Version not understood")

if type(years) is int or type(years) is str:
years = [years]

if version == "v2021":
bundle_years = ["1995-1999", "2000-2004", "2005-2009", "2010-2014", "2015-2018"]
elif version == "v2023":
bundle_years = ["1995-2000", "2001-2005", "2006-2010", "2011-2015", "2016-2020"]

if not years:
if version == "v2018":
years = range(2005, 2016)
elif version == "v2021":
years = v2021_years

years = bundle_years.copy()
elif version == "v2023":
years = bundle_years.copy()
else:
years = range(1995, 2012)

years = [str(yy) for yy in years]

if version == "v2021":
if version == "v2021" or version == "v2023":
for index, year in enumerate(years):
if year not in v2021_years:
for yr in v2021_years:
if year not in bundle_years:
for yr in bundle_years:
if int(yr[:4]) <= int(year) <= int(yr[-4:]):
years[index] = yr

Expand All @@ -298,6 +303,10 @@ def download_oecd(
]
if set(filenames).issubset(os.listdir(storage_folder)):
continue
if version == "v2023":
filename = "ICIO-" + yy + "-extended.zip"
filenames = [f"{yr}.zip" for yr in range(int(yy[:4]), int(yy[-4:]) + 1)]

elif filename in os.listdir(storage_folder):
continue

Expand All @@ -308,10 +317,20 @@ def download_oecd(
for chunk in req.iter_content(1024 * 5):
lf.write(chunk)

if version == "v2021":
if version == "v2021" or version == "v2023":
with zipfile.ZipFile(storage_file, "r") as zip_ref:
zip_ref.extractall(storage_folder)
os.remove(storage_file)
if version == "v2023":
for file in os.listdir(storage_folder):
absolute_path = os.path.join(storage_folder, file)
os.rename(
os.path.join(storage_folder, file),
os.path.join(
storage_folder,
"ICIO2023_" + file.replace("_SML", ""),
),
)

downlog._add_fileio(
"Downloaded {} to {}".format(
Expand Down
12 changes: 8 additions & 4 deletions pymrio/tools/ioparser.py
Original file line number Diff line number Diff line change
Expand Up @@ -1535,7 +1535,7 @@ def parse_oecd(path, year=None):

path = os.path.abspath(os.path.normpath(str(path)))

oecd_file_starts = ["ICIO2016_", "ICIO2018_", "ICIO2021_"]
oecd_file_starts = ["ICIO2016_", "ICIO2018_", "ICIO2021_", "ICIO2023_"]

# determine which oecd file to be parsed
if not os.path.isdir(path):
Expand Down Expand Up @@ -1609,9 +1609,11 @@ def parse_oecd(path, year=None):
oecd_raw.drop(oecd_totals_row, axis=0, errors="ignore", inplace=True)

# Important - these must not match any country or industry name
factor_input = oecd_raw.filter(regex="VALU|TAX", axis=0)
factor_input_exact = oecd_raw.filter(items=["TLS", "VA"], axis=0)
factor_input_regex = oecd_raw.filter(regex="VALU|TAX", axis=0)
factor_input = pd.concat([factor_input_exact, factor_input_regex], axis=0)
final_demand = oecd_raw.filter(
regex="HFCE|NPISH|NPS|GGFC|GFCF|INVNT|INV|DIRP|DPABR|FD|P33|DISC", axis=1
regex="HFCE|NPISH|NPS|GGFC|GFCF|INVNT|INV|DIRP|DPABR|FD|P33|DISC|OUT", axis=1
)

Z = oecd_raw.loc[
Expand All @@ -1624,7 +1626,9 @@ def parse_oecd(path, year=None):
F_Y_factor_input = factor_input.loc[:, final_demand.columns]
Y = final_demand.loc[final_demand.index.difference(F_factor_input.index), :]

Z_index = pd.MultiIndex.from_tuples(tuple(ll) for ll in Z.index.str.split("_"))
Z_index = pd.MultiIndex.from_tuples(
tuple(ll) for ll in Z.index.map(lambda x: x.split("_", maxsplit=1))
)
Z_columns = Z_index.copy()
Z_index.names = IDX_NAMES["Z_row"]
Z_columns.names = IDX_NAMES["Z_col"]
Expand Down

0 comments on commit 9de5936

Please sign in to comment.