From 227d57c87d02f763fbb16b7e4cd4081ff99cc9d6 Mon Sep 17 00:00:00 2001 From: Jaime Oliver Date: Wed, 6 Dec 2023 16:18:25 -0500 Subject: [PATCH 1/2] closes #132 --- pymrio/tools/iodownloader.py | 59 +++++++++++++++++++++++++----------- pymrio/tools/ioparser.py | 12 +++++--- 2 files changed, 50 insertions(+), 21 deletions(-) diff --git a/pymrio/tools/iodownloader.py b/pymrio/tools/iodownloader.py index 3ab262d8..da9e50b5 100644 --- a/pymrio/tools/iodownloader.py +++ b/pymrio/tools/iodownloader.py @@ -92,6 +92,13 @@ "2010-2014": "https://stats.oecd.org/wbos/fileview2.aspx?IDFile=2c2f499f-5703-4034-9457-2f7518e8f2fc", "2015-2018": "https://stats.oecd.org/wbos/fileview2.aspx?IDFile=59a3d7f2-3f23-40d5-95ca-48da84c0f861", }, + "v2023": { + "1995-2000": "http://stats.oecd.org/wbos/fileview2.aspx?IDFile=88e186db-b5ed-4d08-a1f1-bb2c817caae4", + "2001-2005": "http://stats.oecd.org/wbos/fileview2.aspx?IDFile=c0e89731-540c-4848-bc2d-40ef8a6a0e87", + "2006-2010": "http://stats.oecd.org/wbos/fileview2.aspx?IDFile=993087cc-be0a-4f62-85b1-44644583febe", + "2011-2015": "http://stats.oecd.org/wbos/fileview2.aspx?IDFile=8e933dee-1431-4c4d-895a-bc47a6e495a7", + "2016-2020": "http://stats.oecd.org/wbos/fileview2.aspx?IDFile=d83434d0-a5b5-46bf-8a9f-5b6f364da6f5", + }, }, } @@ -242,37 +249,35 @@ def download_oecd( os.makedirs(storage_folder, exist_ok=True) if type(version) is int: - version = str(version) - - if ("8" in version) or ("4" in version): - version = "v2018" - elif ("3" in version) or ("6" in version): - version = "v2016" - elif "21" in version: - version = "v2021" - else: - raise ValueError("Version not understood") + version = "v" + str(version) - v2021_years = ["1995-1999", "2000-2004", "2005-2009", "2010-2014", "2015-2018"] + if not version in ("v2016", "v2018", "v2021", "v2023"): + raise ValueError("Version not understood") if type(years) is int or type(years) is str: years = [years] + if version == "v2021": + bundle_years = ["1995-1999", "2000-2004", "2005-2009", "2010-2014", "2015-2018"] + elif version == "v2023": + bundle_years = ["1995-2000", "2001-2005", "2006-2010", "2011-2015", "2016-2020"] + if not years: if version == "v2018": years = range(2005, 2016) elif version == "v2021": - years = v2021_years - + years = bundle_years.copy() + elif version == "v2023": + years = bundle_years.copy() else: years = range(1995, 2012) years = [str(yy) for yy in years] - if version == "v2021": + if version == "v2021" or version == "v2023": for index, year in enumerate(years): - if year not in v2021_years: - for yr in v2021_years: + if year not in bundle_years: + for yr in bundle_years: if int(yr[:4]) <= int(year) <= int(yr[-4:]): years[index] = yr @@ -298,6 +303,10 @@ def download_oecd( ] if set(filenames).issubset(os.listdir(storage_folder)): continue + if version == "v2023": + filename = "ICIO-" + yy + "-extended.zip" + filenames = [f"{yr}.zip" for yr in range(int(yy[:4]), int(yy[-4:]) + 1)] + elif filename in os.listdir(storage_folder): continue @@ -308,10 +317,26 @@ def download_oecd( for chunk in req.iter_content(1024 * 5): lf.write(chunk) - if version == "v2021": + if version == "v2021" or version == "v2023": with zipfile.ZipFile(storage_file, "r") as zip_ref: zip_ref.extractall(storage_folder) os.remove(storage_file) + if version == "v2023": + for file in os.listdir(storage_folder): + absolute_path = os.path.join(storage_folder, file) + if file.endswith(".zip"): + with zipfile.ZipFile(absolute_path, "r") as zip_ref: + zip_ref.extractall(storage_folder) + os.rename( + os.path.join( + storage_folder, file.replace(".zip", ".csv") + ), + os.path.join( + storage_folder, + "ICIO2023_" + file.replace(".zip", ".csv"), + ), + ) + os.remove(absolute_path) downlog._add_fileio( "Downloaded {} to {}".format( diff --git a/pymrio/tools/ioparser.py b/pymrio/tools/ioparser.py index 2957025e..7bed9b0d 100644 --- a/pymrio/tools/ioparser.py +++ b/pymrio/tools/ioparser.py @@ -1535,7 +1535,7 @@ def parse_oecd(path, year=None): path = os.path.abspath(os.path.normpath(str(path))) - oecd_file_starts = ["ICIO2016_", "ICIO2018_", "ICIO2021_"] + oecd_file_starts = ["ICIO2016_", "ICIO2018_", "ICIO2021_", "ICIO2023_"] # determine which oecd file to be parsed if not os.path.isdir(path): @@ -1609,9 +1609,11 @@ def parse_oecd(path, year=None): oecd_raw.drop(oecd_totals_row, axis=0, errors="ignore", inplace=True) # Important - these must not match any country or industry name - factor_input = oecd_raw.filter(regex="VALU|TAX", axis=0) + factor_input_exact = oecd_raw.filter(items=["TLS", "VA"], axis=0) + factor_input_regex = oecd_raw.filter(regex="VALU|TAX", axis=0) + factor_input = pd.concat([factor_input_exact, factor_input_regex], axis=0) final_demand = oecd_raw.filter( - regex="HFCE|NPISH|NPS|GGFC|GFCF|INVNT|INV|DIRP|DPABR|FD|P33|DISC", axis=1 + regex="HFCE|NPISH|NPS|GGFC|GFCF|INVNT|INV|DIRP|DPABR|FD|P33|DISC|OUT", axis=1 ) Z = oecd_raw.loc[ @@ -1624,7 +1626,9 @@ def parse_oecd(path, year=None): F_Y_factor_input = factor_input.loc[:, final_demand.columns] Y = final_demand.loc[final_demand.index.difference(F_factor_input.index), :] - Z_index = pd.MultiIndex.from_tuples(tuple(ll) for ll in Z.index.str.split("_")) + Z_index = pd.MultiIndex.from_tuples( + tuple(ll) for ll in Z.index.map(lambda x: x.split("_", maxsplit=1)) + ) Z_columns = Z_index.copy() Z_index.names = IDX_NAMES["Z_row"] Z_columns.names = IDX_NAMES["Z_col"] From 15367efe27bf8c408e4c91a197a31d903acb117a Mon Sep 17 00:00:00 2001 From: Jaime Oliver Date: Thu, 11 Apr 2024 13:43:58 +0200 Subject: [PATCH 2/2] Updated urls --- pymrio/tools/iodownloader.py | 32 +++++++++++++------------------- 1 file changed, 13 insertions(+), 19 deletions(-) diff --git a/pymrio/tools/iodownloader.py b/pymrio/tools/iodownloader.py index da9e50b5..db1108ae 100644 --- a/pymrio/tools/iodownloader.py +++ b/pymrio/tools/iodownloader.py @@ -93,11 +93,11 @@ "2015-2018": "https://stats.oecd.org/wbos/fileview2.aspx?IDFile=59a3d7f2-3f23-40d5-95ca-48da84c0f861", }, "v2023": { - "1995-2000": "http://stats.oecd.org/wbos/fileview2.aspx?IDFile=88e186db-b5ed-4d08-a1f1-bb2c817caae4", - "2001-2005": "http://stats.oecd.org/wbos/fileview2.aspx?IDFile=c0e89731-540c-4848-bc2d-40ef8a6a0e87", - "2006-2010": "http://stats.oecd.org/wbos/fileview2.aspx?IDFile=993087cc-be0a-4f62-85b1-44644583febe", - "2011-2015": "http://stats.oecd.org/wbos/fileview2.aspx?IDFile=8e933dee-1431-4c4d-895a-bc47a6e495a7", - "2016-2020": "http://stats.oecd.org/wbos/fileview2.aspx?IDFile=d83434d0-a5b5-46bf-8a9f-5b6f364da6f5", + "1995-2000": "http://stats.oecd.org/wbos/fileview2.aspx?IDFile=d26ad811-5b58-4f0c-a4e3-06a1469e475c", + "2001-2005": "http://stats.oecd.org/wbos/fileview2.aspx?IDFile=7cb93dae-e491-4cfd-ac67-889eb7016a4a", + "2006-2010": "http://stats.oecd.org/wbos/fileview2.aspx?IDFile=ea165bfb-3a85-4e0a-afee-6ba8e6c16052", + "2011-2015": "http://stats.oecd.org/wbos/fileview2.aspx?IDFile=1f791bc6-befb-45c5-8b34-668d08a1702a", + "2016-2020": "http://stats.oecd.org/wbos/fileview2.aspx?IDFile=d1ab2315-298c-4e93-9a81-c6f2273139fe", }, }, } @@ -205,7 +205,7 @@ def _download_urls( def download_oecd( - storage_folder, version="v2021", years=None, overwrite_existing=False + storage_folder, version="v2023", years=None, overwrite_existing=False ): """Downloads the OECD ICIO tables @@ -324,19 +324,13 @@ def download_oecd( if version == "v2023": for file in os.listdir(storage_folder): absolute_path = os.path.join(storage_folder, file) - if file.endswith(".zip"): - with zipfile.ZipFile(absolute_path, "r") as zip_ref: - zip_ref.extractall(storage_folder) - os.rename( - os.path.join( - storage_folder, file.replace(".zip", ".csv") - ), - os.path.join( - storage_folder, - "ICIO2023_" + file.replace(".zip", ".csv"), - ), - ) - os.remove(absolute_path) + os.rename( + os.path.join(storage_folder, file), + os.path.join( + storage_folder, + "ICIO2023_" + file.replace("_SML", ""), + ), + ) downlog._add_fileio( "Downloaded {} to {}".format(