diff --git a/doc/source/notebooks/autodownload.ipynb b/doc/source/notebooks/autodownload.ipynb index 3e4fe93b..881d7f9b 100644 --- a/doc/source/notebooks/autodownload.ipynb +++ b/doc/source/notebooks/autodownload.ipynb @@ -1,7 +1,6 @@ { "cells": [ { - "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -9,7 +8,6 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -18,7 +16,6 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -26,7 +23,6 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -34,7 +30,6 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -42,7 +37,6 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -61,7 +55,6 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -80,7 +73,6 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -101,7 +93,6 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -112,7 +103,6 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -157,7 +147,6 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -216,7 +205,6 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "metadata": { "lines_to_next_cell": 2 @@ -226,7 +214,6 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "metadata": { "lines_to_next_cell": 2 @@ -236,7 +223,6 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -247,7 +233,6 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -255,7 +240,6 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -272,7 +256,6 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -289,7 +272,6 @@ ] }, { - "attachments": {}, 
"cell_type": "markdown", "metadata": {}, "source": [ @@ -306,7 +288,6 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -353,7 +334,6 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -363,7 +343,6 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -380,7 +359,6 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -433,7 +411,6 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -528,7 +505,6 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -550,7 +526,6 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -592,7 +567,6 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -600,7 +574,6 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -608,7 +581,6 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -618,7 +590,6 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -636,7 +607,6 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -653,7 +623,6 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -675,7 +644,6 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -697,7 +665,6 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -714,7 +681,6 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -726,7 +692,6 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -764,7 +729,6 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -772,7 +736,6 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -785,7 +748,6 @@ ] }, { 
- "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -796,7 +758,10 @@ "cell_type": "code", "execution_count": 14, "metadata": { - "collapsed": true + "collapsed": true, + "jupyter": { + "outputs_hidden": true + } }, "outputs": [], "source": [ @@ -806,7 +771,6 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -848,7 +812,10 @@ "cell_type": "code", "execution_count": 16, "metadata": { - "collapsed": false + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } }, "outputs": [ { @@ -880,7 +847,6 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -896,7 +862,6 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -904,7 +869,6 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -913,7 +877,6 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -921,7 +884,6 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -932,7 +894,6 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -940,7 +901,6 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -957,7 +917,6 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -974,7 +933,6 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -991,7 +949,6 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -1033,7 +990,6 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -1070,7 +1026,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.0" + "version": "3.10.12" }, "toc-showmarkdowntxt": false, "toc-showtags": false diff --git a/doc/source/notebooks/autodownload.py b/doc/source/notebooks/autodownload.py new file mode 100644 index 
00000000..aa27a386 --- /dev/null +++ b/doc/source/notebooks/autodownload.py @@ -0,0 +1,281 @@ +# --- +# jupyter: +# jupytext: +# formats: ipynb,py:light +# text_representation: +# extension: .py +# format_name: light +# format_version: '1.5' +# jupytext_version: 1.15.0 +# kernelspec: +# display_name: Python 3 (ipykernel) +# language: python +# name: python3 +# --- + +# # Automatic downloading of MRIO databases + +# Pymrio includes functions to automatically download some of the publicly available global EE MRIO databases. +# This is currently implemented for [EXIOBASE 3](https://doi.org/10.5281/zenodo.3583070), [OECD](https://www.oecd.org/sti/ind/inter-country-input-output-tables.htm) and [WIOD](http://www.wiod.org). + +# The functions described here download the raw data files. Thus, they can also be used for post processing by other tools. + +# ## EXIOBASE 3 download + +# EXIOBASE 3 is licensed under the [Creative Commons Attribution ShareAlike 4.0 International-license](https://creativecommons.org/licenses/by-sa/4.0/legalcode). Thus you can use EXIOBASE 3 for analyses as well as remix, tweak, and build uponit, even commercially, as long as you give credit to the EXIOBASE compilers and share your results/new databases under the same licence. The suggested citation for EXIOBASE 3 is [Stadler et al 2018](https://doi.org/10.1111/jiec.12715). You can find more information, links to documentation as well as concordance matrices on the [EXIOBASE 3 Zenodo repository](https://doi.org/10.5281/zenodo.3583070). The download function of pymrio also downloads the files from this repository. 
+ +# To download, start with: + +import pymrio + +# and define a folder for storing the data: + +exio3_folder = "/tmp/mrios/autodownload/EXIO3" + +# With that we can start the download with (this might take a moment): + +exio_downloadlog = pymrio.download_exiobase3( + storage_folder=exio3_folder, system="pxp", years=[2011, 2012] +) + +# The command above will download the latest EXIOBASE 3 tables in the product +# by product classification (system='pxp') for the years 2011 and 2012. Both +# parameters (system and years) are optional and when omitted the function will +# download all available files. + +# The function returns a log of the download data (which is stored in ```download_log.json``` in the download folder). +# You can inspect the meta data by: + +# + jupyter={"outputs_hidden": false} +print(exio_downloadlog) +# - + +# By default, the download_exiobase3 fetches the latest version of EXIOBASE3 +# available at the [EXIOBASE 3 Zenodo repository](https://doi.org/10.5281/zenodo.3583070). +# To download one of the previous versions specify the DOI with the doi +# parameter: + +prev_version_storage = "/tmp/mrios/autodownload/EXIO3_7" +exio_downlog_37 = pymrio.download_exiobase3( + storage_folder=prev_version_storage, + system="ixi", + years=2004, + doi="10.5281/zenodo.3583071", +) + +print(exio_downlog_37) + +# We also recommend to specifiy a specific DOI version even when using the latest version of EXIOBASE. In that way the used version is documented in the code and can be reproduced in case a newer EXIOBASE version becomes available. + + +# ## WIOD download + + +# **DUE TO A RESTRUCTERING OF THE WIOD WEBPAGE THIS IS CURRENTLY BROKEN.** +# +# +# WIOD is licensed under the [Creative Commons Attribution 4.0 International-license](http://creativecommons.org/licenses/by/4.0/). Thus you can remix, tweak, and build upon WIOD, even commercially, as long as you give credit to WIOD. The WIOD web-page suggest to cite [Timmer et al. 
2015](http://doi.wiley.com/10.1111/roie.12178) when you use the database. You can find more information on the [WIOD webpage](http://www.wiod.org). + +# The download function for WIOD currently processes the [2013 release version of WIOD](http://www.wiod.org/database/wiots13). + +# To download, start with: + +import pymrio + +# Define a folder for storing the data + +wiod_folder = "/tmp/mrios/autodownload/WIOD2013" + +# And start the download with (this will take a couple of minutes): + +wiod_meta = pymrio.download_wiod2013(storage_folder=wiod_folder) + +# The function returns the meta data for the release (which is stored in ```metadata.json``` in the download folder). +# You can inspect the meta data by: + +# + jupyter={"outputs_hidden": false} +print(wiod_meta) +# - + +# The WIOD database provide data for several years and satellite accounts. +# In the default case, all of them are downloaded. You can, however, specify +# years and satellite account. + +# You can specify the years as either int or string (2 or 4 digits): + +res_years = [97, 2004, "2005"] + +# The available satellite accounts for WIOD are listed in the ```WIOD_CONFIG```. 
+# To get them import this dict by: + +from pymrio.tools.iodownloader import WIOD_CONFIG + +# + jupyter={"outputs_hidden": false} +WIOD_CONFIG +# - + +# To restrict this list, you can either copy paste the urls or automatically select the accounts: + +sat_accounts = ["EU", "CO2"] +res_satellite = [ + sat + for sat in WIOD_CONFIG["satellite_urls"] + if any(acc in sat for acc in sat_accounts) +] + +# + jupyter={"outputs_hidden": false} +res_satellite +# - + +wiod_meta_res = pymrio.download_wiod2013( + storage_folder="/tmp/foo_folder/WIOD2013_res", + years=res_years, + satellite_urls=res_satellite, +) + +# + jupyter={"outputs_hidden": false} +print(wiod_meta_res) +# - + +# Subsequent download will only catch files currently not present in the folder, e.g.: + +additional_years = [2000, 2001] +wiod_meta_res = pymrio.download_wiod2013( + storage_folder="/tmp/foo_folder/WIOD2013_res", + years=res_years + additional_years, + satellite_urls=res_satellite, +) + +# only downloads the years given in ```additional_years```, appending these downloads to the meta data file. + +# + jupyter={"outputs_hidden": false} +print(wiod_meta_res) +# - + +# To catch all files, irrespective if present in the storage_folder or not pass ```overwrite_existing=True``` + +# ## OECD download + +# The OECD Inter-Country Input-Output tables (ICIO) are available on the [OECD webpage.](https://www.oecd.org/sti/ind/inter-country-input-output-tables.htm) There is no specific license given for the these tables, but the webpage state that "Data can be downloaded for free" (per June 2023) and to cite this database "OECD (2021), OECD Inter-Country Input-Output Database, http://oe.cd/icio". 
+# +# Currently OECD provides three versions 2016, 2018, and 2021, which are available through pymrio +# + +# To download the data, we first define the folder for storing the data (these will be created if they do not exist yet): + +oecd_folder_v2021 = "/tmp/mrios/autodownload/OECD_2021" +oecd_folder_v2018 = "/tmp/mrios/autodownload/OECD_2018" + +# Than we can start the download with + +log_2021 = pymrio.download_oecd(storage_folder=oecd_folder_v2021) + +# By default, the 2021 release of the OECD - ICIO tables are downloaded. +# To retrieve other versions, simply pass "version='v<version date>'", for example to get the 2018 release, pass "version='v2018'" . +# +# As for WIOD, specific years can be specified by passing a list of years: + +log_2018 = pymrio.download_oecd( + storage_folder=oecd_folder_v2018, version="v2016", years=[2003, 2008] +) + +# However, the 2021 release of OECD is only available in 5-year bundles starting from 1995 to 2018 +# 1995-1999, 2000-2004, 2005-2009, 2010-2014, 2015-2018 +# +# Therefore whenever 2021 release is used, it is recommended to pass years as a list of the wanted bundles in string forms: + +log_2021 = pymrio.download_oecd( + storage_folder=oecd_folder_v2021, years=["1995-1999", "2015-2018"] +) + +# Otherwise the corresponding bundles for the entered years would be downloaded, for example if year 2003 is requested, the bundle 2000-2004 would be downloaded + +log_2021 = pymrio.download_oecd(storage_folder=oecd_folder_v2021, years=[2003, 2012]) + +# The bundles 2000-2004 and 2010-2014 would be downloaded +#
+#
+#
+#
+ +# The function returns a log for the download progress and MRIO info: + +print(log_2021) + +# ## Eora26 download + +# Eora26 provides a simplified, symmetric version of the full Eora database. +# +# Downloading the Eora data requires registration through the [Eora website (worldmrio)](http://www.worldmrio.com) . +# Currently (August 2023), open Eora26 data are only offered for the years 1990 - 2016. +# +# Therefore, you have to have the Eora account email and password in advance to use the downloader. + +# Setup the download with + +# + jupyter={"outputs_hidden": true} +import pymrio + +eora_folder = "/tmp/mrios/eora26" +# - + +# Start the download with (passing the email and password of the eora account) +# If you pass invalid email or password, you get a prompt about that and asked to enter them again, otherwise the function will download the data (this can take some minutes) +# +# This will download all available years 1990 - 2016 + +eora_log = pymrio.download_eora26( + storage_folder=eora_folder, + email="", + password="", +) + +# + jupyter={"outputs_hidden": false} +print(eora_log) +# - + +# As in the case of the WIOD downloader, you can restrict the +# +# 1) years to download by passing ```years=[list of int or str - 4 digits]``` +# +# 2) force the overwriting of existing files by passing ```overwrite_existing=True``` +# +# Satellite accounts, however, can not be restricted since they are included in one file. +# +# The tables are in basic prices as it is the only price system available to download for Eora26. + +# ## EXIOBASE download (previous version 1 and 2) + +# Previous EXIOBASE version requires registration prior to download and therefore an automatic download has not been implemented. 
+# For further information check the download instruction at the [EXIOBASE example notebook.](working_with_exiobase.ipynb#Getting-EXIOBASE) + +# ## GLORIA download + +# The Global Resource Input Output Assessment (GLORIA) database is available to download through a dropbox folder [GLORIA database.](https://www.dropbox.com/sh/o4fxq94n7grvdbk/AABhKvEVx0UuMvz4dQ4NlWC8a?d) There is no specific licence given for this database. +# +# Currently (as per April 2023), there are four available versions 53, 54, 55, and 57 (Release 056 was only +# distributed to a limited number of users for feedback). +# + +# The download function can (as per April 2023) download all four versions + +# To download, start with: + +import pymrio + +# Define a folder for storing the data + +gloria_folder = "/tmp/mrios/autodownload/GLORIA2014" + +# And start the download with (this will take a couple of minutes): + +gloria_log_2014 = pymrio.download_gloria(storage_folder=gloria_folder) + +# The function returns the download log data for the release (which is stored in ```download_log.json``` in the download folder). 
+# You can inspect the log data by: + +print(gloria_log_2014) + +# By default the function downloads all years for the final release (57 as per April 2023), but the year and version can be specified by passing them to the function + +gloria_log_v053_2012 = pymrio.download_gloria(gloria_folder, year=2012, version=53) diff --git a/doc/source/notebooks/extract_data.ipynb b/doc/source/notebooks/extract_data.ipynb new file mode 100644 index 00000000..faee60e7 --- /dev/null +++ b/doc/source/notebooks/extract_data.ipynb @@ -0,0 +1,89 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "6d3cfbc9-bc67-432f-badc-1daec4cb4779", + "metadata": {}, + "source": [ + "# Extract data from Pymrio" + ] + }, + { + "cell_type": "markdown", + "id": "bcb8ce71-6b45-4ccc-81ad-866bc0d6c6f8", + "metadata": {}, + "source": [ + "This notebook shows how to extract specific data from the pymrio object for further processing in Python. For exporting/saving the data to another file format [see the Notebook on loading/saving/exporting](./load_save_export.ipynb)." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "564fd5e1-8a3f-449a-9759-d36aa83473ce", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "04097807-b375-42eb-b5c8-d0c457502a70", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "4f298de3-3641-44bb-88bb-c428d9cf5693", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "import pymrio" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "a38719d5-011c-4dff-969e-f915a205fd44", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "mrio = pymrio.load_test()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f6aadba4-ef17-40d2-aaec-845e274026b7", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.12" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/doc/source/notebooks/extract_data.py b/doc/source/notebooks/extract_data.py new file mode 100644 index 00000000..ebd0d99b --- /dev/null +++ b/doc/source/notebooks/extract_data.py @@ -0,0 +1,67 @@ +# --- +# jupyter: +# jupytext: +# text_representation: +# extension: .py +# format_name: percent +# format_version: '1.3' +# jupytext_version: 1.15.0 +# kernelspec: +# display_name: Python 3 (ipykernel) +# language: python +# name: python3 +# --- + +# %% [markdown] +# # Extract data from Pymrio + +# %% [markdown] +# This notebook shows how to extract specific data from the pymrio object for further processing in Python. 
For exporting/saving the data to another file format see [the notebook on saving/loading/exporting data.](./load_save_export.ipynb) + +# %% +import pymrio + +# %% +mrio = pymrio.load_test().calc_all() + +### Basic pandas indexing of pymrio tables + +# %% [markdown] +# Since pymrio is built on top of pandas, we can use the pandas functions to extract data from the pymrio object. For example, to access the part of the A matrix from the region 2 we can use: + +# %% +A_reg2 = mrio.A.loc["reg2", "reg2"] +A_reg2 + +# %% [markdown] +# Most tables are indexed via a multiindex, in case of the A matrix the index is a tuple of the region and the sector. +# To access all technical coefficients (column) data for mining from all regions we can use: + + +# %% +A_mining = mrio.A.loc[:, (slice(None), "mining")] +A_mining + +# %% [markdown] +# For further information on the pandas multiindex see the [pandas documentation on advanced indexing.](https://pandas.pydata.org/docs/user_guide/advanced.html) + +### Extracting data across extension tables + +# %% [markdown] +# Pymrio includes methods for bulk extraction of data across extension tables. These can either work on a specific extension or across all extensions of the system. + +# %% [markdown] +#### Extracting from a specific extension + + +# %% [markdown] +# Here we use the `extract` method available in the extension object. +# This expects a list of rows (index) to extract. 
+ +row = mrio.emissions.get_rows() + +df_extract = mrio.emissions.extract(row, return_type="dataframe") +ext_extract = mrio.emissions.extract(row, return_type="extension") + +# CONT: DESRIBE STUFF ABOVE +# For example, to extract the total value added for all regions and sectors we can use: diff --git a/doc/source/notebooks/load_save_export.ipynb b/doc/source/notebooks/load_save_export.ipynb index d45e5ba8..5c4bcbae 100644 --- a/doc/source/notebooks/load_save_export.ipynb +++ b/doc/source/notebooks/load_save_export.ipynb @@ -723,7 +723,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, @@ -737,7 +737,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.5" + "version": "3.10.12" } }, "nbformat": 4, diff --git a/doc/source/notebooks/load_save_export.py b/doc/source/notebooks/load_save_export.py new file mode 100644 index 00000000..593c892c --- /dev/null +++ b/doc/source/notebooks/load_save_export.py @@ -0,0 +1,212 @@ +# --- +# jupyter: +# jupytext: +# text_representation: +# extension: .py +# format_name: percent +# format_version: '1.3' +# jupytext_version: 1.15.0 +# kernelspec: +# display_name: Python 3 (ipykernel) +# language: python +# name: python3 +# --- + +# %% [markdown] +# # Loading, saving and exporting data + +# %% [markdown] +# Pymrio includes several functions for data reading and storing. This section presents the methods to use for saving and loading data already in a pymrio compatible format. For parsing raw MRIO data see the different tutorials for [working with available MRIO databases](../handling.rst). + +# %% [markdown] +# Here, we use the included small test MRIO system to highlight the different function. The same functions are available for any MRIO loaded into pymrio. Expect, however, significantly decreased performance due to the size of real MRIO system. 
+ +# %% +import pymrio +import os + +io = pymrio.load_test().calc_all() + +# %% [markdown] +# ## Basic save and read + +# %% [markdown] +# To save the full system, use: + +# %% +save_folder_full = "/tmp/testmrio/full" +io.save_all(path=save_folder_full) + +# %% [markdown] +# To read again from that folder do: + +# %% +io_read = pymrio.load_all(path=save_folder_full) + +# %% [markdown] +# The fileio activities are stored in the included meta data history field: + +# %% +io_read.meta + +# %% [markdown] +# ## Storage format + +# %% [markdown] +# Internally, pymrio stores data in csv format, with the 'economic core' data in the root and each satellite account in a subfolder. Metadata as file as a file describing the data format ('file_parameters.json') are included in each folder. + +# %% +import os + +os.listdir(save_folder_full) + +# %% [markdown] +# The file format for storing the MRIO data can be switched to a binary pickle format with: + +# %% +save_folder_bin = "/tmp/testmrio/binary" +io.save_all(path=save_folder_bin, table_format="pkl") +os.listdir(save_folder_bin) + +# %% [markdown] +# This can be used to reduce the storage space required on the disk for large MRIO databases. + +# %% [markdown] +# ## Archiving MRIOs databases + +# %% [markdown] +# To archive a MRIO system after saving use pymrio.archive: + +# %% +mrio_arc = "/tmp/testmrio/archive.zip" + +# Remove a potentially existing archive from before +try: + os.remove(mrio_arc) +except FileNotFoundError: + pass + +pymrio.archive(source=save_folder_full, archive=mrio_arc) + +# %% [markdown] +# Data can be read directly from such an archive by: + +# %% +tt = pymrio.load_all(mrio_arc) + +# %% [markdown] +# Currently data can not be saved directly into a zip archive. 
+# It is, however, possible to remove the source files after archiving: + +# %% +tmp_save = "/tmp/testmrio/tmp" + +# Remove a potentially existing archive from before +try: + os.remove(mrio_arc) +except FileNotFoundError: + pass + +io.save_all(tmp_save) + +print("Directories before archiving: {}".format(os.listdir("/tmp/testmrio"))) +pymrio.archive(source=tmp_save, archive=mrio_arc, remove_source=True) +print("Directories after archiving: {}".format(os.listdir("/tmp/testmrio"))) + +# %% [markdown] +# Several MRIO databases can be stored in the same archive: + +# %% +# Remove a potentially existing archive from before +try: + os.remove(mrio_arc) +except FileNotFoundError: + pass + +tmp_save = "/tmp/testmrio/tmp" + +io.save_all(tmp_save) +pymrio.archive( + source=tmp_save, archive=mrio_arc, path_in_arc="version1/", remove_source=True +) +io2 = io.copy() +del io2.emissions +io2.save_all(tmp_save) +pymrio.archive( + source=tmp_save, archive=mrio_arc, path_in_arc="version2/", remove_source=True +) + +# %% [markdown] +# When loading from an archive which includes multiple MRIO databases, specify +# one with the parameter 'path_in_arc': + +# %% +io1_load = pymrio.load_all(mrio_arc, path_in_arc="version1/") +io2_load = pymrio.load_all(mrio_arc, path_in_arc="version2/") + +print( + "Extensions of the loaded io1 {ver1} and of io2: {ver2}".format( + ver1=sorted(io1_load.get_extensions()), ver2=sorted(io2_load.get_extensions()) + ) +) + +# %% [markdown] +# The pymrio.load function can be used directly to only a specific satellite account +# of a MRIO database from a zip archive: + +# %% +emissions = pymrio.load(mrio_arc, path_in_arc="version1/emissions") +print(emissions) + +# %% [markdown] +# The archive function is a wrapper around python.zipfile module. 
+# There are, however, some differences to the defaults choosen in the original: +# +# - In contrast to [zipfile.write](https://docs.python.org/3/library/zipfile.html), +# pymrio.archive raises an +# error if the data (path + filename) are identical in the zip archive. +# Background: the zip standard allows that files with the same name and path +# are stored side by side in a zip file. This becomes an issue when unpacking +# this files as they overwrite each other upon extraction. +# +# - The standard for the parameter 'compression' is set to ZIP_DEFLATED +# This is different from the zipfile default (ZIP_STORED) which would +# not give any compression. +# See the [zipfile docs](https://docs.python.org/3/library/zipfile.html#zipfile-objects) +# for further information. +# Depending on the value given for the parameter 'compression' +# additional modules might be necessary (e.g. zlib for ZIP_DEFLATED). +# Futher information on this can also be found in the zipfile python docs. + +# %% [markdown] +# ## Storing or exporting a specific table or extension + +# %% [markdown] +# Each extension of the MRIO system can be stored separetly with: + +# %% +save_folder_em = "/tmp/testmrio/emissions" + +# %% +io.emissions.save(path=save_folder_em) + +# %% [markdown] +# This can then be loaded again as separate satellite account: + +# %% +emissions = pymrio.load(save_folder_em) + +# %% +emissions + +# %% +emissions.D_cba + +# %% [markdown] +# As all data in pymrio is stored as [pandas DataFrame](https://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.html), the full pandas stack for exporting tables is available. For example, to export a table as excel sheet use: + +# %% +io.emissions.D_cba.to_excel("/tmp/testmrio/emission_footprints.xlsx") + +# %% [markdown] +# For further information see the pandas [documentation on import/export](https://pandas.pydata.org/pandas-docs/stable/io.html). 
diff --git a/doc/source/notebooks/stressor_characterization.ipynb b/doc/source/notebooks/stressor_characterization.ipynb index 1cfa9aab..a5ad5785 100644 --- a/doc/source/notebooks/stressor_characterization.ipynb +++ b/doc/source/notebooks/stressor_characterization.ipynb @@ -2319,7 +2319,7 @@ "formats": "ipynb,py:light" }, "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, @@ -2333,7 +2333,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.5" + "version": "3.10.12" } }, "nbformat": 4, diff --git a/doc/source/notebooks/stressor_characterization.py b/doc/source/notebooks/stressor_characterization.py new file mode 100644 index 00000000..034ebe42 --- /dev/null +++ b/doc/source/notebooks/stressor_characterization.py @@ -0,0 +1,175 @@ +# --- +# jupyter: +# jupytext: +# formats: ipynb,py:light +# text_representation: +# extension: .py +# format_name: light +# format_version: '1.5' +# jupytext_version: 1.15.0 +# kernelspec: +# display_name: Python 3 (ipykernel) +# language: python +# name: python3 +# --- + +# # Characterization of stressors + +# The characterization of stressors is a standard procedure to calculate the environmental and social impacts of economic activity. This is usually accomplished by multiplying (matrix-multiplication) the stressor-matrix with a characterization-matrix. Doing that in the matrix forms requires a 1:1 correspondence of the columns of the characterization matrix to the rows of the stressor-matrix. + +# Pymrio uses a different approach with matching the strings of the +# characterization table (given in long-format) to the available stressors. By +# doing that, the order of the entries in the characterization-table becomes +# unimportant. +# This implementation also allows to use characterization tables which includes +# characterization for stressors not present in the given satellite account. 
All +# characterizations relying on not available stressor will be automatically +# removed. + +# ## Example + +# For this example we use the test MRIO included in Pymrio. We also need +# the Pandas library for loading the characterization table and pathlib for some folder manipulation. + + +from pathlib import Path + +import pandas as pd + + +import pymrio +from pymrio.core.constants import PYMRIO_PATH # noqa + +# To load the test MRIO we use: + +io = pymrio.load_test() + +# and the characterization table with some foo factors can be loaded by + +charact_table = pd.read_csv( + (PYMRIO_PATH["test_mrio"] / Path("concordance") / "emissions_charact.tsv"), + sep="\t", +) +charact_table + +# This table contains the columns 'stressor' and 'compartment' which correspond +# to the index names of the test_mrio emission satellite accounts: + +io.emissions.F + +# Theses index-names / columns-names need to match in order to match +# characterization factors to the stressors. + +# The other columns names can be passed to the characterization method. By default the method assumes the following column names: +# +# - impact: name of the characterization/impact +# - factor: the numerical (float) multiplication value for a specific stressor to derive the impact/characterized account +# - impact_unit: the unit of the calculated characterization/impact +# +# Alternative names can be passed through the parameters +# *characterized_name_column*, *characterization_factors_column* and *characterized_unit_column*. +# +# Note, that units of stressor are currently not checked - units as given in +# the satellite account to be characterized are assumed. These can be seen by: + +io.emissions.unit + +# Also note, that the charact_table contains a characterization called 'total +# emissions', for which the calculation requires a stressor not present in the +# satellite account. This will be automatically omitted. 
+ +# To calculate the characterization we use + +impacts = io.emissions.characterize(charact_table, name="impacts") + +# The parameter *name* is optional; if omitted the name will be set to +# extension_name + _characterized + +# The method call above results in a pymrio.Extension which can be inspected with the usual +# methods, e.g.: + +impacts.F + +impacts.F_Y + +# and the extension can be added to the MRIO + +io.impacts = impacts + +# and used for subsequent calculations: + +io.calc_all() +io.impacts.D_cba + +# ### Characterizing calculated results + +# The characterize method can also be used to characterize already calculated +# results. This works in the same way: + +io_aly = pymrio.load_test().calc_all() + +io_aly.emissions.D_cba + +io_aly.impacts = io_aly.emissions.characterize(charact_table, name="impacts_new") + +# Note, that all results which can be characterized directly (all flow accounts +# like D_cba, D_pba, ...) are automatically included: + +io_aly.impacts.D_cba + +# Whereas coefficient accounts (M, S) are removed: + +io_aly.impacts.M + +# To calculate these use + +io_aly.calc_all() +io_aly.impacts.M + +# which will calculate the missing accounts. + +# For these calculations, the characterized accounts can also be used outside +# the MRIO system. Thus: + +independent_extension = io_aly.emissions.characterize(charact_table, name="impacts_new") + +type(independent_extension) + +independent_extension.M + +independent_extension_calc = independent_extension.calc_system(x=io_aly.x, Y=io_aly.Y) + +independent_extension.M + +# ## Inspecting the used characterization table + +# Pymrio automatically adjusts the characterization table by removing accounts +# which cannot be calculated using a given extension. The removed accounts are +# reported through a warning message (e.g. "WARNING:root:Impact >total +# emissions< removed - calculation requires stressors not present in extension +# >Emissions<" in the examples above).
+# +# It is also possible, to obtain the cleaned characterization-table for +# inspection and further use. To do so: + +impacts = io.emissions.characterize( + charact_table, name="impacts", return_char_matrix=True +) + +# This changes the return type from a pymrio.Extension to a named tuple + +type(impacts) + +# with + +impacts.extension + +# and + +impacts.factors + +# The latter is the characterization table used for the calculation. +# +# For further information see the characterization docstring: + +print(io.emissions.characterize.__doc__) diff --git a/doc/source/notebooks/working_with_exiobase.ipynb b/doc/source/notebooks/working_with_exiobase.ipynb index b2edd95b..1723d3d3 100644 --- a/doc/source/notebooks/working_with_exiobase.ipynb +++ b/doc/source/notebooks/working_with_exiobase.ipynb @@ -807,7 +807,7 @@ "metadata": { "anaconda-cloud": {}, "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, @@ -821,7 +821,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.5" + "version": "3.10.12" } }, "nbformat": 4, diff --git a/doc/source/notebooks/working_with_exiobase.py b/doc/source/notebooks/working_with_exiobase.py new file mode 100644 index 00000000..841ebb1a --- /dev/null +++ b/doc/source/notebooks/working_with_exiobase.py @@ -0,0 +1,202 @@ +# --- +# jupyter: +# jupytext: +# text_representation: +# extension: .py +# format_name: percent +# format_version: '1.3' +# jupytext_version: 1.15.0 +# kernelspec: +# display_name: Python 3 (ipykernel) +# language: python +# name: python3 +# --- + +# %% [markdown] +# # Working with the EXIOBASE EE MRIO database + +# %% [markdown] +# ## Getting EXIOBASE + +# %% [markdown] +# EXIOBASE 1 (developed in the fp6 project [EXIOPOL](http://www.feem-project.net/exiopol/)), EXIOBASE 2 (outcome of the fp7 project [CREEA](http://www.creea.eu/)) and EXIOBASE 3 (outcome of the fp7 project 
[DESIRE](http://fp7desire.eu/)) are available on the [EXIOBASE webpage](http://www.exiobase.eu). +# +# You need to register before you can download the full dataset. +# +# Further information on the different EXIOBASE versions can be found in corresponding method papers. +# +# * EXIOBASE 1: [Tukker et al. 2013. Exiopol – Development and Illustrative Analyses of a Detailed Global MR EE SUT/IOT. Economic Systems Research 25(1), 50-70](https://doi.org/10.1080/09535314.2012.761952) +# * EXIOBASE 2: [Wood et al. 2015. Global Sustainability Accounting—Developing EXIOBASE for Multi-Regional Footprint Analysis. Sustainability 7(1), 138-163](https://doi.org/10.3390/su7010138) +# * EXIOBASE 3: [Stadler et al. 2018. EXIOBASE 3: Developing a Time Series of Detailed Environmentally Extended Multi‐Regional Input‐Output Tables. Journal of Industrial Ecology 22(3), 502-515](https://doi.org/10.1111/jiec.12715) + +# %% [markdown] +# ### EXIOBASE 1 + +# %% [markdown] +# To download EXIOBASE 1 for the use with pymrio, navigate to the [EXIOBASE webpage]( https://www.exiobase.eu) - section(tab) "Data Download" - "[EXIOBASE 1 - full dataset](http://exiobase.eu/index.php/data-download/exiobase1-year-2000-full-data-set)" and download either +# +# - [pxp_ita_44_regions_coeff_txt](https://www.exiobase.eu/index.php/data-download/exiobase1-year-2000-full-data-set/49-pxp-ita-44-regions-coeff-txt/file) for the product by product (pxp) MRIO system or +# +# - [ixi_fpa_44_regions_coeff_txt](https://www.exiobase.eu/index.php/data-download/exiobase1-year-2000-full-data-set/25-ixi-fpa-44-regions-coeff-txt/file) for the industry by industry (ixi) MRIO system or +# +# - [pxp_ita_44_regions_coeff_src_txt](https://www.exiobase.eu/index.php/data-download/exiobase1-year-2000-full-data-set/52-pxp-ita-44-regions-coeff-src-txt/file) for the product by product (pxp) MRIO system with emission data per source or +# +# - 
[ixi_fpa_44_regions_coeff_src_txt](https://www.exiobase.eu/index.php/data-download/exiobase1-year-2000-full-data-set/28-ixi-fpa-44-regions-coeff-src-txt/file) for the industry by industry (ixi) MRIO system with emission data per source. +# +# The links above directly lead to the required file(s), but remember that you need to be logged in to access them. + +# %% [markdown] +# The Pymrio parser works with the compressed (zip) files as well as the unpacked files. If you want to unpack the files, make sure that you store them in different folders since they unpack in the current directory. + +# %% [markdown] +# ### EXIOBASE 2 + +# %% [markdown] +# EXIOBASE 2 is available at the [EXIOBASE webpage](http://www.exiobase.eu) at the section (tab) "Data Download" - "[EXIOBASE 2 - full dataset](http://exiobase.eu/index.php/data-download/exiobase2-year-2007-full-data-set)". +# +# You can download either +# +# +# - [MrIOT PxP ita coefficient version2 2 2](http://www.exiobase.eu/index.php/data-download/exiobase2-year-2007-full-data-set/79-mriot-pxp-ita-coefficient-version2-2-2/file) for the product by product (pxp) MRIO system or +# +# - [MrIOT IxI fpa coefficient version2 2 2](http://www.exiobase.eu/index.php/data-download/exiobase2-year-2007-full-data-set/78-mriot-ixi-fpa-coefficient-version2-2-2/file) for the industry by industry (ixi) MRIO system. +# +# The links above directly lead to the required file(s), but remember that you need to be logged in to access them. +# +# The pymrio parser works with the compressed (zip) files as well as the unpacked files. You can unpack the files together in one directory (unpacking creates a separate folder for each EXIOBASE 2 version). The unpacking of the PxP version also creates a folder "__MACOSX" - you can delete this folder.
+ +# %% [markdown] +# ### EXIOBASE 3 + +# %% [markdown] +# EXIOBASE 3 is available at the [EXIOBASE webpage](http://www.exiobase.eu) at the section (tab) "Data Download" - "[EXIOBASE 3 - monetary](http://exiobase.eu/index.php/data-download/exiobase3mon)". +# The EXIOBASE 3 parser works with both, the compressed zip archives and the extracted database. +# + +# %% [markdown] +# ## Parsing + +# %% +import pymrio + +# %% [markdown] +# For each publicly available version of EXIOBASE pymrio provides a specific parser. +# All exiobase parsers work with the zip archive (as downloaded from the exiobase webpage) or the extracted data. + +# %% [markdown] +# To parse **EXIOBASE 1** use: + +# %% jupyter={"outputs_hidden": false} +exio1 = pymrio.parse_exiobase1( +    path="/tmp/mrios/exio1/zip/121016_EXIOBASE_pxp_ita_44_regions_coeff_txt.zip" +) + +# %% [markdown] +# The parameter 'path' needs to point to either the folder with the extracted EXIOBASE1 files or the downloaded zip archive. + +# %% [markdown] +# Similarly, **EXIOBASE 2** can be parsed by: + +# %% +exio2 = pymrio.parse_exiobase2( +    path="/tmp/mrios/exio2/zip/mrIOT_PxP_ita_coefficient_version2.2.2.zip", +    charact=True, +    popvector="exio2", +) + +# %% [markdown] +# The additional parameter 'charact' specifies if the characterization matrix provided with EXIOBASE 2 should be used. This can be specified with True or False; in addition, a custom one can be provided. In the latter case, pass the full path to the custom characterisation file to 'charact'. +# +# The parameter 'popvector' allows to pass information about the population per EXIOBASE2 country. This can either be a custom vector or, if 'exio2' is passed, the one provided with pymrio. + +# %% [markdown] +# **EXIOBASE 3** can be parsed by: + +# %% +exio3 = pymrio.parse_exiobase3(path="/tmp/mrios/exio3/zip/exiobase3.4_iot_2009_pxp.zip") + +# %% [markdown] +# Currently, no characterization or population vectors are provided for EXIOBASE 3.
+ +# %% [markdown] +# For the rest of the tutorial, we use *exio2*; deleting *exio1* and *exio3* to free some memory: + +# %% jupyter={"outputs_hidden": false} +del exio1 +del exio3 + +# %% [markdown] +# ## Exploring EXIOBASE + +# %% [markdown] +# After parsing an EXIOBASE version, the handling of the database is the same as for any IO. +# Here we use the parsed EXIOBASE2 to explore some characteristics of the EXIOBASE system. +# +# After reading the raw files, metadata about EXIOBASE can be accessed within the meta field: + +# %% jupyter={"outputs_hidden": false} +exio2.meta + +# %% [markdown] +# Custom points can be added to the history in the meta record. For example: + +# %% jupyter={"outputs_hidden": false} +exio2.meta.note("First test run of EXIOBASE 2") +exio2.meta + +# %% [markdown] +# To check for sectors, regions and extensions: + +# %% jupyter={"outputs_hidden": false} +exio2.get_sectors() + +# %% jupyter={"outputs_hidden": false} +exio2.get_regions() + +# %% jupyter={"outputs_hidden": false} +list(exio2.get_extensions()) + +# %% [markdown] +# ## Calculating the system and extension results + +# %% [markdown] +# The following command checks for missing parts in the system and calculates them. In case of the parsed EXIOBASE this includes A, L, multipliers M, footprint accounts, ..
+ +# %% jupyter={"outputs_hidden": false} +exio2.calc_all() + +# %% [markdown] +# ## Exploring the results + +# %% jupyter={"outputs_hidden": false} +import matplotlib.pyplot as plt + +plt.figure(figsize=(15, 15)) +plt.imshow(exio2.A, vmax=1e-3) +plt.xlabel("Countries - sectors") +plt.ylabel("Countries - sectors") +plt.show() + +# %% [markdown] +# The available impact data can be checked with: + +# %% jupyter={"outputs_hidden": false} +list(exio2.impact.get_rows()) + +# %% [markdown] +# And to get for example the footprint of a specific impact do: + +# %% jupyter={"outputs_hidden": false} +print(exio2.impact.unit.loc["global warming (GWP100)"]) +exio2.impact.D_cba_reg.loc["global warming (GWP100)"] + +# %% [markdown] +# ## Visualizing the data + +# %% jupyter={"outputs_hidden": false} +with plt.style.context("ggplot"): + exio2.impact.plot_account(["global warming (GWP100)"], figsize=(15, 10)) + plt.show() + +# %% [markdown] +# See the other notebooks for further information on [aggregation](../notebooks/aggregation_examples.ipynb) and [file io](../notebooks/load_save_export.ipynb). 
diff --git a/doc/source/notebooks/working_with_wiod.ipynb b/doc/source/notebooks/working_with_wiod.ipynb index 37021ff7..e9876ac8 100644 --- a/doc/source/notebooks/working_with_wiod.ipynb +++ b/doc/source/notebooks/working_with_wiod.ipynb @@ -1457,7 +1457,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, @@ -1471,7 +1471,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.5" + "version": "3.10.12" } }, "nbformat": 4, diff --git a/doc/source/notebooks/working_with_wiod.py b/doc/source/notebooks/working_with_wiod.py new file mode 100644 index 00000000..cf35524b --- /dev/null +++ b/doc/source/notebooks/working_with_wiod.py @@ -0,0 +1,107 @@ +# --- +# jupyter: +# jupytext: +# text_representation: +# extension: .py +# format_name: percent +# format_version: '1.3' +# jupytext_version: 1.15.0 +# kernelspec: +# display_name: Python 3 (ipykernel) +# language: python +# name: python3 +# --- + +# %% [markdown] +# # Handling the WIOD EE MRIO database + +# %% [markdown] +# ## Getting the database + +# %% [markdown] +# The WIOD database is available at http://www.wiod.org. You can download these files with the pymrio automatic downloader as described at [WIOD download](autodownload.ipynb#WIOD-download). + +# %% [markdown] +# In the most simple case you get the full WIOD database with: + +# %% +import pymrio + +# %% +wiod_storage = "/tmp/mrios/WIOD2013" + +# %% +wiod_meta = pymrio.download_wiod2013(storage_folder=wiod_storage) + +# %% [markdown] +# This download the whole 2013 release of WIOD including all extensions. + +# %% [markdown] +# The extension (satellite accounts) are provided as zip files. You can use them directly in pymrio (without extracting them). If you want to have them extracted, create a folder with the name of each extension (without the ending ".zip") and extract the zip file there. 
+ +# %% [markdown] +# ## Parsing + +# %% [markdown] +# ### Parsing a single year + +# %% [markdown] +# A single year of the WIOD database can be parsed by: + +# %% +wiod2007 = pymrio.parse_wiod(year=2007, path=wiod_storage) + +# %% [markdown] +# Which loads the specific year and extension data: + +# %% +wiod2007.Z.head() + +# %% +wiod2007.AIR.F + +# %% [markdown] +# If a WIOD SEA file is present (at the root of path or in a folder named +# 'SEA' - only one file!), the labor data of this file gets included in the +# factor_input extension (calculated for the three skill levels +# available). The monetary data in this file is not added because it is only +# given in national currency: + +# %% +wiod2007.SEA.F + +# %% [markdown] +# Provenance tracking and additional meta data is available in the field ```meta```: + +# %% +print(wiod2007.meta) + +# %% [markdown] +# WIOD provides three different sector/final demand categories naming +# schemes. The one to use for pymrio can be specified by passing a tuple +# ```names=``` with: +# +# 1) 'isic': ISIC rev 3 Codes - available for interindustry flows and final demand rows. +# +# 2) 'full': Full names - available for final demand rows and final demand columns (categories) and interindustry flows. +# +# 3) 'c_codes' : WIOD specific sector numbers, available for final demand rows and columns (categories) and interindustry flows. +# +# Internally, the parser relies on 1) for the interindustry flows and 3) for the final demand categories. This is the default and will also be used if just 'isic' gets passed ('c_codes' also replaces 'isic' if this was passed for final demand categories). To specify different final consumption category names, pass a tuple with (sectors/interindustry classification, fd categories), eg ('isic', 'full'). Names are case insensitive and passing the first character is sufficient.
+# +# For example, for loading wiod with full sector names: +# +# + +# %% +wiod2007_full = pymrio.parse_wiod(year=2007, path=wiod_storage, names=("full", "full")) +wiod2007_full.Y.head() + +# %% [markdown] +# The wiod parsing routine provides some more options - for a full specification see [the API reference](../api_doc/pymrio.parse_wiod.rst) + +# %% [markdown] +# ### Parsing multiple years + +# %% [markdown] +# Multiple years can be passed by running the parser in a for loop. diff --git a/pymrio/core/mriosystem.py b/pymrio/core/mriosystem.py index 275dde6e..d838c8c5 100644 --- a/pymrio/core/mriosystem.py +++ b/pymrio/core/mriosystem.py @@ -1630,7 +1630,7 @@ def get_row_data(self, row, name=None): retdict["name"] = name return retdict - def extract(self, index, dataframes=None, return_as_extension=False): + def extract(self, index, dataframes=None, return_type="dataframes"): """Returns a dict with all available data for a row in the extension. @@ -1645,16 +1645,16 @@ def extract(self, index, dataframes=None, return_as_extension=False): all available dataframes are extracted. If the list contains dataframes which are not available, a warning is issued and the missing dataframes are ignored. - return_as_extension : boolean or str, optional - If True, returns an Extension object with the extracted data. - Can also be a string with the name for the new extension (otherwise set - based on the current extension name +_extracted). - If False (default), returns a dict with the extracted data. + return_type: str, optional + If 'dataframe' or 'df' (also with 's' plural, default), the returned dict contains dataframes. + If 'extension' or 'ext' (also with 's' plural) an Extension object is returned (named like the original with _extracted appended). + Any other string: an Extension object is returned, with the name set to the passed string. 
Returns ------- dict object with the data (pandas DataFrame) for the specific rows + or an Extension object (based on return_type) """ if type(index) is dict: @@ -1674,14 +1674,13 @@ def extract(self, index, dataframes=None, return_as_extension=False): data = getattr(self, dfname) retdict[dfname] = pd.DataFrame(data.loc[index]) - if return_as_extension: - if type(return_as_extension) is str: - ext_name = return_as_extension - else: - ext_name = self.name + "_extracted" - return Extension(name=ext_name, **retdict) - else: + if return_type.lower() in ["dataframes", "dataframe", "dfs", "df"]: return retdict + elif return_type.lower() in ["extensions", "extension", "ext", "exts"]: + ext_name = self.name + "_extracted" + else: + ext_name = return_type + return Extension(name=ext_name, **retdict) def diag_stressor(self, stressor, name=None, _meta=None): """Diagonalize one row of the stressor matrix for a flow analysis. @@ -2443,7 +2442,7 @@ def extension_extract( extensions with non-empty extracted data are returned. return_type: str, optional - If 'dataframes' or 'df' (default), the returned dict contains dataframes. + If 'dataframe' or 'df' (also with 's' plural, default), the returned dict contains dataframes. If 'extensions' or 'ext', the returned dict contains Extension instances. Any other string: Return one merged extension with the name set to the passed string (this will automatically exclude empty extensions). 
@@ -2456,22 +2455,22 @@ def extension_extract( the matched rows as values """ - if return_type.lower() in ["dataframes", "df"]: + if return_type.lower() in ["dataframes", "dataframe", "dfs", "df"]: return_as_extension = False + ext_name = None + elif return_type.lower() in ["extensions", "extension", "ext", "exts"]: + return_as_extension = True + ext_name = None else: return_as_extension = True - - if return_type.lower() not in ["dataframes", "df", "ext", "extension"]: ext_name = return_type - else: - ext_name = None extracts = self._apply_extension_method( extensions=None, method="extract", index=index_dict, dataframes=dataframes, - return_as_extension=return_as_extension, + return_type=return_type, ) if (not include_empty) or ext_name: diff --git a/pymrio/tools/ioutil.py b/pymrio/tools/ioutil.py index bbe29767..815798ac 100644 --- a/pymrio/tools/ioutil.py +++ b/pymrio/tools/ioutil.py @@ -997,16 +997,16 @@ def _index_regex_matcher(_dfs_idx, _method, _find_all=None, **kwargs): return _dfs_idx + def _get_sample(): # DEV: Remove for release - + pass +def match_and_convert(df_orig, df_map, agg_func="sum"): + """Match and convert a DataFrame to a new classification -def match_and_convert(df_orig, df_map, agg_func='sum'): - """ Match and convert a DataFrame to a new classification - Parameters ---------- df_orig : pd.DataFrame @@ -1015,13 +1015,13 @@ def match_and_convert(df_orig, df_map, agg_func='sum'): df_map : pd.DataFrame The DataFrame with the mapping of the old to the new classification. - This requires a specific structure, which depends on the structure of the + This requires a specific structure, which depends on the structure of the dataframe to be characterized: one column for each index level in the dataframe and one column for each new index level in the characterized result dataframe. This is better explained with an example. 
Assuming a dataframe with index names 'stressor' and 'compartment' the characterizing dataframe would have the following structure: - + stressor ... original index name compartment ... original index name factor ... the factor for multiplication @@ -1049,23 +1049,23 @@ def match_and_convert(df_orig, df_map, agg_func='sum'): # agg_func = 'sum' # df_map = pd.DataFrame( # columns=["em_type", "compart", "total__em_type", "factor"], - # data=[["em.*", "air|water", "total_regex", 2], - # ["em1", "air", "total_sum", 2], - # ["em1", "water", "total_sum", 2], - # ["em2", "air", "total_sum", 2], - # ["em2", "water", "total_sum", 2], - # ["em1", "air", "all_air", 0.5], + # data=[["em.*", "air|water", "total_regex", 2], + # ["em1", "air", "total_sum", 2], + # ["em1", "water", "total_sum", 2], + # ["em2", "air", "total_sum", 2], + # ["em2", "water", "total_sum", 2], + # ["em1", "air", "all_air", 0.5], # ["em2", "air", "all_air", 0.5]], # ) # # df_map = pd.DataFrame( # columns=["em_type", "total__em_type", "factor"], - # data=[["em.*", "total_regex", 2], - # ["em1", "total_sum", 2], - # ["em1", "total_sum", 2], - # ["em2", "total_sum", 2], - # ["em2", "total_sum", 2], - # ["em1", "all_air", 0.5], + # data=[["em.*", "total_regex", 2], + # ["em1", "total_sum", 2], + # ["em1", "total_sum", 2], + # ["em2", "total_sum", 2], + # ["em2", "total_sum", 2], + # ["em1", "all_air", 0.5], # ["em2", "all_air", 0.5]], # ) @@ -1076,12 +1076,12 @@ def match_and_convert(df_orig, df_map, agg_func='sum'): # columns=pd.MultiIndex.from_product([["r1", "c1"], ["r2", "c2"]]), # ) # df_orig.columns.names = ["reg", "sec"] - # df_orig.index.names = ["em_type", "compart"] + # df_orig.index.names = ["em_type", "compart"] new_col = [col for col in df_map.columns if "__" in col] unique_new_index = df_map.loc[:, new_col].value_counts().index - df_map = df_map.set_index(new_col) + df_map = df_map.set_index(new_col) res_collector = [] # loop over each new impact/characterized value @@ -1091,7 +1091,9 @@ def 
match_and_convert(df_orig, df_map, agg_func='sum'): else: df_cur_map = df_map.loc[[char]] - agg_method = df_cur_map.agg_func if 'agg_func' in df_cur_map.columns else agg_func + agg_method = ( + df_cur_map.agg_func if "agg_func" in df_cur_map.columns else agg_func + ) collector = [] @@ -1107,31 +1109,31 @@ def match_and_convert(df_orig, df_map, agg_func='sum'): for idx_rename in df_cur_map.index.names: try: - new_idx_rename, old_idx_rename = idx_rename.split('__') + new_idx_rename, old_idx_rename = idx_rename.split("__") new_name_order.append(new_idx_rename) except ValueError: raise ValueError( - f"Column {idx_rename} does not contain/contains more then one '__'") + f"Column {idx_rename} does not contain/contains more then one '__'" + ) for idx_old_names in df_collected.index.names: if old_idx_rename in idx_old_names: - df_collected.index = df_collected.index.set_names(new_idx_rename, - level=idx_old_names) + df_collected.index = df_collected.index.set_names( + new_idx_rename, level=idx_old_names + ) df_collected.reset_index(level=new_idx_rename, inplace=True) for row in df_cur_map.reset_index().iterrows(): - new_row_name = row[1][idx_rename] old_row_name = row[1][old_idx_rename] - df_collected.loc[:, new_idx_rename] = df_collected.loc[:, new_idx_rename].str.replace(pat=old_row_name, - repl=new_row_name, - regex=True) + df_collected.loc[:, new_idx_rename] = df_collected.loc[ + :, new_idx_rename + ].str.replace(pat=old_row_name, repl=new_row_name, regex=True) - df_collected.set_index(new_idx_rename, - drop=True, - append=True, - inplace=True) + df_collected.set_index( + new_idx_rename, drop=True, append=True, inplace=True + ) # append name not in new_name_order if any @@ -1139,11 +1141,12 @@ def match_and_convert(df_orig, df_map, agg_func='sum'): if idx_name not in new_name_order: new_name_order.append(idx_name) df_collected = df_collected.reorder_levels(new_name_order, axis=0) - + # CONT: new test cases and logic for compartment included # Idea is to pass through 
all index levels which are not specified in the map or in the __ columns # To remove a level, provide __ and give it one common name (e.g. "DROP") and then remove - res_collector.append(df_collected.groupby(by=df_collected.index.names).agg(agg_method)) + res_collector.append( + df_collected.groupby(by=df_collected.index.names).agg(agg_method) + ) return pd.concat(res_collector, axis=0) - diff --git a/pymrio/version.py b/pymrio/version.py index b2d3c80f..27d4e712 100644 --- a/pymrio/version.py +++ b/pymrio/version.py @@ -1,2 +1 @@ __version__ = "0.6.dev" - diff --git a/tests/test_core.py b/tests/test_core.py index 02c25258..7fe59ccf 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -282,19 +282,19 @@ def test_extract(fix_testmrio): tt = fix_testmrio.testmrio.copy().calc_all() all_index = tt.emissions.get_index() - new_all = tt.emissions.extract(all_index, return_as_extension="new_all") + new_all = tt.emissions.extract(all_index, return_type="new_all") assert new_all.name == "new_all" for df in tt.emissions.get_DataFrame(): assert df in new_all.get_DataFrame() - name_check = tt.emissions.extract(all_index, return_as_extension=True) + name_check = tt.emissions.extract(all_index, return_type="ext") assert name_check.name == "Emissions_extracted" for df in tt.emissions.get_DataFrame(): assert df in name_check.get_DataFrame() id_air = tt.emissions.match(compartment="air") - new_air = tt.emissions.extract(index=id_air, return_as_extension="new_air") + new_air = tt.emissions.extract(index=id_air, return_type="new_air") assert "F" in new_air.get_DataFrame() assert "S" in new_air.get_DataFrame() @@ -314,7 +314,7 @@ def test_extension_extract(fix_testmrio): assert dfa["Factor Inputs"]["F"].shape[0] == 0 exta = tt.extension_extract( - match_air, dataframes=["F", "F_Y"], include_empty=True, return_as_extension=True + match_air, dataframes=["F", "F_Y"], include_empty=True, return_type="extension" ) assert exta["Factor Inputs"].F.shape[0] == 0 assert exta["Factor 
Inputs"].name == "Factor Inputs_extracted" @@ -325,7 +325,7 @@ def test_extension_extract(fix_testmrio): match_air, dataframes=["F", "F_Y"], include_empty=False, - return_as_extension=True, + return_type="ext", ) assert dfr["Emissions"]["F"].index[0] == ("emission_type1", "air") assert extr["Emissions"].F.index[0] == ("emission_type1", "air") diff --git a/tests/test_util.py b/tests/test_util.py index 8bd9ef3c..0c0ec6d5 100644 --- a/tests/test_util.py +++ b/tests/test_util.py @@ -355,7 +355,7 @@ def test_util_regex(): assert len(df_none_match_index) == 0 -def test_char_table(): +def test_match_and_convert(): """Testing the characterization of one table""" to_char = pd.DataFrame(