Skip to content

Commit

Permalink
finished extract notebook
Browse files Browse the repository at this point in the history
  • Loading branch information
konstantinstadler committed Apr 9, 2024
1 parent cc968fe commit e9b3f7c
Show file tree
Hide file tree
Showing 3 changed files with 61 additions and 38 deletions.
4 changes: 4 additions & 0 deletions doc/source/notebooks/explore.py
Original file line number Diff line number Diff line change
Expand Up @@ -214,3 +214,7 @@

# %%
pymrio.index_fullmatch(df, region="reg[2,4]", sector="m.*")

# %% [markdown]
# All search methods can easily be combined with the extract methods to extract the data that was found.
# For more information on this, see the [extract_data](./extract_data.ipynb) notebook.
60 changes: 33 additions & 27 deletions doc/source/notebooks/extract_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,8 +61,7 @@
# To do so, we first define the rows (index) to extract:

# %%
rows_to_extract =[('emission_type1', 'air'),
('emission_type2', 'water')]
rows_to_extract = [("emission_type1", "air"), ("emission_type2", "water")]

# %% [markdown]
# We can now use the `extract` method to extract the data, either as a pandas DataFrame
Expand All @@ -82,29 +81,31 @@
# Note that the name of the extension object is now `Emissions_extracted`, based on the name of the original extension object.
# To use another name, just pass the name as the `return_type` parameter.

# %%
# %%
new_extension = mrio.emissions.extract(rows_to_extract, return_type="new_extension")
str(new_extension)

# %% [markdown]
# Extracting to dataframes is also a convenient
# Extracting to dataframes is also a convenient
# way to convert an extension object to a dictionary:

# %%
# %%
df_all = mrio.emissions.extract(mrio.emissions.get_rows(), return_type="dfs")
df_all.keys()


# The method also allows extracting only some of the accounts:
df_some = mrio.emissions.extract(mrio.emissions.get_rows(), dataframes=['D_cba', 'D_pba'], return_type="dfs")
df_some = mrio.emissions.extract(
mrio.emissions.get_rows(), dataframes=["D_cba", "D_pba"], return_type="dfs"
)
df_some.keys()


# %% [markdown]
#### Extracting from all extensions

# %% [markdown]
# We can also extract data from all extensions at once.
# We can also extract data from all extensions at once.
# This is done using the `extension_extract` method from the pymrio object.
# This expects a dict with keys based on the extension names and values as a list of rows (index) to extract.

Expand All @@ -113,22 +114,22 @@
# We first define the rows (index) to extract:

# %%
to_extract = {'Factor Inputs': 'Value Added',
'Emissions': [('emission_type1', 'air'),
('emission_type2', 'water')]}

to_extract = {
"Factor Inputs": "Value Added",
"Emissions": [("emission_type1", "air"), ("emission_type2", "water")],
}


# %% [markdown]
# And can then use the `extension_extract` method to extract the data, either as a pandas DataFrame,
# And can then use the `extension_extract` method to extract the data, either as a pandas DataFrame,
# which returns a dictionary with the extension names as keys

# %%
df_extract_all = mrio.extension_extract(to_extract, return_type="dataframe")
df_extract_all.keys()

# %%
df_extract_all['Factor Inputs'].keys()
# %%
df_extract_all["Factor Inputs"].keys()

# %% [markdown]
# We can also extract into a dictionary of extension objects:
Expand All @@ -137,8 +138,8 @@
ext_extract_all = mrio.extension_extract(to_extract, return_type="extensions")
ext_extract_all.keys()

# %%
str(ext_extract_all['Factor Inputs'])
# %%
str(ext_extract_all["Factor Inputs"])

# %% [markdown]
# Or merge the extracted data into a new pymrio Extension object (when passing a new name as return_type):
Expand All @@ -150,20 +151,25 @@
# %% [markdown]
# CONT: Continue with explaining, mention the work with find_all etc

# CONT: Make test cases for the things below


mrio.factor_inputs.extract("Value Added", return_type="ext").F

mrio.factor_inputs.extract(("Value Added"), return_type="ext").F
# %% [markdown]
#### Search and extract

mrio.factor_inputs.extract(["Value Added"], return_type="ext").F
# %% [markdown]
# The extract methods can also be used in combination with the [search/explore](./explore.ipynb) methods of pymrio.
# This allows searching for specific rows and then extracting the data.

# %% [markdown]
# For example, to extract all emissions from the air compartment we can use:

mrio.factor_inputs.extract(mrio.factor_inputs.get_rows(), return_type="ext").F
# %%
match_air = mrio.extension_match(find_all="air")

mrio.emissions.extract(mrio.emissions.get_rows(), return_type="ext").F
# %% [markdown]
# And then make a new extension object with the extracted data:

mrio.emissions.extract(mrio.emissions.get_rows()[0], return_type="ext").F
# %%
air_emissions = mrio.emissions.extract(match_air, return_type="extracted_air_emissions")
print(air_emissions)

mrio.emissions.get_rows()[0]
# %% [markdown]
# For more information on the search methods see the [explore notebook](./explore.ipynb).
35 changes: 24 additions & 11 deletions tests/test_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -305,17 +305,31 @@ def test_extract(fix_testmrio):
assert "FOO" not in with_missing.keys()

# Test for correct shape when extracting one row
assert tt.factor_inputs.extract("Value Added", return_type="ext").F.index == tt.factor_inputs.get_rows()
assert tt.factor_inputs.extract(("Value Added"), return_type="ext").F.index == tt.factor_inputs.get_rows()
assert tt.factor_inputs.extract(["Value Added"], return_type="ext").F.index == tt.factor_inputs.get_rows()

assert tt.factor_inputs.extract(tt.factor_inputs.get_rows(), return_type="ext").F.index == tt.factor_inputs.get_rows()
pdt.assert_index_equal(tt.emissions.extract(tt.emissions.get_rows(), return_type="ext").F.index, tt.emissions.get_rows())
assert tt.emissions.extract(tt.emissions.get_rows()[0], return_type="ext").F.index == tt.emissions.get_rows()[0]



assert (
tt.factor_inputs.extract("Value Added", return_type="ext").F.index
== tt.factor_inputs.get_rows()
)
assert (
tt.factor_inputs.extract(("Value Added"), return_type="ext").F.index
== tt.factor_inputs.get_rows()
)
assert (
tt.factor_inputs.extract(["Value Added"], return_type="ext").F.index
== tt.factor_inputs.get_rows()
)

assert (
tt.factor_inputs.extract(tt.factor_inputs.get_rows(), return_type="ext").F.index
== tt.factor_inputs.get_rows()
)
pdt.assert_index_equal(
tt.emissions.extract(tt.emissions.get_rows(), return_type="ext").F.index,
tt.emissions.get_rows(),
)
assert (
tt.emissions.extract(tt.emissions.get_rows()[0], return_type="ext").F.index
== tt.emissions.get_rows()[0]
)


def test_extension_extract(fix_testmrio):
Expand Down Expand Up @@ -356,7 +370,6 @@ def test_extension_extract(fix_testmrio):
assert all(dfm.get_sectors() == tt.get_sectors())



def test_diag_stressor(fix_testmrio):
stressor_name = ("emission_type1", "air")
stressor_number = 0
Expand Down

0 comments on commit e9b3f7c

Please sign in to comment.