From e1df337bf1904d94489bcde963657a0d3c6ab748 Mon Sep 17 00:00:00 2001 From: Christopher Cave-Ayland Date: Tue, 25 Jun 2024 21:32:06 +0100 Subject: [PATCH 01/43] Add new model and test scaffolds --- .../default_new_input/agent_objectives.csv | 3 + .../example/default_new_input/agent_pairs.csv | 2 + .../default_new_input/agent_regions.csv | 2 + .../data/example/default_new_input/agents.csv | 3 + .../data/example/default_new_input/assets.csv | 8 +++ .../example/default_new_input/commodities.csv | 6 ++ .../default_new_input/commodity_costs.csv | 58 +++++++++++++++++++ .../default_new_input/commodity_trade.csv | 1 + .../data/example/default_new_input/demand.csv | 3 + .../default_new_input/demand_slicing.csv | 7 +++ .../process_availabilities.csv | 6 ++ .../default_new_input/process_flows.csv | 12 ++++ .../default_new_input/process_parameters.csv | 6 ++ .../default_new_input/process_regions.csv | 6 ++ .../example/default_new_input/processes.csv | 6 ++ .../example/default_new_input/regions.csv | 2 + .../example/default_new_input/sectors.csv | 4 ++ .../example/default_new_input/time_slices.csv | 7 +++ src/muse/examples.py | 8 +++ 19 files changed, 150 insertions(+) create mode 100644 src/muse/data/example/default_new_input/agent_objectives.csv create mode 100644 src/muse/data/example/default_new_input/agent_pairs.csv create mode 100644 src/muse/data/example/default_new_input/agent_regions.csv create mode 100644 src/muse/data/example/default_new_input/agents.csv create mode 100644 src/muse/data/example/default_new_input/assets.csv create mode 100644 src/muse/data/example/default_new_input/commodities.csv create mode 100644 src/muse/data/example/default_new_input/commodity_costs.csv create mode 100644 src/muse/data/example/default_new_input/commodity_trade.csv create mode 100644 src/muse/data/example/default_new_input/demand.csv create mode 100644 src/muse/data/example/default_new_input/demand_slicing.csv create mode 100644 src/muse/data/example/default_new_input/process_availabilities.csv create mode 100644 src/muse/data/example/default_new_input/process_flows.csv create mode 100644 src/muse/data/example/default_new_input/process_parameters.csv create mode 100644 src/muse/data/example/default_new_input/process_regions.csv create mode 100644 src/muse/data/example/default_new_input/processes.csv create mode 100644 src/muse/data/example/default_new_input/regions.csv create mode 100644 src/muse/data/example/default_new_input/sectors.csv create mode 100644 src/muse/data/example/default_new_input/time_slices.csv diff --git a/src/muse/data/example/default_new_input/agent_objectives.csv b/src/muse/data/example/default_new_input/agent_objectives.csv new file mode 100644 index 000000000..c14612aaa --- /dev/null +++ b/src/muse/data/example/default_new_input/agent_objectives.csv @@ -0,0 +1,3 @@ +agent,objective,objective_data,objective_sort +Agent1,LCOE,1,TRUE +Agent2,LCOE,1,TRUE diff --git a/src/muse/data/example/default_new_input/agent_pairs.csv b/src/muse/data/example/default_new_input/agent_pairs.csv new file mode 100644 index 000000000..72e306f93 --- /dev/null +++ b/src/muse/data/example/default_new_input/agent_pairs.csv @@ -0,0 +1,2 @@ +name,new_agent,retrofit_agent,quantity +A1,Agent1,Agent2,1 diff --git a/src/muse/data/example/default_new_input/agent_regions.csv b/src/muse/data/example/default_new_input/agent_regions.csv new file mode 100644 index 000000000..257b59615 --- /dev/null +++ b/src/muse/data/example/default_new_input/agent_regions.csv @@ -0,0 +1,2 @@ +agent_pair,region +A1,R1 diff --git 
a/src/muse/data/example/default_new_input/agents.csv b/src/muse/data/example/default_new_input/agents.csv new file mode 100644 index 000000000..2d1261bde --- /dev/null +++ b/src/muse/data/example/default_new_input/agents.csv @@ -0,0 +1,3 @@ +agent,description,type,maturity_threshold,annual_cost_limit,search_rule,decision_rule +Agent1,New agent for A1,new,-1,inf,all,single +Agent2,Retrofit agent for A1,retrofit,-1,inf,all,single diff --git a/src/muse/data/example/default_new_input/assets.csv b/src/muse/data/example/default_new_input/assets.csv new file mode 100644 index 000000000..c30df1a13 --- /dev/null +++ b/src/muse/data/example/default_new_input/assets.csv @@ -0,0 +1,8 @@ +process_name,region,agent,capacity,year +gassupply1,R1,Agent2,15,2020 +gassupply1,R1,Agent2,15,2025 +gassupply1,R1,Agent2,7.5,2030 +gasCCGT,R1,Agent2,1,2020 +gasCCGT,R1,Agent2,1,2025 +gasboiler,R1,Agent2,10,2020 +gasboiler,R1,Agent2,5,2025 diff --git a/src/muse/data/example/default_new_input/commodities.csv b/src/muse/data/example/default_new_input/commodities.csv new file mode 100644 index 000000000..cec5cbf65 --- /dev/null +++ b/src/muse/data/example/default_new_input/commodities.csv @@ -0,0 +1,6 @@ +commodity_name,description,type,unit +electricity,Electricity,energy,PJ +gas,Gas,energy,PJ +heat,Heat,energy,PJ +wind,Wind,energy,PJ +C02f,Carbon dioxide,energy,kt diff --git a/src/muse/data/example/default_new_input/commodity_costs.csv b/src/muse/data/example/default_new_input/commodity_costs.csv new file mode 100644 index 000000000..85309f435 --- /dev/null +++ b/src/muse/data/example/default_new_input/commodity_costs.csv @@ -0,0 +1,58 @@ +year,region,commodity_name,value +2010,R1,electricity,14.81481472 +2015,R1,electricity,17.89814806 +2020,R1,electricity,19.5 +2025,R1,electricity,21.93518528 +2030,R1,electricity,26.50925917 +2035,R1,electricity,26.51851861 +2040,R1,electricity,23.85185194 +2045,R1,electricity,23.97222222 +2050,R1,electricity,24.06481472 +2055,R1,electricity,25.3425925 +2060,R1,electricity,25.53703694 +2065,R1,electricity,25.32407417 +2070,R1,electricity,23.36111111 +2075,R1,electricity,22.27777778 +2080,R1,electricity,22.25925917 +2085,R1,electricity,22.17592583 +2090,R1,electricity,22.03703694 +2095,R1,electricity,21.94444444 +2100,R1,electricity,21.39814806 +2010,R1,gas,6.6759 +2015,R1,gas,6.914325 +2020,R1,gas,7.15275 +2025,R1,gas,8.10645 +2030,R1,gas,9.06015 +2035,R1,gas,9.2191 +2040,R1,gas,9.37805 +2045,R1,gas,9.193829337 +2050,R1,gas,9.009608674 +2055,R1,gas,8.832625604 +2060,R1,gas,8.655642534 +2065,R1,gas,8.485612708 +2070,R1,gas,8.315582883 +2075,R1,gas,8.152233126 +2080,R1,gas,7.988883368 +2085,R1,gas,7.831951236 +2090,R1,gas,7.675019103 +2095,R1,gas,7.524252461 +2100,R1,gas,7.373485819 +2010,R1,CO2f,0 +2015,R1,CO2f,0.052913851 +2020,R1,CO2f,0.08314119 +2025,R1,CO2f,0.120069795 +2030,R1,CO2f,0.156998399 +2035,R1,CO2f,0.214877567 +2040,R1,CO2f,0.272756734 +2045,R1,CO2f,0.35394801 +2050,R1,CO2f,0.435139285 +2055,R1,CO2f,0.542365578 +2060,R1,CO2f,0.649591871 +2065,R1,CO2f,0.780892624 +2070,R1,CO2f,0.912193378 +2075,R1,CO2f,1.078321687 +2080,R1,CO2f,1.244449995 +2085,R1,CO2f,1.4253503 +2090,R1,CO2f,1.606250604 +2095,R1,CO2f,1.73877515 +2100,R1,CO2f,1.871299697 diff --git a/src/muse/data/example/default_new_input/commodity_trade.csv b/src/muse/data/example/default_new_input/commodity_trade.csv new file mode 100644 index 000000000..eb23c4b6c --- /dev/null +++ b/src/muse/data/example/default_new_input/commodity_trade.csv @@ -0,0 +1 @@ +commodity,region,net_import,year diff --git 
a/src/muse/data/example/default_new_input/demand.csv b/src/muse/data/example/default_new_input/demand.csv new file mode 100644 index 000000000..13c64fe8c --- /dev/null +++ b/src/muse/data/example/default_new_input/demand.csv @@ -0,0 +1,3 @@ +year,commodity_name,region,demand +2020,heat,R1,10 +2050,heat,R1,30 diff --git a/src/muse/data/example/default_new_input/demand_slicing.csv b/src/muse/data/example/default_new_input/demand_slicing.csv new file mode 100644 index 000000000..10d4693c2 --- /dev/null +++ b/src/muse/data/example/default_new_input/demand_slicing.csv @@ -0,0 +1,7 @@ +commodity,region,timeslice,fraction,year +heat,R1,night,0.1, +heat,R1,morning,0.15, +heat,R1,afternoon,0.1, +heat,R1,early-peak,0.15, +heat,R1,late-peak,0.3, +heat,R1,evening,0.2, diff --git a/src/muse/data/example/default_new_input/process_availabilities.csv b/src/muse/data/example/default_new_input/process_availabilities.csv new file mode 100644 index 000000000..300a11407 --- /dev/null +++ b/src/muse/data/example/default_new_input/process_availabilities.csv @@ -0,0 +1,6 @@ +process_name,timeslice,lim_type,value,year,region +gassupply1,ALL,UP,0.9,, +gasCCGT,ALL,UP,0.9,, +windturbine,ALL,UP,0.4,, +gasboiler,ALL,UP,1,, +heatpump,ALL,UP,1,, diff --git a/src/muse/data/example/default_new_input/process_flows.csv b/src/muse/data/example/default_new_input/process_flows.csv new file mode 100644 index 000000000..e6e72c67d --- /dev/null +++ b/src/muse/data/example/default_new_input/process_flows.csv @@ -0,0 +1,12 @@ +process_name,commodity_name,flow,year,region +gassupply1,gas,1,, +gasCCGT,gas,-1.67,, +gasCCGT,electricity,1,, +gasCCGT,CO2f,91.67,, +windturbine,wind,-1,, +windturbine,electricity,1,, +gasboiler,gas,-1.16,, +gasboiler,heat,1,, +gasboiler,CO2f,64.71,, +heatpump,electricity,-0.4,, +heatpump,heat,1,, diff --git a/src/muse/data/example/default_new_input/process_parameters.csv b/src/muse/data/example/default_new_input/process_parameters.csv new file mode 100644 index 000000000..00bb38bb7 --- /dev/null +++ b/src/muse/data/example/default_new_input/process_parameters.csv @@ -0,0 +1,6 @@ +process,cap_par,cap_exp,fix_par,fix_exp,var_par,var_exp,max_capacity_addition,max_capacity_growth,total_capacity_limit,life,scaling_size,efficiency,discount_rate,year,region +gassupply1,0,1,0,1,2.55,1,5,1,60,35,0.00000189,86,0.1,, +gasCCGT,23.78234399,1,0,1,0,1,2,1,60,35,0.00000189,86,0.1,, +windturbine,36.30771182,1,0,1,0,1,2,1,60,25,0.00000189,86,0.1,, +gasboiler,3.8,1,0,1,0,1,10,0.02,60,10,0.00000189,86,0.1,, +heatpump,8.866667,1,0,1,0,1,10,0.02,60,10,0.00000189,86,0.1,, diff --git a/src/muse/data/example/default_new_input/process_regions.csv b/src/muse/data/example/default_new_input/process_regions.csv new file mode 100644 index 000000000..e7ca8286c --- /dev/null +++ b/src/muse/data/example/default_new_input/process_regions.csv @@ -0,0 +1,6 @@ +process,region +gassupply1,R1 +gasCCGT,R1 +windturbine,R1 +gasboiler,R1 +heatpump,R1 diff --git a/src/muse/data/example/default_new_input/processes.csv b/src/muse/data/example/default_new_input/processes.csv new file mode 100644 index 000000000..7c4b9f818 --- /dev/null +++ b/src/muse/data/example/default_new_input/processes.csv @@ -0,0 +1,6 @@ +name,type,fuel,end_use,level,sector +gassupply1,energy,gas,gas,fixed,gas +gasCCGT,energy,gas,electricity,fixed,power +windturbine,energy,wind,electricity,fixed,power +gasboiler,energy,gas,heat,fixed,residential +heatpump,energy,electricity,heat,fixed,residential diff --git a/src/muse/data/example/default_new_input/regions.csv 
b/src/muse/data/example/default_new_input/regions.csv new file mode 100644 index 000000000..1583e5334 --- /dev/null +++ b/src/muse/data/example/default_new_input/regions.csv @@ -0,0 +1,2 @@ +name,description +R1,Region 1 diff --git a/src/muse/data/example/default_new_input/sectors.csv b/src/muse/data/example/default_new_input/sectors.csv new file mode 100644 index 000000000..a841328b6 --- /dev/null +++ b/src/muse/data/example/default_new_input/sectors.csv @@ -0,0 +1,4 @@ +name,description +gas,Gas sector +power,Power sector +residential,Residential sector diff --git a/src/muse/data/example/default_new_input/time_slices.csv b/src/muse/data/example/default_new_input/time_slices.csv new file mode 100644 index 000000000..376022d96 --- /dev/null +++ b/src/muse/data/example/default_new_input/time_slices.csv @@ -0,0 +1,7 @@ +season,day,time_of_day,fraction +all,all,night,0.1667 +all,all,morning,0.1667 +all,all,afternoon,0.1667 +all,all,early-peak,0.1667 +all,all,late-peak,0.1667 +all,all,evening,0.1667 diff --git a/src/muse/examples.py b/src/muse/examples.py index 189a82c85..40b06ac94 100644 --- a/src/muse/examples.py +++ b/src/muse/examples.py @@ -137,6 +137,8 @@ def copy_model( _copy_minimum_service(path) elif name.lower() == "trade": _copy_trade(path) + elif name.lower() == "default_new_input": + _copy_default_new_input(path) return path @@ -316,6 +318,12 @@ def update_lpsolver(data): modify_toml(path / "settings.toml", update_lpsolver) +def _copy_default_new_input(path: Path): + from shutil import copytree + + copytree(example_data_dir() / "default_new_input", path) + + def _copy_default_timeslice(path: Path): copytree(example_data_dir() / "default_timeslice", path) From 79016d948e9c4405534bea704c1c89f590b1e7e4 Mon Sep 17 00:00:00 2001 From: Christopher Cave-Ayland Date: Thu, 27 Jun 2024 16:06:00 +0100 Subject: [PATCH 02/43] Get tests running --- pyproject.toml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 9e778e694..43220c550 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -35,7 +35,9 @@ dependencies = [ "xlrd", "mypy-extensions", "pypubsub", - "tomlkit" + "tomlkit", + "duckdb", + "fsspec" ] dynamic = ["version"] From 549cdd17b4a8a9f7c7d86ee160f7687e2705fcdc Mon Sep 17 00:00:00 2001 From: Tom Bland Date: Wed, 3 Jul 2024 16:44:02 +0100 Subject: [PATCH 03/43] Change column titles and order --- .../default_new_input/agent_objectives.csv | 2 +- .../example/default_new_input/agent_pairs.csv | 2 +- .../default_new_input/agent_regions.csv | 2 +- .../data/example/default_new_input/agents.csv | 2 +- .../data/example/default_new_input/assets.csv | 16 +-- .../example/default_new_input/commodities.csv | 2 +- .../default_new_input/commodity_costs.csv | 116 +++++++++--------- .../default_new_input/commodity_trade.csv | 2 +- .../data/example/default_new_input/demand.csv | 6 +- .../default_new_input/demand_slicing.csv | 14 +-- .../process_availabilities.csv | 12 +- .../default_new_input/process_flows.csv | 24 ++-- .../default_new_input/process_parameters.csv | 12 +- .../default_new_input/process_regions.csv | 2 +- .../example/default_new_input/processes.csv | 12 +- .../example/default_new_input/regions.csv | 2 +- .../example/default_new_input/sectors.csv | 2 +- 17 files changed, 115 insertions(+), 115 deletions(-) diff --git a/src/muse/data/example/default_new_input/agent_objectives.csv b/src/muse/data/example/default_new_input/agent_objectives.csv index c14612aaa..331c649c9 100644 --- 
a/src/muse/data/example/default_new_input/agent_objectives.csv +++ b/src/muse/data/example/default_new_input/agent_objectives.csv @@ -1,3 +1,3 @@ -agent,objective,objective_data,objective_sort +agent_id,objective,objective_data,objective_sort Agent1,LCOE,1,TRUE Agent2,LCOE,1,TRUE diff --git a/src/muse/data/example/default_new_input/agent_pairs.csv b/src/muse/data/example/default_new_input/agent_pairs.csv index 72e306f93..172632275 100644 --- a/src/muse/data/example/default_new_input/agent_pairs.csv +++ b/src/muse/data/example/default_new_input/agent_pairs.csv @@ -1,2 +1,2 @@ -name,new_agent,retrofit_agent,quantity +id,new_agent_id,retrofit_agent_id,quantity A1,Agent1,Agent2,1 diff --git a/src/muse/data/example/default_new_input/agent_regions.csv b/src/muse/data/example/default_new_input/agent_regions.csv index 257b59615..6a39852ea 100644 --- a/src/muse/data/example/default_new_input/agent_regions.csv +++ b/src/muse/data/example/default_new_input/agent_regions.csv @@ -1,2 +1,2 @@ -agent_pair,region +agent_pair_id,region_id A1,R1 diff --git a/src/muse/data/example/default_new_input/agents.csv b/src/muse/data/example/default_new_input/agents.csv index 2d1261bde..652e2d978 100644 --- a/src/muse/data/example/default_new_input/agents.csv +++ b/src/muse/data/example/default_new_input/agents.csv @@ -1,3 +1,3 @@ -agent,description,type,maturity_threshold,annual_cost_limit,search_rule,decision_rule +agent_id,description,type,maturity_threshold,annual_cost_limit,search_rule,decision_rule Agent1,New agent for A1,new,-1,inf,all,single Agent2,Retrofit agent for A1,retrofit,-1,inf,all,single diff --git a/src/muse/data/example/default_new_input/assets.csv b/src/muse/data/example/default_new_input/assets.csv index c30df1a13..8648bb891 100644 --- a/src/muse/data/example/default_new_input/assets.csv +++ b/src/muse/data/example/default_new_input/assets.csv @@ -1,8 +1,8 @@ -process_name,region,agent,capacity,year -gassupply1,R1,Agent2,15,2020 -gassupply1,R1,Agent2,15,2025 -gassupply1,R1,Agent2,7.5,2030 -gasCCGT,R1,Agent2,1,2020 -gasCCGT,R1,Agent2,1,2025 -gasboiler,R1,Agent2,10,2020 -gasboiler,R1,Agent2,5,2025 +agent_id,process_id,region_id,year,capacity +Agent2,gassupply1,R1,2020,15 +Agent2,gassupply1,R1,2025,15 +Agent2,gassupply1,R1,2030,7.5 +Agent2,gasCCGT,R1,2020,1 +Agent2,gasCCGT,R1,2025,1 +Agent2,gasboiler,R1,2020,10 +Agent2,gasboiler,R1,2025,5 diff --git a/src/muse/data/example/default_new_input/commodities.csv b/src/muse/data/example/default_new_input/commodities.csv index cec5cbf65..ac830346a 100644 --- a/src/muse/data/example/default_new_input/commodities.csv +++ b/src/muse/data/example/default_new_input/commodities.csv @@ -1,4 +1,4 @@ -commodity_name,description,type,unit +commodity_id,description,type,unit electricity,Electricity,energy,PJ gas,Gas,energy,PJ heat,Heat,energy,PJ diff --git a/src/muse/data/example/default_new_input/commodity_costs.csv b/src/muse/data/example/default_new_input/commodity_costs.csv index 85309f435..0a64542b3 100644 --- a/src/muse/data/example/default_new_input/commodity_costs.csv +++ b/src/muse/data/example/default_new_input/commodity_costs.csv @@ -1,58 +1,58 @@ -year,region,commodity_name,value -2010,R1,electricity,14.81481472 -2015,R1,electricity,17.89814806 -2020,R1,electricity,19.5 -2025,R1,electricity,21.93518528 -2030,R1,electricity,26.50925917 -2035,R1,electricity,26.51851861 -2040,R1,electricity,23.85185194 -2045,R1,electricity,23.97222222 -2050,R1,electricity,24.06481472 -2055,R1,electricity,25.3425925 -2060,R1,electricity,25.53703694 
-2065,R1,electricity,25.32407417 -2070,R1,electricity,23.36111111 -2075,R1,electricity,22.27777778 -2080,R1,electricity,22.25925917 -2085,R1,electricity,22.17592583 -2090,R1,electricity,22.03703694 -2095,R1,electricity,21.94444444 -2100,R1,electricity,21.39814806 -2010,R1,gas,6.6759 -2015,R1,gas,6.914325 -2020,R1,gas,7.15275 -2025,R1,gas,8.10645 -2030,R1,gas,9.06015 -2035,R1,gas,9.2191 -2040,R1,gas,9.37805 -2045,R1,gas,9.193829337 -2050,R1,gas,9.009608674 -2055,R1,gas,8.832625604 -2060,R1,gas,8.655642534 -2065,R1,gas,8.485612708 -2070,R1,gas,8.315582883 -2075,R1,gas,8.152233126 -2080,R1,gas,7.988883368 -2085,R1,gas,7.831951236 -2090,R1,gas,7.675019103 -2095,R1,gas,7.524252461 -2100,R1,gas,7.373485819 -2010,R1,CO2f,0 -2015,R1,CO2f,0.052913851 -2020,R1,CO2f,0.08314119 -2025,R1,CO2f,0.120069795 -2030,R1,CO2f,0.156998399 -2035,R1,CO2f,0.214877567 -2040,R1,CO2f,0.272756734 -2045,R1,CO2f,0.35394801 -2050,R1,CO2f,0.435139285 -2055,R1,CO2f,0.542365578 -2060,R1,CO2f,0.649591871 -2065,R1,CO2f,0.780892624 -2070,R1,CO2f,0.912193378 -2075,R1,CO2f,1.078321687 -2080,R1,CO2f,1.244449995 -2085,R1,CO2f,1.4253503 -2090,R1,CO2f,1.606250604 -2095,R1,CO2f,1.73877515 -2100,R1,CO2f,1.871299697 +commodity_id,region_id,year,value +electricity,R1,2010,14.81481472 +electricity,R1,2015,17.89814806 +electricity,R1,2020,19.5 +electricity,R1,2025,21.93518528 +electricity,R1,2030,26.50925917 +electricity,R1,2035,26.51851861 +electricity,R1,2040,23.85185194 +electricity,R1,2045,23.97222222 +electricity,R1,2050,24.06481472 +electricity,R1,2055,25.3425925 +electricity,R1,2060,25.53703694 +electricity,R1,2065,25.32407417 +electricity,R1,2070,23.36111111 +electricity,R1,2075,22.27777778 +electricity,R1,2080,22.25925917 +electricity,R1,2085,22.17592583 +electricity,R1,2090,22.03703694 +electricity,R1,2095,21.94444444 +electricity,R1,2100,21.39814806 +gas,R1,2010,6.6759 +gas,R1,2015,6.914325 +gas,R1,2020,7.15275 +gas,R1,2025,8.10645 +gas,R1,2030,9.06015 +gas,R1,2035,9.2191 +gas,R1,2040,9.37805 +gas,R1,2045,9.193829337 +gas,R1,2050,9.009608674 +gas,R1,2055,8.832625604 +gas,R1,2060,8.655642534 +gas,R1,2065,8.485612708 +gas,R1,2070,8.315582883 +gas,R1,2075,8.152233126 +gas,R1,2080,7.988883368 +gas,R1,2085,7.831951236 +gas,R1,2090,7.675019103 +gas,R1,2095,7.524252461 +gas,R1,2100,7.373485819 +CO2f,R1,2010,0 +CO2f,R1,2015,0.052913851 +CO2f,R1,2020,0.08314119 +CO2f,R1,2025,0.120069795 +CO2f,R1,2030,0.156998399 +CO2f,R1,2035,0.214877567 +CO2f,R1,2040,0.272756734 +CO2f,R1,2045,0.35394801 +CO2f,R1,2050,0.435139285 +CO2f,R1,2055,0.542365578 +CO2f,R1,2060,0.649591871 +CO2f,R1,2065,0.780892624 +CO2f,R1,2070,0.912193378 +CO2f,R1,2075,1.078321687 +CO2f,R1,2080,1.244449995 +CO2f,R1,2085,1.4253503 +CO2f,R1,2090,1.606250604 +CO2f,R1,2095,1.73877515 +CO2f,R1,2100,1.871299697 diff --git a/src/muse/data/example/default_new_input/commodity_trade.csv b/src/muse/data/example/default_new_input/commodity_trade.csv index eb23c4b6c..092dd1559 100644 --- a/src/muse/data/example/default_new_input/commodity_trade.csv +++ b/src/muse/data/example/default_new_input/commodity_trade.csv @@ -1 +1 @@ -commodity,region,net_import,year +commodity_id,region_id,year,import,export diff --git a/src/muse/data/example/default_new_input/demand.csv b/src/muse/data/example/default_new_input/demand.csv index 13c64fe8c..b26c1b54d 100644 --- a/src/muse/data/example/default_new_input/demand.csv +++ b/src/muse/data/example/default_new_input/demand.csv @@ -1,3 +1,3 @@ -year,commodity_name,region,demand -2020,heat,R1,10 -2050,heat,R1,30 +commodity_id,region_id,year,demand 
+heat,R1,2020,10 +heat,R1,2050,30 diff --git a/src/muse/data/example/default_new_input/demand_slicing.csv b/src/muse/data/example/default_new_input/demand_slicing.csv index 10d4693c2..6877d5663 100644 --- a/src/muse/data/example/default_new_input/demand_slicing.csv +++ b/src/muse/data/example/default_new_input/demand_slicing.csv @@ -1,7 +1,7 @@ -commodity,region,timeslice,fraction,year -heat,R1,night,0.1, -heat,R1,morning,0.15, -heat,R1,afternoon,0.1, -heat,R1,early-peak,0.15, -heat,R1,late-peak,0.3, -heat,R1,evening,0.2, +commodity_id,region_id,year,timeslice,fraction +heat,R1,,night,0.1 +heat,R1,,morning,0.15 +heat,R1,,afternoon,0.1 +heat,R1,,early-peak,0.15 +heat,R1,,late-peak,0.3 +heat,R1,,evening,0.2 diff --git a/src/muse/data/example/default_new_input/process_availabilities.csv b/src/muse/data/example/default_new_input/process_availabilities.csv index 300a11407..1386d9db6 100644 --- a/src/muse/data/example/default_new_input/process_availabilities.csv +++ b/src/muse/data/example/default_new_input/process_availabilities.csv @@ -1,6 +1,6 @@ -process_name,timeslice,lim_type,value,year,region -gassupply1,ALL,UP,0.9,, -gasCCGT,ALL,UP,0.9,, -windturbine,ALL,UP,0.4,, -gasboiler,ALL,UP,1,, -heatpump,ALL,UP,1,, +process_id,region_id,year,timeslice,lim_type,value +gassupply1,,,ALL,UP,0.9 +gasCCGT,,,ALL,UP,0.9 +windturbine,,,ALL,UP,0.4 +gasboiler,,,ALL,UP,1 +heatpump,,,ALL,UP,1 diff --git a/src/muse/data/example/default_new_input/process_flows.csv b/src/muse/data/example/default_new_input/process_flows.csv index e6e72c67d..1c602f2d1 100644 --- a/src/muse/data/example/default_new_input/process_flows.csv +++ b/src/muse/data/example/default_new_input/process_flows.csv @@ -1,12 +1,12 @@ -process_name,commodity_name,flow,year,region -gassupply1,gas,1,, -gasCCGT,gas,-1.67,, -gasCCGT,electricity,1,, -gasCCGT,CO2f,91.67,, -windturbine,wind,-1,, -windturbine,electricity,1,, -gasboiler,gas,-1.16,, -gasboiler,heat,1,, -gasboiler,CO2f,64.71,, -heatpump,electricity,-0.4,, -heatpump,heat,1,, +process_id,commodity_id,region_id,year,flow +gassupply1,gas,,,1 +gasCCGT,gas,,,-1.67 +gasCCGT,electricity,,,1 +gasCCGT,CO2f,,,91.67 +windturbine,wind,,,-1 +windturbine,electricity,,,1 +gasboiler,gas,,,-1.16 +gasboiler,heat,,,1 +gasboiler,CO2f,,,64.71 +heatpump,electricity,,,-0.4 +heatpump,heat,,,1 diff --git a/src/muse/data/example/default_new_input/process_parameters.csv b/src/muse/data/example/default_new_input/process_parameters.csv index 00bb38bb7..5f162962a 100644 --- a/src/muse/data/example/default_new_input/process_parameters.csv +++ b/src/muse/data/example/default_new_input/process_parameters.csv @@ -1,6 +1,6 @@ -process,cap_par,cap_exp,fix_par,fix_exp,var_par,var_exp,max_capacity_addition,max_capacity_growth,total_capacity_limit,life,scaling_size,efficiency,discount_rate,year,region -gassupply1,0,1,0,1,2.55,1,5,1,60,35,0.00000189,86,0.1,, -gasCCGT,23.78234399,1,0,1,0,1,2,1,60,35,0.00000189,86,0.1,, -windturbine,36.30771182,1,0,1,0,1,2,1,60,25,0.00000189,86,0.1,, -gasboiler,3.8,1,0,1,0,1,10,0.02,60,10,0.00000189,86,0.1,, -heatpump,8.866667,1,0,1,0,1,10,0.02,60,10,0.00000189,86,0.1,, +process_id,region_id,year,cap_par,cap_exp,fix_par,fix_exp,var_par,var_exp,max_capacity_addition,max_capacity_growth,total_capacity_limit,life,scaling_size,efficiency,discount_rate +gassupply1,,,0,1,0,1,2.55,1,5,1,60,35,0.00000189,86,0.1 +gasCCGT,,,23.78234399,1,0,1,0,1,2,1,60,35,0.00000189,86,0.1 +windturbine,,,36.30771182,1,0,1,0,1,2,1,60,25,0.00000189,86,0.1 +gasboiler,,,3.8,1,0,1,0,1,10,0.02,60,10,0.00000189,86,0.1 
+heatpump,,,8.866667,1,0,1,0,1,10,0.02,60,10,0.00000189,86,0.1 diff --git a/src/muse/data/example/default_new_input/process_regions.csv b/src/muse/data/example/default_new_input/process_regions.csv index e7ca8286c..8700ece5a 100644 --- a/src/muse/data/example/default_new_input/process_regions.csv +++ b/src/muse/data/example/default_new_input/process_regions.csv @@ -1,4 +1,4 @@ -process,region +process_id,region_id gassupply1,R1 gasCCGT,R1 windturbine,R1 diff --git a/src/muse/data/example/default_new_input/processes.csv b/src/muse/data/example/default_new_input/processes.csv index 7c4b9f818..e653e3ed6 100644 --- a/src/muse/data/example/default_new_input/processes.csv +++ b/src/muse/data/example/default_new_input/processes.csv @@ -1,6 +1,6 @@ -name,type,fuel,end_use,level,sector -gassupply1,energy,gas,gas,fixed,gas -gasCCGT,energy,gas,electricity,fixed,power -windturbine,energy,wind,electricity,fixed,power -gasboiler,energy,gas,heat,fixed,residential -heatpump,energy,electricity,heat,fixed,residential +id,sector_id,type,fuel,end_use,level +gassupply1,gas,energy,gas,gas,fixed +gasCCGT,power,energy,gas,electricity,fixed +windturbine,power,energy,wind,electricity,fixed +gasboiler,residential,energy,gas,heat,fixed +heatpump,residential,energy,electricity,heat,fixed diff --git a/src/muse/data/example/default_new_input/regions.csv b/src/muse/data/example/default_new_input/regions.csv index 1583e5334..1ce17d1ce 100644 --- a/src/muse/data/example/default_new_input/regions.csv +++ b/src/muse/data/example/default_new_input/regions.csv @@ -1,2 +1,2 @@ -name,description +id,description R1,Region 1 diff --git a/src/muse/data/example/default_new_input/sectors.csv b/src/muse/data/example/default_new_input/sectors.csv index a841328b6..7488adac9 100644 --- a/src/muse/data/example/default_new_input/sectors.csv +++ b/src/muse/data/example/default_new_input/sectors.csv @@ -1,4 +1,4 @@ -name,description +id,description gas,Gas sector power,Power sector residential,Residential sector From 01760a23fdfc3982c4d089aecc6323f82423fff5 Mon Sep 17 00:00:00 2001 From: Tom Bland Date: Wed, 3 Jul 2024 17:20:04 +0100 Subject: [PATCH 04/43] Correct id columns --- src/muse/data/example/default_new_input/agents.csv | 2 +- src/muse/data/example/default_new_input/commodities.csv | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/muse/data/example/default_new_input/agents.csv b/src/muse/data/example/default_new_input/agents.csv index 652e2d978..a95e5b9dd 100644 --- a/src/muse/data/example/default_new_input/agents.csv +++ b/src/muse/data/example/default_new_input/agents.csv @@ -1,3 +1,3 @@ -agent_id,description,type,maturity_threshold,annual_cost_limit,search_rule,decision_rule +id,description,type,maturity_threshold,annual_cost_limit,search_rule,decision_rule Agent1,New agent for A1,new,-1,inf,all,single Agent2,Retrofit agent for A1,retrofit,-1,inf,all,single diff --git a/src/muse/data/example/default_new_input/commodities.csv b/src/muse/data/example/default_new_input/commodities.csv index ac830346a..5d87b119e 100644 --- a/src/muse/data/example/default_new_input/commodities.csv +++ b/src/muse/data/example/default_new_input/commodities.csv @@ -1,4 +1,4 @@ -commodity_id,description,type,unit +id,description,type,unit electricity,Electricity,energy,PJ gas,Gas,energy,PJ heat,Heat,energy,PJ From 08c9c70f24e094b4051a5e3972c4fd0325cbac3f Mon Sep 17 00:00:00 2001 From: Tom Bland Date: Mon, 4 Aug 2025 15:38:13 +0100 Subject: [PATCH 05/43] Ignore default_new_input in regression tests --- src/muse/examples.py | 1 + 
tests/test_fullsim_regression.py | 3 +++ 2 files changed, 4 insertions(+) diff --git a/src/muse/examples.py b/src/muse/examples.py index 40b06ac94..6bfaa6e7e 100644 --- a/src/muse/examples.py +++ b/src/muse/examples.py @@ -52,6 +52,7 @@ "multiple_agents", "minimum_service", "trade", + "default_new_input", ] diff --git a/tests/test_fullsim_regression.py b/tests/test_fullsim_regression.py index 21897c660..878965c26 100644 --- a/tests/test_fullsim_regression.py +++ b/tests/test_fullsim_regression.py @@ -5,6 +5,9 @@ from muse.examples import AVAILABLE_EXAMPLES +# temporary skip for default_new_input as this is not yet working +AVAILABLE_EXAMPLES.pop("default_new_input") + @mark.regression @mark.example From 8059a2ea79a86ced1eb093ec2a5aed80bfe41f7d Mon Sep 17 00:00:00 2001 From: Tom Bland Date: Mon, 4 Aug 2025 15:44:58 +0100 Subject: [PATCH 06/43] Fix typo in CO2 --- src/muse/data/example/default_new_input/commodities.csv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/muse/data/example/default_new_input/commodities.csv b/src/muse/data/example/default_new_input/commodities.csv index 5d87b119e..b4d546a74 100644 --- a/src/muse/data/example/default_new_input/commodities.csv +++ b/src/muse/data/example/default_new_input/commodities.csv @@ -3,4 +3,4 @@ electricity,Electricity,energy,PJ gas,Gas,energy,PJ heat,Heat,energy,PJ wind,Wind,energy,PJ -C02f,Carbon dioxide,energy,kt +CO2f,Carbon dioxide,energy,kt From e9361daecdcdab6177341cd481e4313fc3647ff0 Mon Sep 17 00:00:00 2001 From: Tom Bland Date: Fri, 8 Aug 2025 12:37:48 +0100 Subject: [PATCH 07/43] Fix popping error --- tests/test_fullsim_regression.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_fullsim_regression.py b/tests/test_fullsim_regression.py index 878965c26..49197e704 100644 --- a/tests/test_fullsim_regression.py +++ b/tests/test_fullsim_regression.py @@ -6,7 +6,7 @@ from muse.examples import AVAILABLE_EXAMPLES # temporary skip for default_new_input as this is not yet working -AVAILABLE_EXAMPLES.pop("default_new_input") +AVAILABLE_EXAMPLES.remove("default_new_input") @mark.regression From 355754f586d10d8189d5c28f3d1a86cf81bd0bf4 Mon Sep 17 00:00:00 2001 From: Tom Bland Date: Fri, 8 Aug 2025 15:02:45 +0100 Subject: [PATCH 08/43] Update to be closer to current MUSE2 format --- .../default_new_input/agent_objectives.csv | 3 +- .../example/default_new_input/agent_pairs.csv | 2 -- .../default_new_input/agent_regions.csv | 2 -- .../data/example/default_new_input/agents.csv | 7 ++-- .../data/example/default_new_input/assets.csv | 14 ++++---- .../default_new_input/commodity_costs.csv | 36 ------------------- .../default_new_input/commodity_trade.csv | 1 - .../default_new_input/demand_slicing.csv | 12 +++---- .../process_availabilities.csv | 10 +++--- .../default_new_input/process_flows.csv | 22 ++++++------ .../default_new_input/process_parameters.csv | 12 +++---- .../default_new_input/process_regions.csv | 6 ---- .../example/default_new_input/processes.csv | 12 +++---- .../example/default_new_input/time_slices.csv | 12 +++---- 14 files changed, 51 insertions(+), 100 deletions(-) delete mode 100644 src/muse/data/example/default_new_input/agent_pairs.csv delete mode 100644 src/muse/data/example/default_new_input/agent_regions.csv delete mode 100644 src/muse/data/example/default_new_input/commodity_trade.csv delete mode 100644 src/muse/data/example/default_new_input/process_regions.csv diff --git a/src/muse/data/example/default_new_input/agent_objectives.csv 
b/src/muse/data/example/default_new_input/agent_objectives.csv index 331c649c9..a1878883b 100644 --- a/src/muse/data/example/default_new_input/agent_objectives.csv +++ b/src/muse/data/example/default_new_input/agent_objectives.csv @@ -1,3 +1,2 @@ -agent_id,objective,objective_data,objective_sort +agent_id,objective_type,decision_weight,objective_sort Agent1,LCOE,1,TRUE -Agent2,LCOE,1,TRUE diff --git a/src/muse/data/example/default_new_input/agent_pairs.csv b/src/muse/data/example/default_new_input/agent_pairs.csv deleted file mode 100644 index 172632275..000000000 --- a/src/muse/data/example/default_new_input/agent_pairs.csv +++ /dev/null @@ -1,2 +0,0 @@ -id,new_agent_id,retrofit_agent_id,quantity -A1,Agent1,Agent2,1 diff --git a/src/muse/data/example/default_new_input/agent_regions.csv b/src/muse/data/example/default_new_input/agent_regions.csv deleted file mode 100644 index 6a39852ea..000000000 --- a/src/muse/data/example/default_new_input/agent_regions.csv +++ /dev/null @@ -1,2 +0,0 @@ -agent_pair_id,region_id -A1,R1 diff --git a/src/muse/data/example/default_new_input/agents.csv b/src/muse/data/example/default_new_input/agents.csv index a95e5b9dd..08d51e0ff 100644 --- a/src/muse/data/example/default_new_input/agents.csv +++ b/src/muse/data/example/default_new_input/agents.csv @@ -1,3 +1,4 @@ -id,description,type,maturity_threshold,annual_cost_limit,search_rule,decision_rule -Agent1,New agent for A1,new,-1,inf,all,single -Agent2,Retrofit agent for A1,retrofit,-1,inf,all,single +id,description,region_id,sector_id,search_rule,decision_rule,quantity +Agent1,Agent1_R1_residential,R1,residential,all,single,1 +Agent1,Agent1_R1_power,R1,power,all,single,1 +Agent1,Agent1_R1_gas,R1,gas,all,single,1 diff --git a/src/muse/data/example/default_new_input/assets.csv b/src/muse/data/example/default_new_input/assets.csv index 8648bb891..487ed794f 100644 --- a/src/muse/data/example/default_new_input/assets.csv +++ b/src/muse/data/example/default_new_input/assets.csv @@ -1,8 +1,8 @@ agent_id,process_id,region_id,year,capacity -Agent2,gassupply1,R1,2020,15 -Agent2,gassupply1,R1,2025,15 -Agent2,gassupply1,R1,2030,7.5 -Agent2,gasCCGT,R1,2020,1 -Agent2,gasCCGT,R1,2025,1 -Agent2,gasboiler,R1,2020,10 -Agent2,gasboiler,R1,2025,5 +Agent1,gassupply1,R1,2020,15 +Agent1,gassupply1,R1,2025,15 +Agent1,gassupply1,R1,2030,7.5 +Agent1,gasCCGT,R1,2020,1 +Agent1,gasCCGT,R1,2025,1 +Agent1,gasboiler,R1,2020,10 +Agent1,gasboiler,R1,2025,5 diff --git a/src/muse/data/example/default_new_input/commodity_costs.csv b/src/muse/data/example/default_new_input/commodity_costs.csv index 0a64542b3..88a4ee1c1 100644 --- a/src/muse/data/example/default_new_input/commodity_costs.csv +++ b/src/muse/data/example/default_new_input/commodity_costs.csv @@ -1,6 +1,4 @@ commodity_id,region_id,year,value -electricity,R1,2010,14.81481472 -electricity,R1,2015,17.89814806 electricity,R1,2020,19.5 electricity,R1,2025,21.93518528 electricity,R1,2030,26.50925917 @@ -8,18 +6,6 @@ electricity,R1,2035,26.51851861 electricity,R1,2040,23.85185194 electricity,R1,2045,23.97222222 electricity,R1,2050,24.06481472 -electricity,R1,2055,25.3425925 -electricity,R1,2060,25.53703694 -electricity,R1,2065,25.32407417 -electricity,R1,2070,23.36111111 -electricity,R1,2075,22.27777778 -electricity,R1,2080,22.25925917 -electricity,R1,2085,22.17592583 -electricity,R1,2090,22.03703694 -electricity,R1,2095,21.94444444 -electricity,R1,2100,21.39814806 -gas,R1,2010,6.6759 -gas,R1,2015,6.914325 gas,R1,2020,7.15275 gas,R1,2025,8.10645 gas,R1,2030,9.06015 @@ -27,18 +13,6 @@ 
gas,R1,2035,9.2191 gas,R1,2040,9.37805 gas,R1,2045,9.193829337 gas,R1,2050,9.009608674 -gas,R1,2055,8.832625604 -gas,R1,2060,8.655642534 -gas,R1,2065,8.485612708 -gas,R1,2070,8.315582883 -gas,R1,2075,8.152233126 -gas,R1,2080,7.988883368 -gas,R1,2085,7.831951236 -gas,R1,2090,7.675019103 -gas,R1,2095,7.524252461 -gas,R1,2100,7.373485819 -CO2f,R1,2010,0 -CO2f,R1,2015,0.052913851 CO2f,R1,2020,0.08314119 CO2f,R1,2025,0.120069795 CO2f,R1,2030,0.156998399 @@ -46,13 +20,3 @@ CO2f,R1,2035,0.214877567 CO2f,R1,2040,0.272756734 CO2f,R1,2045,0.35394801 CO2f,R1,2050,0.435139285 -CO2f,R1,2055,0.542365578 -CO2f,R1,2060,0.649591871 -CO2f,R1,2065,0.780892624 -CO2f,R1,2070,0.912193378 -CO2f,R1,2075,1.078321687 -CO2f,R1,2080,1.244449995 -CO2f,R1,2085,1.4253503 -CO2f,R1,2090,1.606250604 -CO2f,R1,2095,1.73877515 -CO2f,R1,2100,1.871299697 diff --git a/src/muse/data/example/default_new_input/commodity_trade.csv b/src/muse/data/example/default_new_input/commodity_trade.csv deleted file mode 100644 index 092dd1559..000000000 --- a/src/muse/data/example/default_new_input/commodity_trade.csv +++ /dev/null @@ -1 +0,0 @@ -commodity_id,region_id,year,import,export diff --git a/src/muse/data/example/default_new_input/demand_slicing.csv b/src/muse/data/example/default_new_input/demand_slicing.csv index 6877d5663..b9b610874 100644 --- a/src/muse/data/example/default_new_input/demand_slicing.csv +++ b/src/muse/data/example/default_new_input/demand_slicing.csv @@ -1,7 +1,7 @@ commodity_id,region_id,year,timeslice,fraction -heat,R1,,night,0.1 -heat,R1,,morning,0.15 -heat,R1,,afternoon,0.1 -heat,R1,,early-peak,0.15 -heat,R1,,late-peak,0.3 -heat,R1,,evening,0.2 +heat,R1,2020,all-year.all-week.night,0.1 +heat,R1,2020,all-year.all-week.morning,0.15 +heat,R1,2020,all-year.all-week.afternoon,0.1 +heat,R1,2020,all-year.all-week.early-peak,0.15 +heat,R1,2020,all-year.all-week.late-peak,0.3 +heat,R1,2020,all-year.all-week.evening,0.2 diff --git a/src/muse/data/example/default_new_input/process_availabilities.csv b/src/muse/data/example/default_new_input/process_availabilities.csv index 1386d9db6..7527ee58f 100644 --- a/src/muse/data/example/default_new_input/process_availabilities.csv +++ b/src/muse/data/example/default_new_input/process_availabilities.csv @@ -1,6 +1,4 @@ -process_id,region_id,year,timeslice,lim_type,value -gassupply1,,,ALL,UP,0.9 -gasCCGT,,,ALL,UP,0.9 -windturbine,,,ALL,UP,0.4 -gasboiler,,,ALL,UP,1 -heatpump,,,ALL,UP,1 +process_id,region_id,year,timeslice,limit_type,value +gassupply1,R1,2020,all-year.all-week,up,0.9 +gasCCGT,R1,2020,all-year.all-week,up,0.9 +windturbine,R1,2020,all-year.all-week,up,0.4 diff --git a/src/muse/data/example/default_new_input/process_flows.csv b/src/muse/data/example/default_new_input/process_flows.csv index 1c602f2d1..f0b979146 100644 --- a/src/muse/data/example/default_new_input/process_flows.csv +++ b/src/muse/data/example/default_new_input/process_flows.csv @@ -1,12 +1,12 @@ process_id,commodity_id,region_id,year,flow -gassupply1,gas,,,1 -gasCCGT,gas,,,-1.67 -gasCCGT,electricity,,,1 -gasCCGT,CO2f,,,91.67 -windturbine,wind,,,-1 -windturbine,electricity,,,1 -gasboiler,gas,,,-1.16 -gasboiler,heat,,,1 -gasboiler,CO2f,,,64.71 -heatpump,electricity,,,-0.4 -heatpump,heat,,,1 +gassupply1,gas,R1,2020,1 +gasCCGT,gas,R1,2020,-1.67 +gasCCGT,electricity,R1,2020,1 +gasCCGT,CO2f,R1,2020,91.67 +windturbine,wind,R1,2020,-1 +windturbine,electricity,R1,2020,1 +gasboiler,gas,R1,2020,-1.16 +gasboiler,heat,R1,2020,1 +gasboiler,CO2f,R1,2020,64.71 +heatpump,electricity,R1,2020,-0.4 +heatpump,heat,R1,2020,1 
diff --git a/src/muse/data/example/default_new_input/process_parameters.csv b/src/muse/data/example/default_new_input/process_parameters.csv index 5f162962a..cd6e15e91 100644 --- a/src/muse/data/example/default_new_input/process_parameters.csv +++ b/src/muse/data/example/default_new_input/process_parameters.csv @@ -1,6 +1,6 @@ -process_id,region_id,year,cap_par,cap_exp,fix_par,fix_exp,var_par,var_exp,max_capacity_addition,max_capacity_growth,total_capacity_limit,life,scaling_size,efficiency,discount_rate -gassupply1,,,0,1,0,1,2.55,1,5,1,60,35,0.00000189,86,0.1 -gasCCGT,,,23.78234399,1,0,1,0,1,2,1,60,35,0.00000189,86,0.1 -windturbine,,,36.30771182,1,0,1,0,1,2,1,60,25,0.00000189,86,0.1 -gasboiler,,,3.8,1,0,1,0,1,10,0.02,60,10,0.00000189,86,0.1 -heatpump,,,8.866667,1,0,1,0,1,10,0.02,60,10,0.00000189,86,0.1 +process_id,region_id,year,cap_par,cap_exp,fix_par,fix_exp,var_par,var_exp,max_capacity_addition,max_capacity_growth,total_capacity_limit,lifetime,discount_rate +gassupply1,R1,2020,0,1,0,1,2.55,1,5,1,60,35,0.1 +gasCCGT,R1,2020,23.78234399,1,0,1,0,1,2,1,60,35,0.1 +windturbine,R1,2020,36.30771182,1,0,1,0,1,2,1,60,25,0.1 +gasboiler,R1,2020,3.8,1,0,1,0,1,10,0.02,60,10,0.1 +heatpump,R1,2020,8.866667,1,0,1,0,1,10,0.02,60,10,0.1 diff --git a/src/muse/data/example/default_new_input/process_regions.csv b/src/muse/data/example/default_new_input/process_regions.csv deleted file mode 100644 index 8700ece5a..000000000 --- a/src/muse/data/example/default_new_input/process_regions.csv +++ /dev/null @@ -1,6 +0,0 @@ -process_id,region_id -gassupply1,R1 -gasCCGT,R1 -windturbine,R1 -gasboiler,R1 -heatpump,R1 diff --git a/src/muse/data/example/default_new_input/processes.csv b/src/muse/data/example/default_new_input/processes.csv index e653e3ed6..e68ad288c 100644 --- a/src/muse/data/example/default_new_input/processes.csv +++ b/src/muse/data/example/default_new_input/processes.csv @@ -1,6 +1,6 @@ -id,sector_id,type,fuel,end_use,level -gassupply1,gas,energy,gas,gas,fixed -gasCCGT,power,energy,gas,electricity,fixed -windturbine,power,energy,wind,electricity,fixed -gasboiler,residential,energy,gas,heat,fixed -heatpump,residential,energy,electricity,heat,fixed +id,description,sector_id +gassupply1,Gas supply,energy +gasCCGT,Gas CCGT,power +windturbine,Wind turbine,power +gasboiler,Gas boiler,residential +heatpump,Heat pump,residential diff --git a/src/muse/data/example/default_new_input/time_slices.csv b/src/muse/data/example/default_new_input/time_slices.csv index 376022d96..dc8774fe0 100644 --- a/src/muse/data/example/default_new_input/time_slices.csv +++ b/src/muse/data/example/default_new_input/time_slices.csv @@ -1,7 +1,7 @@ season,day,time_of_day,fraction -all,all,night,0.1667 -all,all,morning,0.1667 -all,all,afternoon,0.1667 -all,all,early-peak,0.1667 -all,all,late-peak,0.1667 -all,all,evening,0.1667 +all-year,all-week,night,0.1667 +all-year,all-week,morning,0.1667 +all-year,all-week,afternoon,0.1667 +all-year,all-week,early-peak,0.1667 +all-year,all-week,late-peak,0.1667 +all-year,all-week,evening,0.1667 From 3179a2ce4fdf37df609d45b7b3e3d92564e8a6e8 Mon Sep 17 00:00:00 2001 From: Tom Bland Date: Mon, 11 Aug 2025 15:16:14 +0100 Subject: [PATCH 09/43] Bring it even closer to the MUSE2 format --- .../example/default_new_input/agent_objectives.csv | 4 +++- src/muse/data/example/default_new_input/agents.csv | 6 +++--- src/muse/data/example/default_new_input/assets.csv | 14 ++++++-------- .../example/default_new_input/demand_slicing.csv | 14 +++++++------- .../default_new_input/process_availabilities.csv | 6 
+++--- .../example/default_new_input/process_flows.csv | 2 +- .../default_new_input/process_parameters.csv | 12 ++++++------ .../data/example/default_new_input/time_slices.csv | 12 ++++++------ 8 files changed, 35 insertions(+), 35 deletions(-) diff --git a/src/muse/data/example/default_new_input/agent_objectives.csv b/src/muse/data/example/default_new_input/agent_objectives.csv index a1878883b..5bf0e59e4 100644 --- a/src/muse/data/example/default_new_input/agent_objectives.csv +++ b/src/muse/data/example/default_new_input/agent_objectives.csv @@ -1,2 +1,4 @@ agent_id,objective_type,decision_weight,objective_sort -Agent1,LCOE,1,TRUE +A1_RES,LCOE,1,TRUE +A1_PWR,LCOE,1,TRUE +A1_GAS,LCOE,1,TRUE diff --git a/src/muse/data/example/default_new_input/agents.csv b/src/muse/data/example/default_new_input/agents.csv index 08d51e0ff..d13223d4c 100644 --- a/src/muse/data/example/default_new_input/agents.csv +++ b/src/muse/data/example/default_new_input/agents.csv @@ -1,4 +1,4 @@ id,description,region_id,sector_id,search_rule,decision_rule,quantity -Agent1,Agent1_R1_residential,R1,residential,all,single,1 -Agent1,Agent1_R1_power,R1,power,all,single,1 -Agent1,Agent1_R1_gas,R1,gas,all,single,1 +A1_RES,Residential sector agent,R1,residential,all,single,1 +A1_PWR,Power sector agent,R1,power,all,single,1 +A1_GAS,Gas sector agent,R1,gas,all,single,1 diff --git a/src/muse/data/example/default_new_input/assets.csv b/src/muse/data/example/default_new_input/assets.csv index 487ed794f..1659aa9e1 100644 --- a/src/muse/data/example/default_new_input/assets.csv +++ b/src/muse/data/example/default_new_input/assets.csv @@ -1,8 +1,6 @@ -agent_id,process_id,region_id,year,capacity -Agent1,gassupply1,R1,2020,15 -Agent1,gassupply1,R1,2025,15 -Agent1,gassupply1,R1,2030,7.5 -Agent1,gasCCGT,R1,2020,1 -Agent1,gasCCGT,R1,2025,1 -Agent1,gasboiler,R1,2020,10 -Agent1,gasboiler,R1,2025,5 +agent_id,process_id,region_id,commission_year,capacity +A1_GAS,gassupply1,R1,1995,7.5 +A1_GAS,gassupply1,R1,2000,7.5 +A1_PWR,gasCCGT,R1,1995,1 +A1_RES,gasboiler,R1,2015,5 +A1_RES,gasboiler,R1,2020,5 diff --git a/src/muse/data/example/default_new_input/demand_slicing.csv b/src/muse/data/example/default_new_input/demand_slicing.csv index b9b610874..edf969b5d 100644 --- a/src/muse/data/example/default_new_input/demand_slicing.csv +++ b/src/muse/data/example/default_new_input/demand_slicing.csv @@ -1,7 +1,7 @@ -commodity_id,region_id,year,timeslice,fraction -heat,R1,2020,all-year.all-week.night,0.1 -heat,R1,2020,all-year.all-week.morning,0.15 -heat,R1,2020,all-year.all-week.afternoon,0.1 -heat,R1,2020,all-year.all-week.early-peak,0.15 -heat,R1,2020,all-year.all-week.late-peak,0.3 -heat,R1,2020,all-year.all-week.evening,0.2 +commodity_id,region_id,time_slice,fraction +heat,R1,all-year.all-week.night,0.1 +heat,R1,all-year.all-week.morning,0.15 +heat,R1,all-year.all-week.afternoon,0.1 +heat,R1,all-year.all-week.early-peak,0.15 +heat,R1,all-year.all-week.late-peak,0.3 +heat,R1,all-year.all-week.evening,0.2 diff --git a/src/muse/data/example/default_new_input/process_availabilities.csv b/src/muse/data/example/default_new_input/process_availabilities.csv index 7527ee58f..6c6901e07 100644 --- a/src/muse/data/example/default_new_input/process_availabilities.csv +++ b/src/muse/data/example/default_new_input/process_availabilities.csv @@ -1,4 +1,4 @@ process_id,region_id,year,timeslice,limit_type,value -gassupply1,R1,2020,all-year.all-week,up,0.9 -gasCCGT,R1,2020,all-year.all-week,up,0.9 -windturbine,R1,2020,all-year.all-week,up,0.4 
+gassupply1,R1,2020,annual,up,0.9 +gasCCGT,R1,2020,annual,up,0.9 +windturbine,R1,2020,annual,up,0.4 diff --git a/src/muse/data/example/default_new_input/process_flows.csv b/src/muse/data/example/default_new_input/process_flows.csv index f0b979146..76415278a 100644 --- a/src/muse/data/example/default_new_input/process_flows.csv +++ b/src/muse/data/example/default_new_input/process_flows.csv @@ -1,4 +1,4 @@ -process_id,commodity_id,region_id,year,flow +process_id,commodity_id,region_id,year,coeff gassupply1,gas,R1,2020,1 gasCCGT,gas,R1,2020,-1.67 gasCCGT,electricity,R1,2020,1 diff --git a/src/muse/data/example/default_new_input/process_parameters.csv b/src/muse/data/example/default_new_input/process_parameters.csv index cd6e15e91..4a7f294b4 100644 --- a/src/muse/data/example/default_new_input/process_parameters.csv +++ b/src/muse/data/example/default_new_input/process_parameters.csv @@ -1,6 +1,6 @@ -process_id,region_id,year,cap_par,cap_exp,fix_par,fix_exp,var_par,var_exp,max_capacity_addition,max_capacity_growth,total_capacity_limit,lifetime,discount_rate -gassupply1,R1,2020,0,1,0,1,2.55,1,5,1,60,35,0.1 -gasCCGT,R1,2020,23.78234399,1,0,1,0,1,2,1,60,35,0.1 -windturbine,R1,2020,36.30771182,1,0,1,0,1,2,1,60,25,0.1 -gasboiler,R1,2020,3.8,1,0,1,0,1,10,0.02,60,10,0.1 -heatpump,R1,2020,8.866667,1,0,1,0,1,10,0.02,60,10,0.1 +process_id,region_id,year,cap_par,fix_par,var_par,max_capacity_addition,max_capacity_growth,total_capacity_limit,lifetime,discount_rate +gassupply1,R1,2020,0,0,2.55,5,1,60,35,0.1 +gasCCGT,R1,2020,23.78234399,0,0,2,1,60,35,0.1 +windturbine,R1,2020,36.30771182,0,0,2,1,60,25,0.1 +gasboiler,R1,2020,3.8,0,0,10,0.02,60,10,0.1 +heatpump,R1,2020,8.866667,0,0,10,0.02,60,10,0.1 diff --git a/src/muse/data/example/default_new_input/time_slices.csv b/src/muse/data/example/default_new_input/time_slices.csv index dc8774fe0..7c7509279 100644 --- a/src/muse/data/example/default_new_input/time_slices.csv +++ b/src/muse/data/example/default_new_input/time_slices.csv @@ -1,7 +1,7 @@ season,day,time_of_day,fraction -all-year,all-week,night,0.1667 -all-year,all-week,morning,0.1667 -all-year,all-week,afternoon,0.1667 -all-year,all-week,early-peak,0.1667 -all-year,all-week,late-peak,0.1667 -all-year,all-week,evening,0.1667 +all-year,all-week,night,0.166667 +all-year,all-week,morning,0.166667 +all-year,all-week,afternoon,0.1666667 +all-year,all-week,early-peak,0.166667 +all-year,all-week,late-peak,0.166667 +all-year,all-week,evening,0.166667 From f69b9bd4f05078818128320b3014d77a4a9b853c Mon Sep 17 00:00:00 2001 From: Christopher Cave-Ayland Date: Thu, 27 Jun 2024 17:20:23 +0100 Subject: [PATCH 10/43] First pass at duckdb data interface --- src/muse/new_input/readers.py | 76 +++++++++++++++++ tests/test_readers.py | 149 ++++++++++++++++++++++++++++++++++ 2 files changed, 225 insertions(+) create mode 100644 src/muse/new_input/readers.py diff --git a/src/muse/new_input/readers.py b/src/muse/new_input/readers.py new file mode 100644 index 000000000..eafa4fb07 --- /dev/null +++ b/src/muse/new_input/readers.py @@ -0,0 +1,76 @@ +import duckdb +import numpy as np +import xarray as xr + + +def read_inputs(data_dir): + data = {} + con = duckdb.connect(":memory:") + + with open(data_dir / "regions.csv") as f: + regions = read_regions_csv(f, con) # noqa: F841 + + with open(data_dir / "commodities.csv") as f: + commodities = read_commodities_csv(f, con) + + with open(data_dir / "demand.csv") as f: + demand = read_demand_csv(f, con) # noqa: F841 + + data["global_commodities"] = 
calculate_global_commodities(commodities) + return data + + +def read_regions_csv(buffer_, con): + sql = """CREATE TABLE regions ( + name VARCHAR PRIMARY KEY, + ); + """ + con.sql(sql) + rel = con.read_csv(buffer_, header=True, delimiter=",") # noqa: F841 + con.sql("INSERT INTO regions SELECT name FROM rel;") + return con.sql("SELECT name from regions").fetchnumpy() + + +def read_commodities_csv(buffer_, con): + sql = """CREATE TABLE commodities ( + name VARCHAR PRIMARY KEY, + type VARCHAR CHECK (type IN ('energy', 'service', 'material', 'environmental')), + unit VARCHAR, + ); + """ + con.sql(sql) + rel = con.read_csv(buffer_, header=True, delimiter=",") # noqa: F841 + con.sql("INSERT INTO commodities SELECT name, type, unit FROM rel;") + + return con.sql("select name, type, unit from commodities").fetchnumpy() + + +def calculate_global_commodities(commodities): + names = commodities["name"].astype(np.dtype("str")) + types = commodities["type"].astype(np.dtype("str")) + units = commodities["unit"].astype(np.dtype("str")) + + type_array = xr.DataArray( + data=types, dims=["commodity"], coords=dict(commodity=names) + ) + + unit_array = xr.DataArray( + data=units, dims=["commodity"], coords=dict(commodity=names) + ) + + data = xr.Dataset(data_vars=dict(type=type_array, unit=unit_array)) + return data + + +def read_demand_csv(buffer_, con): + sql = """CREATE TABLE demand ( + year BIGINT, + commodity VARCHAR REFERENCES commodities(name), + region VARCHAR REFERENCES regions(name), + demand DOUBLE, + ); + """ + con.sql(sql) + rel = con.read_csv(buffer_, header=True, delimiter=",") # noqa: F841 + con.sql("INSERT INTO demand SELECT year, commodity_name, region, demand FROM rel;") + return con.sql("SELECT * from demand").fetchnumpy() diff --git a/tests/test_readers.py b/tests/test_readers.py index 924dcacff..107a5bfb1 100644 --- a/tests/test_readers.py +++ b/tests/test_readers.py @@ -1,6 +1,9 @@ +from io import StringIO from itertools import chain, permutations from pathlib import Path +import duckdb +import numpy as np import pandas as pd import toml import xarray as xr @@ -314,3 +317,149 @@ def test_get_nan_coordinates(): dataset3 = xr.Dataset.from_dataframe(df3.set_index(["region", "year"])) nan_coords3 = get_nan_coordinates(dataset3) assert nan_coords3 == [] + + +@fixture +def default_new_input(tmp_path): + from muse.examples import copy_model + + copy_model("default_new_input", tmp_path) + return tmp_path / "model" + + +@fixture +def con(): + return duckdb.connect(":memory:") + + +@fixture +def populate_regions(default_new_input, con): + from muse.new_input.readers import read_regions_csv + + with open(default_new_input / "regions.csv") as f: + return read_regions_csv(f, con) + + +@fixture +def populate_commodities(default_new_input, con): + from muse.new_input.readers import read_commodities_csv + + with open(default_new_input / "commodities.csv") as f: + return read_commodities_csv(f, con) + + +@fixture +def populate_demand(default_new_input, con, populate_regions, populate_commodities): + from muse.new_input.readers import read_demand_csv + + with open(default_new_input / "demand.csv") as f: + return read_demand_csv(f, con) + + +def test_read_regions(populate_regions): + assert populate_regions["name"] == np.array(["R1"]) + + +def test_read_new_global_commodities(populate_commodities): + data = populate_commodities + assert list(data["name"]) == ["electricity", "gas", "heat", "wind", "CO2f"] + assert list(data["type"]) == ["energy"] * 5 + assert list(data["unit"]) == ["PJ"] * 4 + ["kt"] + + 
+def test_calculate_global_commodities(populate_commodities): + from muse.new_input.readers import calculate_global_commodities + + data = calculate_global_commodities(populate_commodities) + + assert isinstance(data, xr.Dataset) + assert set(data.dims) == {"commodity"} + for dt in data.dtypes.values(): + assert np.issubdtype(dt, np.dtype("str")) + + assert list(data.coords["commodity"].values) == list(populate_commodities["name"]) + assert list(data.data_vars["type"].values) == list(populate_commodities["type"]) + assert list(data.data_vars["unit"].values) == list(populate_commodities["unit"]) + + +def test_read_new_global_commodities_type_constraint(default_new_input, con): + from muse.new_input.readers import read_commodities_csv + + csv = StringIO("name,type,unit\nfoo,invalid,bar\n") + with raises(duckdb.ConstraintException): + read_commodities_csv(csv, con) + + +def test_new_read_demand_csv(populate_demand): + data = populate_demand + assert np.all(data["year"] == np.array([2020, 2050])) + assert np.all(data["commodity"] == np.array(["heat", "heat"])) + assert np.all(data["region"] == np.array(["R1", "R1"])) + assert np.all(data["demand"] == np.array([10, 30])) + + +def test_new_read_demand_csv_commodity_constraint( + default_new_input, con, populate_commodities, populate_regions +): + from muse.new_input.readers import read_demand_csv + + csv = StringIO("year,commodity_name,region,demand\n2020,invalid,R1,0\n") + with raises(duckdb.ConstraintException, match=".*foreign key.*"): + read_demand_csv(csv, con) + + +def test_new_read_demand_csv_region_constraint( + default_new_input, con, populate_commodities, populate_regions +): + from muse.new_input.readers import read_demand_csv + + csv = StringIO("year,commodity_name,region,demand\n2020,heat,invalid,0\n") + with raises(duckdb.ConstraintException, match=".*foreign key.*"): + read_demand_csv(csv, con) + + +@mark.xfail +def test_demand_dataset(default_new_input): + import duckdb + + from muse.new_input.readers import read_commodities, read_demand, read_regions + + con = duckdb.connect(":memory:") + + read_regions(default_new_input, con) + read_commodities(default_new_input, con) + data = read_demand(default_new_input, con) + + assert isinstance(data, xr.DataArray) + assert data.dtype == np.float64 + + assert set(data.dims) == {"year", "commodity", "region", "timeslice"} + assert list(data.coords["region"].values) == ["R1"] + assert list(data.coords["timeslice"].values) == list(range(1, 7)) + assert list(data.coords["year"].values) == [2020, 2050] + assert set(data.coords["commodity"].values) == { + "electricity", + "gas", + "heat", + "wind", + "CO2f", + } + + assert data.sel(year=2020, commodity="electricity", region="R1", timeslice=0) == 1 + + +@mark.xfail +def test_new_read_initial_market(default_new_input): + from muse.new_input.readers import read_inputs + + all_data = read_inputs(default_new_input) + data = all_data["initial_market"] + + assert isinstance(data, xr.Dataset) + assert set(data.dims) == {"region", "year", "commodity", "timeslice"} + assert dict(data.dtypes) == dict( + prices=np.float64, + exports=np.float64, + imports=np.float64, + static_trade=np.float64, + ) From 2685eb09d7f94c81777c096b90c9cdfcf9cbf2dd Mon Sep 17 00:00:00 2001 From: Tom Bland Date: Wed, 3 Jul 2024 16:03:19 +0100 Subject: [PATCH 11/43] New db tables --- src/muse/new_input/readers.py | 111 +++++++++++++++++++++++++--------- 1 file changed, 83 insertions(+), 28 deletions(-) diff --git a/src/muse/new_input/readers.py b/src/muse/new_input/readers.py index 
eafa4fb07..a02f40a84 100644 --- a/src/muse/new_input/readers.py +++ b/src/muse/new_input/readers.py @@ -7,28 +7,26 @@ def read_inputs(data_dir): data = {} con = duckdb.connect(":memory:") - with open(data_dir / "regions.csv") as f: - regions = read_regions_csv(f, con) # noqa: F841 - with open(data_dir / "commodities.csv") as f: commodities = read_commodities_csv(f, con) + with open(data_dir / "commodity_trade.csv") as f: + commodity_trade = read_commodity_trade_csv(f, con) # noqa: F841 + + with open(data_dir / "commodity_costs.csv") as f: + commodity_costs = read_commodity_costs_csv(f, con) # noqa: F841 + with open(data_dir / "demand.csv") as f: demand = read_demand_csv(f, con) # noqa: F841 - data["global_commodities"] = calculate_global_commodities(commodities) - return data + with open(data_dir / "demand_slicing.csv") as f: + demand_slicing = read_demand_slicing_csv(f, con) # noqa: F841 + with open(data_dir / "regions.csv") as f: + regions = read_regions_csv(f, con) # noqa: F841 -def read_regions_csv(buffer_, con): - sql = """CREATE TABLE regions ( - name VARCHAR PRIMARY KEY, - ); - """ - con.sql(sql) - rel = con.read_csv(buffer_, header=True, delimiter=",") # noqa: F841 - con.sql("INSERT INTO regions SELECT name FROM rel;") - return con.sql("SELECT name from regions").fetchnumpy() + data["global_commodities"] = calculate_global_commodities(commodities) + return data def read_commodities_csv(buffer_, con): @@ -41,25 +39,38 @@ def read_commodities_csv(buffer_, con): con.sql(sql) rel = con.read_csv(buffer_, header=True, delimiter=",") # noqa: F841 con.sql("INSERT INTO commodities SELECT name, type, unit FROM rel;") - return con.sql("select name, type, unit from commodities").fetchnumpy() -def calculate_global_commodities(commodities): - names = commodities["name"].astype(np.dtype("str")) - types = commodities["type"].astype(np.dtype("str")) - units = commodities["unit"].astype(np.dtype("str")) - - type_array = xr.DataArray( - data=types, dims=["commodity"], coords=dict(commodity=names) - ) +def read_commodity_trade_csv(buffer_, con): + sql = """CREATE TABLE commodity_trade ( + commodity VARCHAR REFERENCES commodities(name), + region VARCHAR REFERENCES regions(name), + year BIGINT, + import DOUBLE, + export DOUBLE, + ); + """ + con.sql(sql) + rel = con.read_csv(buffer_, header=True, delimiter=",") # noqa: F841 + con.sql("""INSERT INTO commodity_trade SELECT + commodity, region, year, import, export FROM rel;""") + return con.sql("SELECT * from commodity_trade").fetchnumpy() - unit_array = xr.DataArray( - data=units, dims=["commodity"], coords=dict(commodity=names) - ) - data = xr.Dataset(data_vars=dict(type=type_array, unit=unit_array)) - return data +def read_commodity_costs_csv(buffer_, con): + sql = """CREATE TABLE commodity_costs ( + year BIGINT, + region VARCHAR REFERENCES regions(name), + commodity VARCHAR REFERENCES commodities(name), + value DOUBLE, + ); + """ + con.sql(sql) + rel = con.read_csv(buffer_, header=True, delimiter=",") # noqa: F841 + con.sql("""INSERT INTO commodity_costs SELECT + year, region, commodity_name, value FROM rel;""") + return con.sql("SELECT * from commodity_costs").fetchnumpy() def read_demand_csv(buffer_, con): @@ -74,3 +85,47 @@ def read_demand_csv(buffer_, con): rel = con.read_csv(buffer_, header=True, delimiter=",") # noqa: F841 con.sql("INSERT INTO demand SELECT year, commodity_name, region, demand FROM rel;") return con.sql("SELECT * from demand").fetchnumpy() + + +def read_demand_slicing_csv(buffer_, con): + sql = """CREATE TABLE demand_slicing ( + 
commodity VARCHAR REFERENCES commodities(name), + region VARCHAR REFERENCES regions(name), + timeslice VARCHAR, + fraction DOUBLE CHECK (fraction >= 0 AND fraction <= 1), + year BIGINT, + ); + """ + con.sql(sql) + rel = con.read_csv(buffer_, header=True, delimiter=",") # noqa: F841 + con.sql("""INSERT INTO demand_slicing SELECT + commodity, region, timeslice, fraction, year FROM rel;""") + return con.sql("SELECT * from demand_slicing").fetchnumpy() + + +def read_regions_csv(buffer_, con): + sql = """CREATE TABLE regions ( + name VARCHAR PRIMARY KEY, + ); + """ + con.sql(sql) + rel = con.read_csv(buffer_, header=True, delimiter=",") # noqa: F841 + con.sql("INSERT INTO regions SELECT name FROM rel;") + return con.sql("SELECT name from regions").fetchnumpy() + + +def calculate_global_commodities(commodities): + names = commodities["name"].astype(np.dtype("str")) + types = commodities["type"].astype(np.dtype("str")) + units = commodities["unit"].astype(np.dtype("str")) + + type_array = xr.DataArray( + data=types, dims=["commodity"], coords=dict(commodity=names) + ) + + unit_array = xr.DataArray( + data=units, dims=["commodity"], coords=dict(commodity=names) + ) + + data = xr.Dataset(data_vars=dict(type=type_array, unit=unit_array)) + return data From 71d6388cab4c886140397a5608c1940930a41c7b Mon Sep 17 00:00:00 2001 From: Tom Bland Date: Thu, 4 Jul 2024 09:29:16 +0100 Subject: [PATCH 12/43] Update tables for new csv columns --- src/muse/new_input/readers.py | 40 +++++++++++++++++------------------ tests/test_readers.py | 28 ++++++++++++------------ 2 files changed, 34 insertions(+), 34 deletions(-) diff --git a/src/muse/new_input/readers.py b/src/muse/new_input/readers.py index a02f40a84..b9228f5bf 100644 --- a/src/muse/new_input/readers.py +++ b/src/muse/new_input/readers.py @@ -31,21 +31,21 @@ def read_inputs(data_dir): def read_commodities_csv(buffer_, con): sql = """CREATE TABLE commodities ( - name VARCHAR PRIMARY KEY, + id VARCHAR PRIMARY KEY, type VARCHAR CHECK (type IN ('energy', 'service', 'material', 'environmental')), unit VARCHAR, ); """ con.sql(sql) rel = con.read_csv(buffer_, header=True, delimiter=",") # noqa: F841 - con.sql("INSERT INTO commodities SELECT name, type, unit FROM rel;") - return con.sql("select name, type, unit from commodities").fetchnumpy() + con.sql("INSERT INTO commodities SELECT id, type, unit FROM rel;") + return con.sql("select * from commodities").fetchnumpy() def read_commodity_trade_csv(buffer_, con): sql = """CREATE TABLE commodity_trade ( - commodity VARCHAR REFERENCES commodities(name), - region VARCHAR REFERENCES regions(name), + commodity VARCHAR REFERENCES commodities(id), + region VARCHAR REFERENCES regions(id), year BIGINT, import DOUBLE, export DOUBLE, @@ -54,68 +54,68 @@ def read_commodity_trade_csv(buffer_, con): con.sql(sql) rel = con.read_csv(buffer_, header=True, delimiter=",") # noqa: F841 con.sql("""INSERT INTO commodity_trade SELECT - commodity, region, year, import, export FROM rel;""") + commodity_id, region_id, year, import, export FROM rel;""") return con.sql("SELECT * from commodity_trade").fetchnumpy() def read_commodity_costs_csv(buffer_, con): sql = """CREATE TABLE commodity_costs ( + commodity VARCHAR REFERENCES commodities(id), + region VARCHAR REFERENCES regions(id), year BIGINT, - region VARCHAR REFERENCES regions(name), - commodity VARCHAR REFERENCES commodities(name), value DOUBLE, ); """ con.sql(sql) rel = con.read_csv(buffer_, header=True, delimiter=",") # noqa: F841 con.sql("""INSERT INTO commodity_costs SELECT - year, 
region, commodity_name, value FROM rel;""") + commidity_id, region_id, year, value FROM rel;""") return con.sql("SELECT * from commodity_costs").fetchnumpy() def read_demand_csv(buffer_, con): sql = """CREATE TABLE demand ( + commodity VARCHAR REFERENCES commodities(id), + region VARCHAR REFERENCES regions(id), year BIGINT, - commodity VARCHAR REFERENCES commodities(name), - region VARCHAR REFERENCES regions(name), demand DOUBLE, ); """ con.sql(sql) rel = con.read_csv(buffer_, header=True, delimiter=",") # noqa: F841 - con.sql("INSERT INTO demand SELECT year, commodity_name, region, demand FROM rel;") + con.sql("INSERT INTO demand SELECT commodity_id, region_id, year, demand FROM rel;") return con.sql("SELECT * from demand").fetchnumpy() def read_demand_slicing_csv(buffer_, con): sql = """CREATE TABLE demand_slicing ( - commodity VARCHAR REFERENCES commodities(name), - region VARCHAR REFERENCES regions(name), + commodity VARCHAR REFERENCES commodities(id), + region VARCHAR REFERENCES regions(id), + year BIGINT, timeslice VARCHAR, fraction DOUBLE CHECK (fraction >= 0 AND fraction <= 1), - year BIGINT, ); """ con.sql(sql) rel = con.read_csv(buffer_, header=True, delimiter=",") # noqa: F841 con.sql("""INSERT INTO demand_slicing SELECT - commodity, region, timeslice, fraction, year FROM rel;""") + commodity_id, region_id, year, timeslice, fraction FROM rel;""") return con.sql("SELECT * from demand_slicing").fetchnumpy() def read_regions_csv(buffer_, con): sql = """CREATE TABLE regions ( - name VARCHAR PRIMARY KEY, + id VARCHAR PRIMARY KEY, ); """ con.sql(sql) rel = con.read_csv(buffer_, header=True, delimiter=",") # noqa: F841 - con.sql("INSERT INTO regions SELECT name FROM rel;") - return con.sql("SELECT name from regions").fetchnumpy() + con.sql("INSERT INTO regions SELECT id FROM rel;") + return con.sql("SELECT * from regions").fetchnumpy() def calculate_global_commodities(commodities): - names = commodities["name"].astype(np.dtype("str")) + names = commodities["id"].astype(np.dtype("str")) types = commodities["type"].astype(np.dtype("str")) units = commodities["unit"].astype(np.dtype("str")) diff --git a/tests/test_readers.py b/tests/test_readers.py index 107a5bfb1..221ec097b 100644 --- a/tests/test_readers.py +++ b/tests/test_readers.py @@ -332,14 +332,6 @@ def con(): return duckdb.connect(":memory:") -@fixture -def populate_regions(default_new_input, con): - from muse.new_input.readers import read_regions_csv - - with open(default_new_input / "regions.csv") as f: - return read_regions_csv(f, con) - - @fixture def populate_commodities(default_new_input, con): from muse.new_input.readers import read_commodities_csv @@ -356,13 +348,21 @@ def populate_demand(default_new_input, con, populate_regions, populate_commoditi return read_demand_csv(f, con) +@fixture +def populate_regions(default_new_input, con): + from muse.new_input.readers import read_regions_csv + + with open(default_new_input / "regions.csv") as f: + return read_regions_csv(f, con) + + def test_read_regions(populate_regions): - assert populate_regions["name"] == np.array(["R1"]) + assert populate_regions["id"] == np.array(["R1"]) def test_read_new_global_commodities(populate_commodities): data = populate_commodities - assert list(data["name"]) == ["electricity", "gas", "heat", "wind", "CO2f"] + assert list(data["id"]) == ["electricity", "gas", "heat", "wind", "CO2f"] assert list(data["type"]) == ["energy"] * 5 assert list(data["unit"]) == ["PJ"] * 4 + ["kt"] @@ -377,7 +377,7 @@ def 
test_calculate_global_commodities(populate_commodities): for dt in data.dtypes.values(): assert np.issubdtype(dt, np.dtype("str")) - assert list(data.coords["commodity"].values) == list(populate_commodities["name"]) + assert list(data.coords["commodity"].values) == list(populate_commodities["id"]) assert list(data.data_vars["type"].values) == list(populate_commodities["type"]) assert list(data.data_vars["unit"].values) == list(populate_commodities["unit"]) @@ -385,7 +385,7 @@ def test_calculate_global_commodities(populate_commodities): def test_read_new_global_commodities_type_constraint(default_new_input, con): from muse.new_input.readers import read_commodities_csv - csv = StringIO("name,type,unit\nfoo,invalid,bar\n") + csv = StringIO("id,type,unit\nfoo,invalid,bar\n") with raises(duckdb.ConstraintException): read_commodities_csv(csv, con) @@ -403,7 +403,7 @@ def test_new_read_demand_csv_commodity_constraint( ): from muse.new_input.readers import read_demand_csv - csv = StringIO("year,commodity_name,region,demand\n2020,invalid,R1,0\n") + csv = StringIO("year,commodity_id,region_id,demand\n2020,invalid,R1,0\n") with raises(duckdb.ConstraintException, match=".*foreign key.*"): read_demand_csv(csv, con) @@ -413,7 +413,7 @@ def test_new_read_demand_csv_region_constraint( ): from muse.new_input.readers import read_demand_csv - csv = StringIO("year,commodity_name,region,demand\n2020,heat,invalid,0\n") + csv = StringIO("year,commodity_id,region_id,demand\n2020,heat,invalid,0\n") with raises(duckdb.ConstraintException, match=".*foreign key.*"): read_demand_csv(csv, con) From e3b6ece3b450e8263b5c6edc50629381d630aa8d Mon Sep 17 00:00:00 2001 From: Tom Bland Date: Fri, 5 Jul 2024 14:15:29 +0100 Subject: [PATCH 13/43] Split new tests into new file --- tests/test_new_readers.py | 222 ++++++++++++++++++++++++++++++++++++++ tests/test_readers.py | 173 ----------------------------- 2 files changed, 222 insertions(+), 173 deletions(-) create mode 100644 tests/test_new_readers.py diff --git a/tests/test_new_readers.py b/tests/test_new_readers.py new file mode 100644 index 000000000..6c82434d6 --- /dev/null +++ b/tests/test_new_readers.py @@ -0,0 +1,222 @@ +from io import StringIO + +import duckdb +import numpy as np +import xarray as xr +from pytest import approx, fixture, mark, raises + + +@fixture +def default_new_input(tmp_path): + from muse.examples import copy_model + + copy_model("default_new_input", tmp_path) + return tmp_path / "model" + + +@fixture +def con(): + return duckdb.connect(":memory:") + + +@fixture +def populate_commodities(default_new_input, con): + from muse.new_input.readers import read_commodities_csv + + with open(default_new_input / "commodities.csv") as f: + return read_commodities_csv(f, con) + + +@fixture +def populate_demand(default_new_input, con, populate_regions, populate_commodities): + from muse.new_input.readers import read_demand_csv + + with open(default_new_input / "demand.csv") as f: + return read_demand_csv(f, con) + + +@fixture +def populate_regions(default_new_input, con): + from muse.new_input.readers import read_regions_csv + + with open(default_new_input / "regions.csv") as f: + return read_regions_csv(f, con) + + +def test_read_regions(populate_regions): + assert populate_regions["id"] == np.array(["R1"]) + + +def test_read_new_global_commodities(populate_commodities): + data = populate_commodities + assert list(data["id"]) == ["electricity", "gas", "heat", "wind", "CO2f"] + assert list(data["type"]) == ["energy"] * 5 + assert list(data["unit"]) == ["PJ"] * 4 
+ ["kt"] + + +def test_calculate_global_commodities(populate_commodities): + from muse.new_input.readers import calculate_global_commodities + + data = calculate_global_commodities(populate_commodities) + + assert isinstance(data, xr.Dataset) + assert set(data.dims) == {"commodity"} + for dt in data.dtypes.values(): + assert np.issubdtype(dt, np.dtype("str")) + + assert list(data.coords["commodity"].values) == list(populate_commodities["id"]) + assert list(data.data_vars["type"].values) == list(populate_commodities["type"]) + assert list(data.data_vars["unit"].values) == list(populate_commodities["unit"]) + + +def test_read_new_global_commodities_type_constraint(default_new_input, con): + from muse.new_input.readers import read_commodities_csv + + csv = StringIO("id,type,unit\nfoo,invalid,bar\n") + with raises(duckdb.ConstraintException): + read_commodities_csv(csv, con) + + +def test_new_read_demand_csv(populate_demand): + data = populate_demand + assert np.all(data["year"] == np.array([2020, 2050])) + assert np.all(data["commodity"] == np.array(["heat", "heat"])) + assert np.all(data["region"] == np.array(["R1", "R1"])) + assert np.all(data["demand"] == np.array([10, 30])) + + +def test_new_read_demand_csv_commodity_constraint( + default_new_input, con, populate_commodities, populate_regions +): + from muse.new_input.readers import read_demand_csv + + csv = StringIO("year,commodity_id,region_id,demand\n2020,invalid,R1,0\n") + with raises(duckdb.ConstraintException, match=".*foreign key.*"): + read_demand_csv(csv, con) + + +def test_new_read_demand_csv_region_constraint( + default_new_input, con, populate_commodities, populate_regions +): + from muse.new_input.readers import read_demand_csv + + csv = StringIO("year,commodity_id,region_id,demand\n2020,heat,invalid,0\n") + with raises(duckdb.ConstraintException, match=".*foreign key.*"): + read_demand_csv(csv, con) + + +@mark.xfail +def test_demand_dataset(default_new_input): + import duckdb + + from muse.new_input.readers import read_commodities, read_demand, read_regions + + con = duckdb.connect(":memory:") + + read_regions(default_new_input, con) + read_commodities(default_new_input, con) + data = read_demand(default_new_input, con) + + assert isinstance(data, xr.DataArray) + assert data.dtype == np.float64 + + assert set(data.dims) == {"year", "commodity", "region", "timeslice"} + assert list(data.coords["region"].values) == ["R1"] + assert list(data.coords["timeslice"].values) == list(range(1, 7)) + assert list(data.coords["year"].values) == [2020, 2050] + assert set(data.coords["commodity"].values) == { + "electricity", + "gas", + "heat", + "wind", + "CO2f", + } + + assert data.sel(year=2020, commodity="electricity", region="R1", timeslice=0) == 1 + + +@mark.xfail +def test_new_read_initial_market(default_new_input): + from muse.new_input.readers import read_inputs + + all_data = read_inputs(default_new_input) + data = all_data["initial_market"] + + assert isinstance(data, xr.Dataset) + assert set(data.dims) == {"region", "year", "commodity", "timeslice"} + assert dict(data.dtypes) == dict( + prices=np.float64, + exports=np.float64, + imports=np.float64, + static_trade=np.float64, + ) + assert list(data.coords["region"].values) == ["R1"] + assert list(data.coords["year"].values) == list(range(2010, 2105, 5)) + assert list(data.coords["commodity"].values) == [ + "electricity", + "gas", + "heat", + "CO2f", + "wind", + ] + month_values = ["all-year"] * 6 + day_values = ["all-week"] * 6 + hour_values = [ + "night", + "morning", + 
"afternoon", + "early-peak", + "late-peak", + "evening", + ] + + assert list(data.coords["timeslice"].values) == list( + zip(month_values, day_values, hour_values) + ) + assert list(data.coords["month"]) == month_values + assert list(data.coords["day"]) == day_values + assert list(data.coords["hour"]) == hour_values + + assert all(var.coords.equals(data.coords) for var in data.data_vars.values()) + + prices = data.data_vars["prices"] + assert approx( + prices.sel( + year=2010, + region="R1", + commodity="electricity", + timeslice=("all-year", "all-week", "night"), + ) + - 14.81481, + abs=1e-4, + ) + + exports = data.data_vars["exports"] + assert ( + exports.sel( + year=2010, + region="R1", + commodity="electricity", + timeslice=("all-year", "all-week", "night"), + ) + ) == 0 + + imports = data.data_vars["imports"] + assert ( + imports.sel( + year=2010, + region="R1", + commodity="electricity", + timeslice=("all-year", "all-week", "night"), + ) + ) == 0 + + static_trade = data.data_vars["static_trade"] + assert ( + static_trade.sel( + year=2010, + region="R1", + commodity="electricity", + timeslice=("all-year", "all-week", "night"), + ) + ) == 0 diff --git a/tests/test_readers.py b/tests/test_readers.py index 221ec097b..e0b4bcd63 100644 --- a/tests/test_readers.py +++ b/tests/test_readers.py @@ -1,9 +1,6 @@ -from io import StringIO from itertools import chain, permutations from pathlib import Path -import duckdb -import numpy as np import pandas as pd import toml import xarray as xr @@ -293,173 +290,3 @@ def test_get_nan_coordinates(): dataset1 = xr.Dataset.from_dataframe(df1.set_index(["region", "year"])) nan_coords1 = get_nan_coordinates(dataset1) assert nan_coords1 == [("R1", 2021)] - - # Test 2: Missing coordinate combinations - df2 = pd.DataFrame( - { - "region": ["R1", "R1", "R2"], # Missing R2-2021 - "year": [2020, 2021, 2020], - "value": [1.0, 2.0, 3.0], - } - ) - dataset2 = xr.Dataset.from_dataframe(df2.set_index(["region", "year"])) - nan_coords2 = get_nan_coordinates(dataset2) - assert nan_coords2 == [("R2", 2021)] - - # Test 3: No NaN values - df3 = pd.DataFrame( - { - "region": ["R1", "R1", "R2", "R2"], - "year": [2020, 2021, 2020, 2021], - "value": [1.0, 2.0, 3.0, 4.0], - } - ) - dataset3 = xr.Dataset.from_dataframe(df3.set_index(["region", "year"])) - nan_coords3 = get_nan_coordinates(dataset3) - assert nan_coords3 == [] - - -@fixture -def default_new_input(tmp_path): - from muse.examples import copy_model - - copy_model("default_new_input", tmp_path) - return tmp_path / "model" - - -@fixture -def con(): - return duckdb.connect(":memory:") - - -@fixture -def populate_commodities(default_new_input, con): - from muse.new_input.readers import read_commodities_csv - - with open(default_new_input / "commodities.csv") as f: - return read_commodities_csv(f, con) - - -@fixture -def populate_demand(default_new_input, con, populate_regions, populate_commodities): - from muse.new_input.readers import read_demand_csv - - with open(default_new_input / "demand.csv") as f: - return read_demand_csv(f, con) - - -@fixture -def populate_regions(default_new_input, con): - from muse.new_input.readers import read_regions_csv - - with open(default_new_input / "regions.csv") as f: - return read_regions_csv(f, con) - - -def test_read_regions(populate_regions): - assert populate_regions["id"] == np.array(["R1"]) - - -def test_read_new_global_commodities(populate_commodities): - data = populate_commodities - assert list(data["id"]) == ["electricity", "gas", "heat", "wind", "CO2f"] - assert 
list(data["type"]) == ["energy"] * 5 - assert list(data["unit"]) == ["PJ"] * 4 + ["kt"] - - -def test_calculate_global_commodities(populate_commodities): - from muse.new_input.readers import calculate_global_commodities - - data = calculate_global_commodities(populate_commodities) - - assert isinstance(data, xr.Dataset) - assert set(data.dims) == {"commodity"} - for dt in data.dtypes.values(): - assert np.issubdtype(dt, np.dtype("str")) - - assert list(data.coords["commodity"].values) == list(populate_commodities["id"]) - assert list(data.data_vars["type"].values) == list(populate_commodities["type"]) - assert list(data.data_vars["unit"].values) == list(populate_commodities["unit"]) - - -def test_read_new_global_commodities_type_constraint(default_new_input, con): - from muse.new_input.readers import read_commodities_csv - - csv = StringIO("id,type,unit\nfoo,invalid,bar\n") - with raises(duckdb.ConstraintException): - read_commodities_csv(csv, con) - - -def test_new_read_demand_csv(populate_demand): - data = populate_demand - assert np.all(data["year"] == np.array([2020, 2050])) - assert np.all(data["commodity"] == np.array(["heat", "heat"])) - assert np.all(data["region"] == np.array(["R1", "R1"])) - assert np.all(data["demand"] == np.array([10, 30])) - - -def test_new_read_demand_csv_commodity_constraint( - default_new_input, con, populate_commodities, populate_regions -): - from muse.new_input.readers import read_demand_csv - - csv = StringIO("year,commodity_id,region_id,demand\n2020,invalid,R1,0\n") - with raises(duckdb.ConstraintException, match=".*foreign key.*"): - read_demand_csv(csv, con) - - -def test_new_read_demand_csv_region_constraint( - default_new_input, con, populate_commodities, populate_regions -): - from muse.new_input.readers import read_demand_csv - - csv = StringIO("year,commodity_id,region_id,demand\n2020,heat,invalid,0\n") - with raises(duckdb.ConstraintException, match=".*foreign key.*"): - read_demand_csv(csv, con) - - -@mark.xfail -def test_demand_dataset(default_new_input): - import duckdb - - from muse.new_input.readers import read_commodities, read_demand, read_regions - - con = duckdb.connect(":memory:") - - read_regions(default_new_input, con) - read_commodities(default_new_input, con) - data = read_demand(default_new_input, con) - - assert isinstance(data, xr.DataArray) - assert data.dtype == np.float64 - - assert set(data.dims) == {"year", "commodity", "region", "timeslice"} - assert list(data.coords["region"].values) == ["R1"] - assert list(data.coords["timeslice"].values) == list(range(1, 7)) - assert list(data.coords["year"].values) == [2020, 2050] - assert set(data.coords["commodity"].values) == { - "electricity", - "gas", - "heat", - "wind", - "CO2f", - } - - assert data.sel(year=2020, commodity="electricity", region="R1", timeslice=0) == 1 - - -@mark.xfail -def test_new_read_initial_market(default_new_input): - from muse.new_input.readers import read_inputs - - all_data = read_inputs(default_new_input) - data = all_data["initial_market"] - - assert isinstance(data, xr.Dataset) - assert set(data.dims) == {"region", "year", "commodity", "timeslice"} - assert dict(data.dtypes) == dict( - prices=np.float64, - exports=np.float64, - imports=np.float64, - static_trade=np.float64, - ) From 5b827cde3601235da2130d2a2636404fd3c1a608 Mon Sep 17 00:00:00 2001 From: Tom Bland Date: Mon, 8 Jul 2024 10:28:08 +0100 Subject: [PATCH 14/43] Tests for new tables --- src/muse/new_input/readers.py | 2 +- tests/test_new_readers.py | 80 ++++++++++++++++++++++++++++------- 2 
files changed, 65 insertions(+), 17 deletions(-) diff --git a/src/muse/new_input/readers.py b/src/muse/new_input/readers.py index b9228f5bf..4e2c09de0 100644 --- a/src/muse/new_input/readers.py +++ b/src/muse/new_input/readers.py @@ -69,7 +69,7 @@ def read_commodity_costs_csv(buffer_, con): con.sql(sql) rel = con.read_csv(buffer_, header=True, delimiter=",") # noqa: F841 con.sql("""INSERT INTO commodity_costs SELECT - commidity_id, region_id, year, value FROM rel;""") + commodity_id, region_id, year, value FROM rel;""") return con.sql("SELECT * from commodity_costs").fetchnumpy() diff --git a/tests/test_new_readers.py b/tests/test_new_readers.py index 6c82434d6..467215bba 100644 --- a/tests/test_new_readers.py +++ b/tests/test_new_readers.py @@ -27,6 +27,26 @@ def populate_commodities(default_new_input, con): return read_commodities_csv(f, con) +@fixture +def populate_commodity_trade( + default_new_input, con, populate_commodities, populate_regions +): + from muse.new_input.readers import read_commodity_trade_csv + + with open(default_new_input / "commodity_trade.csv") as f: + return read_commodity_trade_csv(f, con) + + +@fixture +def populate_commodity_costs( + default_new_input, con, populate_commodities, populate_regions +): + from muse.new_input.readers import read_commodity_costs_csv + + with open(default_new_input / "commodity_costs.csv") as f: + return read_commodity_costs_csv(f, con) + + @fixture def populate_demand(default_new_input, con, populate_regions, populate_commodities): from muse.new_input.readers import read_demand_csv @@ -35,6 +55,16 @@ def populate_demand(default_new_input, con, populate_regions, populate_commoditi return read_demand_csv(f, con) +@fixture +def populate_demand_slicing( + default_new_input, con, populate_regions, populate_commodities +): + from muse.new_input.readers import read_demand_slicing_csv + + with open(default_new_input / "demand_slicing.csv") as f: + return read_demand_slicing_csv(f, con) + + @fixture def populate_regions(default_new_input, con): from muse.new_input.readers import read_regions_csv @@ -43,17 +73,43 @@ def populate_regions(default_new_input, con): return read_regions_csv(f, con) -def test_read_regions(populate_regions): - assert populate_regions["id"] == np.array(["R1"]) - - -def test_read_new_global_commodities(populate_commodities): +def test_read_commodities_csv(populate_commodities): data = populate_commodities assert list(data["id"]) == ["electricity", "gas", "heat", "wind", "CO2f"] assert list(data["type"]) == ["energy"] * 5 assert list(data["unit"]) == ["PJ"] * 4 + ["kt"] +def test_read_commodity_trade_csv(populate_commodity_trade): + data = populate_commodity_trade + assert data["commodity"].size == 0 + assert data["region"].size == 0 + assert data["year"].size == 0 + assert data["import"].size == 0 + assert data["export"].size == 0 + + +def test_read_commodity_costs_csv(populate_commodity_costs): + data = populate_commodity_costs + # Only checking the first element of each array, as the table is large + assert next(iter(data["commodity"])) == "electricity" + assert next(iter(data["region"])) == "R1" + assert next(iter(data["year"])) == 2010 + assert next(iter(data["value"])) == approx(14.81481) + + +def test_read_demand_csv(populate_demand): + data = populate_demand + assert np.all(data["year"] == np.array([2020, 2050])) + assert np.all(data["commodity"] == np.array(["heat", "heat"])) + assert np.all(data["region"] == np.array(["R1", "R1"])) + assert np.all(data["demand"] == np.array([10, 30])) + + +def 
test_read_regions_csv(populate_regions): + assert populate_regions["id"] == np.array(["R1"]) + + def test_calculate_global_commodities(populate_commodities): from muse.new_input.readers import calculate_global_commodities @@ -69,7 +125,7 @@ def test_calculate_global_commodities(populate_commodities): assert list(data.data_vars["unit"].values) == list(populate_commodities["unit"]) -def test_read_new_global_commodities_type_constraint(default_new_input, con): +def test_read_global_commodities_type_constraint(default_new_input, con): from muse.new_input.readers import read_commodities_csv csv = StringIO("id,type,unit\nfoo,invalid,bar\n") @@ -77,15 +133,7 @@ def test_read_new_global_commodities_type_constraint(default_new_input, con): read_commodities_csv(csv, con) -def test_new_read_demand_csv(populate_demand): - data = populate_demand - assert np.all(data["year"] == np.array([2020, 2050])) - assert np.all(data["commodity"] == np.array(["heat", "heat"])) - assert np.all(data["region"] == np.array(["R1", "R1"])) - assert np.all(data["demand"] == np.array([10, 30])) - - -def test_new_read_demand_csv_commodity_constraint( +def test_read_demand_csv_commodity_constraint( default_new_input, con, populate_commodities, populate_regions ): from muse.new_input.readers import read_demand_csv @@ -95,7 +143,7 @@ def test_new_read_demand_csv_commodity_constraint( read_demand_csv(csv, con) -def test_new_read_demand_csv_region_constraint( +def test_read_demand_csv_region_constraint( default_new_input, con, populate_commodities, populate_regions ): from muse.new_input.readers import read_demand_csv From 73014f16607c6b2073308f84a0b84f2cd7d602a9 Mon Sep 17 00:00:00 2001 From: Tom Bland Date: Thu, 15 Aug 2024 10:39:58 +0100 Subject: [PATCH 15/43] Add functions for demand data and (in progress) initial market --- src/muse/new_input/readers.py | 264 +++++++++++++++++++++++++++++++--- tests/test_new_readers.py | 127 ++++++++++------ 2 files changed, 329 insertions(+), 62 deletions(-) diff --git a/src/muse/new_input/readers.py b/src/muse/new_input/readers.py index 4e2c09de0..b216ba0d4 100644 --- a/src/muse/new_input/readers.py +++ b/src/muse/new_input/readers.py @@ -1,5 +1,6 @@ import duckdb import numpy as np +import pandas as pd import xarray as xr @@ -7,28 +8,54 @@ def read_inputs(data_dir): data = {} con = duckdb.connect(":memory:") + with open(data_dir / "timeslices.csv") as f: + timeslices = read_timeslices_csv(f, con) + with open(data_dir / "commodities.csv") as f: commodities = read_commodities_csv(f, con) + with open(data_dir / "regions.csv") as f: + regions = read_regions_csv(f, con) + with open(data_dir / "commodity_trade.csv") as f: - commodity_trade = read_commodity_trade_csv(f, con) # noqa: F841 + commodity_trade = read_commodity_trade_csv(f, con) with open(data_dir / "commodity_costs.csv") as f: - commodity_costs = read_commodity_costs_csv(f, con) # noqa: F841 + commodity_costs = read_commodity_costs_csv(f, con) with open(data_dir / "demand.csv") as f: - demand = read_demand_csv(f, con) # noqa: F841 + demand = read_demand_csv(f, con) with open(data_dir / "demand_slicing.csv") as f: - demand_slicing = read_demand_slicing_csv(f, con) # noqa: F841 - - with open(data_dir / "regions.csv") as f: - regions = read_regions_csv(f, con) # noqa: F841 + demand_slicing = read_demand_slicing_csv(f, con) data["global_commodities"] = calculate_global_commodities(commodities) + data["demand"] = calculate_demand( + commodities, regions, timeslices, demand, demand_slicing + ) + data["initial_market"] = 
calculate_initial_market( + commodities, regions, timeslices, commodity_trade, commodity_costs + ) return data +def read_timeslices_csv(buffer_, con): + sql = """CREATE TABLE timeslices ( + id VARCHAR PRIMARY KEY, + season VARCHAR, + day VARCHAR, + time_of_day VARCHAR, + fraction DOUBLE CHECK (fraction >= 0 AND fraction <= 1), + ); + """ + con.sql(sql) + rel = con.read_csv(buffer_, header=True, delimiter=",") # noqa: F841 + con.sql( + "INSERT INTO timeslices SELECT id, season, day, time_of_day, fraction FROM rel;" + ) + return con.sql("SELECT * from timeslices").fetchnumpy() + + def read_commodities_csv(buffer_, con): sql = """CREATE TABLE commodities ( id VARCHAR PRIMARY KEY, @@ -42,6 +69,17 @@ def read_commodities_csv(buffer_, con): return con.sql("select * from commodities").fetchnumpy() +def read_regions_csv(buffer_, con): + sql = """CREATE TABLE regions ( + id VARCHAR PRIMARY KEY, + ); + """ + con.sql(sql) + rel = con.read_csv(buffer_, header=True, delimiter=",") # noqa: F841 + con.sql("INSERT INTO regions SELECT id FROM rel;") + return con.sql("SELECT * from regions").fetchnumpy() + + def read_commodity_trade_csv(buffer_, con): sql = """CREATE TABLE commodity_trade ( commodity VARCHAR REFERENCES commodities(id), @@ -49,6 +87,7 @@ def read_commodity_trade_csv(buffer_, con): year BIGINT, import DOUBLE, export DOUBLE, + PRIMARY KEY (commodity, region, year) ); """ con.sql(sql) @@ -64,6 +103,7 @@ def read_commodity_costs_csv(buffer_, con): region VARCHAR REFERENCES regions(id), year BIGINT, value DOUBLE, + PRIMARY KEY (commodity, region, year) ); """ con.sql(sql) @@ -79,6 +119,7 @@ def read_demand_csv(buffer_, con): region VARCHAR REFERENCES regions(id), year BIGINT, demand DOUBLE, + PRIMARY KEY (commodity, region, year) ); """ con.sql(sql) @@ -92,28 +133,19 @@ def read_demand_slicing_csv(buffer_, con): commodity VARCHAR REFERENCES commodities(id), region VARCHAR REFERENCES regions(id), year BIGINT, - timeslice VARCHAR, + timeslice VARCHAR REFERENCES timeslices(id), fraction DOUBLE CHECK (fraction >= 0 AND fraction <= 1), + PRIMARY KEY (commodity, region, year, timeslice), + FOREIGN KEY (commodity, region, year) REFERENCES demand(commodity, region, year) ); """ con.sql(sql) rel = con.read_csv(buffer_, header=True, delimiter=",") # noqa: F841 con.sql("""INSERT INTO demand_slicing SELECT - commodity_id, region_id, year, timeslice, fraction FROM rel;""") + commodity_id, region_id, year, timeslice_id, fraction FROM rel;""") return con.sql("SELECT * from demand_slicing").fetchnumpy() -def read_regions_csv(buffer_, con): - sql = """CREATE TABLE regions ( - id VARCHAR PRIMARY KEY, - ); - """ - con.sql(sql) - rel = con.read_csv(buffer_, header=True, delimiter=",") # noqa: F841 - con.sql("INSERT INTO regions SELECT id FROM rel;") - return con.sql("SELECT * from regions").fetchnumpy() - - def calculate_global_commodities(commodities): names = commodities["id"].astype(np.dtype("str")) types = commodities["type"].astype(np.dtype("str")) @@ -129,3 +161,195 @@ def calculate_global_commodities(commodities): data = xr.Dataset(data_vars=dict(type=type_array, unit=unit_array)) return data + + +def calculate_demand( + commodities, regions, timeslices, demand, demand_slicing +) -> xr.DataArray: + """Calculate demand data for all commodities, regions, years, and timeslices. 
+ + Result: A DataArray with a demand value for every combination of: + - commodity: all commodities specified in the commodities table + - region: all regions specified in the regions table + - year: all years specified in the demand table + - timeslice: all timeslices specified in the timeslices table + + Checks: + - If demand data is specified for one year, it must be specified for all years. + - If demand is nonzero, slicing data must be present. + - If slicing data is specified for a commodity/region/year, the sum of + the fractions must be 1, and all timeslices must be present. + + Fills: + - If demand data is not specified for a commodity/region combination, the demand is + 0 for all years and timeslices. + + Todo: + - Interpolation to allow for missing years in demand data. + - Ability to leave the year field blank in both tables to indicate all years + - Allow slicing data to be missing -> demand is spread equally across timeslices + - Allow more flexibility for timeslices (e.g. can specify "winter" to apply to all + winter timeslices, or "all" to apply to all timeslices) + """ + # Prepare dataframes + df_demand = pd.DataFrame(demand).set_index(["commodity", "region", "year"]) + df_slicing = pd.DataFrame(demand_slicing).set_index( + ["commodity", "region", "year", "timeslice"] + ) + + # DataArray dimensions + all_commodities = commodities["id"].astype(np.dtype("str")) + all_regions = regions["id"].astype(np.dtype("str")) + all_years = df_demand.index.get_level_values("year").unique() + all_timeslices = timeslices["id"].astype(np.dtype("str")) + + # CHECK: all years are specified for each commodity/region combination + check_all_values_specified(df_demand, ["commodity", "region"], "year", all_years) + + # CHECK: if slicing data is present, all timeslices must be specified + check_all_values_specified( + df_slicing, ["commodity", "region", "year"], "timeslice", all_timeslices + ) + + # CHECK: timeslice fractions sum to 1 + check_timeslice_sum = df_slicing.groupby(["commodity", "region", "year"]).apply( + lambda x: np.isclose(x["fraction"].sum(), 1) + ) + if not check_timeslice_sum.all(): + raise DataValidationError + + # CHECK: if demand data >0, fraction data must be specified + check_fraction_data_present = ( + df_demand[df_demand["demand"] > 0] + .index.isin(df_slicing.droplevel("timeslice").index) + .all() + ) + if not check_fraction_data_present.all(): + raise DataValidationError + + # FILL: demand is zero if unspecified + df_demand = df_demand.reindex( + pd.MultiIndex.from_product( + [all_commodities, all_regions, all_years], + names=["commodity", "region", "year"], + ), + fill_value=0, + ) + + # FILL: slice data is zero if unspecified + df_slicing = df_slicing.reindex( + pd.MultiIndex.from_product( + [all_commodities, all_regions, all_years, all_timeslices], + names=["commodity", "region", "year", "timeslice"], + ), + fill_value=0, + ) + + # Create DataArray + da_demand = df_demand.to_xarray()["demand"] + da_slicing = df_slicing.to_xarray()["fraction"] + data = da_demand * da_slicing + return data + + +def calculate_initial_market( + commodities, regions, timeslices, commodity_trade, commodity_costs +) -> xr.Dataset: + """Calculate trade and price data for all commodities, regions and years. 
+ + Result: A Dataset with variables: + - prices + - exports + - imports + - static_trade + For every combination of: + - commodity: all commodities specified in the commodities table + - region: all regions specified in the regions table + - year: all years specified in the commodity_costs table + - timeslice (multiindex): all timeslices specified in the timeslices table + + Checks: + - If trade data is specified for one year, it must be specified for all years. + - If price data is specified for one year, it must be specified for all years. + + Fills: + - If trade data is not specified for a commodity/region combination, imports and + exports are both zero + - If price data is not specified for a commodity/region combination, the price is + zero + + """ + from muse.timeslices import QuantityType, convert_timeslice + + # Prepare dataframes + df_trade = pd.DataFrame(commodity_trade).set_index(["commodity", "region", "year"]) + df_costs = ( + pd.DataFrame(commodity_costs) + .set_index(["commodity", "region", "year"]) + .rename(columns={"value": "prices"}) + ) + df_timeslices = pd.DataFrame(timeslices).set_index(["season", "day", "time_of_day"]) + + # DataArray dimensions + all_commodities = commodities["id"].astype(np.dtype("str")) + all_regions = regions["id"].astype(np.dtype("str")) + all_years = df_costs.index.get_level_values("year").unique() + + # CHECK: all years are specified for each commodity/region combination + check_all_values_specified(df_trade, ["commodity", "region"], "year", all_years) + check_all_values_specified(df_costs, ["commodity", "region"], "year", all_years) + + # FILL: price is zero if unspecified + df_costs = df_costs.reindex( + pd.MultiIndex.from_product( + [all_commodities, all_regions, all_years], + names=["commodity", "region", "year"], + ), + fill_value=0, + ) + + # FILL: trade is zero if unspecified + df_trade = df_trade.reindex( + pd.MultiIndex.from_product( + [all_commodities, all_regions, all_years], + names=["commodity", "region", "year"], + ), + fill_value=0, + ) + + # Calculate static trade + df_trade["static_trade"] = df_trade["export"] - df_trade["import"] + + # Create Data + df_full = df_costs.join(df_trade) + data = df_full.to_xarray() + ts = df_timeslices.to_xarray()["fraction"] + ts = ts.stack(timeslice=("season", "day", "time_of_day")) + convert_timeslice(data, ts, QuantityType.EXTENSIVE) + + return data + + +class DataValidationError(ValueError): + pass + + +def check_all_values_specified( + df: pd.DataFrame, group_by_cols: list[str], column_name: str, values: list +) -> None: + """Check that the required values are specified in a dataframe. + + Checks that a row exists for all specified values of column_name for each + group in the grouped dataframe. 
+ """ + if not ( + df.groupby(group_by_cols) + .apply( + lambda x: ( + set(x.index.get_level_values(column_name).unique()) == set(values) + ) + ) + .all() + ).all(): + msg = "" # TODO + raise DataValidationError(msg) diff --git a/tests/test_new_readers.py b/tests/test_new_readers.py index 467215bba..483d42627 100644 --- a/tests/test_new_readers.py +++ b/tests/test_new_readers.py @@ -57,7 +57,7 @@ def populate_demand(default_new_input, con, populate_regions, populate_commoditi @fixture def populate_demand_slicing( - default_new_input, con, populate_regions, populate_commodities + default_new_input, con, populate_regions, populate_commodities, populate_demand ): from muse.new_input.readers import read_demand_slicing_csv @@ -73,6 +73,28 @@ def populate_regions(default_new_input, con): return read_regions_csv(f, con) +@fixture +def populate_timeslices(default_new_input, con): + from muse.new_input.readers import read_timeslices_csv + + with open(default_new_input / "timeslices.csv") as f: + return read_timeslices_csv(f, con) + + +def test_read_timeslices_csv(populate_timeslices): + data = populate_timeslices + assert len(data["id"]) == 6 + assert next(iter(data["id"])) == "1" + assert next(iter(data["season"])) == "all" + assert next(iter(data["day"])) == "all" + assert next(iter(data["time_of_day"])) == "night" + assert next(iter(data["fraction"])) == approx(0.1667) + + +def test_read_regions_csv(populate_regions): + assert populate_regions["id"] == np.array(["R1"]) + + def test_read_commodities_csv(populate_commodities): data = populate_commodities assert list(data["id"]) == ["electricity", "gas", "heat", "wind", "CO2f"] @@ -106,26 +128,18 @@ def test_read_demand_csv(populate_demand): assert np.all(data["demand"] == np.array([10, 30])) -def test_read_regions_csv(populate_regions): - assert populate_regions["id"] == np.array(["R1"]) - - -def test_calculate_global_commodities(populate_commodities): - from muse.new_input.readers import calculate_global_commodities - - data = calculate_global_commodities(populate_commodities) - - assert isinstance(data, xr.Dataset) - assert set(data.dims) == {"commodity"} - for dt in data.dtypes.values(): - assert np.issubdtype(dt, np.dtype("str")) - - assert list(data.coords["commodity"].values) == list(populate_commodities["id"]) - assert list(data.data_vars["type"].values) == list(populate_commodities["type"]) - assert list(data.data_vars["unit"].values) == list(populate_commodities["unit"]) +def test_read_demand_slicing_csv(populate_demand_slicing): + data = populate_demand_slicing + assert np.all(data["commodity"] == "heat") + assert np.all(data["region"] == "R1") + # assert np.all(data["timeslice"] == np.array([0, 1])) + assert np.all( + data["fraction"] + == np.array([0.1, 0.15, 0.1, 0.15, 0.3, 0.2, 0.1, 0.15, 0.1, 0.15, 0.3, 0.2]) + ) -def test_read_global_commodities_type_constraint(default_new_input, con): +def test_read_commodities_csv_type_constraint(con): from muse.new_input.readers import read_commodities_csv csv = StringIO("id,type,unit\nfoo,invalid,bar\n") @@ -134,7 +148,7 @@ def test_read_global_commodities_type_constraint(default_new_input, con): def test_read_demand_csv_commodity_constraint( - default_new_input, con, populate_commodities, populate_regions + con, populate_commodities, populate_regions ): from muse.new_input.readers import read_demand_csv @@ -143,9 +157,7 @@ def test_read_demand_csv_commodity_constraint( read_demand_csv(csv, con) -def test_read_demand_csv_region_constraint( - default_new_input, con, populate_commodities, 
populate_regions -): +def test_read_demand_csv_region_constraint(con, populate_commodities, populate_regions): from muse.new_input.readers import read_demand_csv csv = StringIO("year,commodity_id,region_id,demand\n2020,heat,invalid,0\n") @@ -153,24 +165,44 @@ def test_read_demand_csv_region_constraint( read_demand_csv(csv, con) -@mark.xfail -def test_demand_dataset(default_new_input): - import duckdb +def test_calculate_global_commodities(populate_commodities): + from muse.new_input.readers import calculate_global_commodities + + data = calculate_global_commodities(populate_commodities) + + assert isinstance(data, xr.Dataset) + assert set(data.dims) == {"commodity"} + for dt in data.dtypes.values(): + assert np.issubdtype(dt, np.dtype("str")) - from muse.new_input.readers import read_commodities, read_demand, read_regions + assert list(data.coords["commodity"].values) == list(populate_commodities["id"]) + assert list(data.data_vars["type"].values) == list(populate_commodities["type"]) + assert list(data.data_vars["unit"].values) == list(populate_commodities["unit"]) - con = duckdb.connect(":memory:") - read_regions(default_new_input, con) - read_commodities(default_new_input, con) - data = read_demand(default_new_input, con) +def test_calculate_demand( + populate_commodities, + populate_regions, + populate_timeslices, + populate_demand, + populate_demand_slicing, +): + from muse.new_input.readers import calculate_demand + + data = calculate_demand( + populate_commodities, + populate_regions, + populate_timeslices, + populate_demand, + populate_demand_slicing, + ) assert isinstance(data, xr.DataArray) assert data.dtype == np.float64 assert set(data.dims) == {"year", "commodity", "region", "timeslice"} assert list(data.coords["region"].values) == ["R1"] - assert list(data.coords["timeslice"].values) == list(range(1, 7)) + assert list(data.coords["timeslice"].values) == ["1", "2", "3", "4", "5", "6"] assert list(data.coords["year"].values) == [2020, 2050] assert set(data.coords["commodity"].values) == { "electricity", @@ -180,15 +212,26 @@ def test_demand_dataset(default_new_input): "CO2f", } - assert data.sel(year=2020, commodity="electricity", region="R1", timeslice=0) == 1 + assert data.sel(year=2020, commodity="heat", region="R1", timeslice="1") == 1 @mark.xfail -def test_new_read_initial_market(default_new_input): - from muse.new_input.readers import read_inputs - - all_data = read_inputs(default_new_input) - data = all_data["initial_market"] +def test_calculate_initial_market( + populate_commodities, + populate_regions, + populate_timeslices, + populate_commodity_trade, + populate_commodity_costs, +): + from muse.new_input.readers import calculate_initial_market + + data = calculate_initial_market( + populate_commodities, + populate_regions, + populate_timeslices, + populate_commodity_trade, + populate_commodity_costs, + ) assert isinstance(data, xr.Dataset) assert set(data.dims) == {"region", "year", "commodity", "timeslice"} @@ -198,15 +241,15 @@ def test_new_read_initial_market(default_new_input): imports=np.float64, static_trade=np.float64, ) - assert list(data.coords["region"].values) == ["R1"] - assert list(data.coords["year"].values) == list(range(2010, 2105, 5)) - assert list(data.coords["commodity"].values) == [ + assert set(data.coords["region"].values) == {"R1"} + assert set(data.coords["year"].values) == set(range(2010, 2105, 5)) + assert set(data.coords["commodity"].values) == { "electricity", "gas", "heat", "CO2f", "wind", - ] + } month_values = ["all-year"] * 6 day_values 
= ["all-week"] * 6 hour_values = [ From 1a6652c21c4559329a35ec3752225340cc9ed7fb Mon Sep 17 00:00:00 2001 From: Tom Bland Date: Mon, 19 Aug 2024 12:25:32 +0100 Subject: [PATCH 16/43] Convert timeslice id to int, fix failing test --- src/muse/new_input/readers.py | 6 +++--- tests/test_new_readers.py | 13 +++++++++---- 2 files changed, 12 insertions(+), 7 deletions(-) diff --git a/src/muse/new_input/readers.py b/src/muse/new_input/readers.py index b216ba0d4..67c5dd9aa 100644 --- a/src/muse/new_input/readers.py +++ b/src/muse/new_input/readers.py @@ -41,7 +41,7 @@ def read_inputs(data_dir): def read_timeslices_csv(buffer_, con): sql = """CREATE TABLE timeslices ( - id VARCHAR PRIMARY KEY, + id BIGINT PRIMARY KEY, season VARCHAR, day VARCHAR, time_of_day VARCHAR, @@ -133,7 +133,7 @@ def read_demand_slicing_csv(buffer_, con): commodity VARCHAR REFERENCES commodities(id), region VARCHAR REFERENCES regions(id), year BIGINT, - timeslice VARCHAR REFERENCES timeslices(id), + timeslice BIGINT REFERENCES timeslices(id), fraction DOUBLE CHECK (fraction >= 0 AND fraction <= 1), PRIMARY KEY (commodity, region, year, timeslice), FOREIGN KEY (commodity, region, year) REFERENCES demand(commodity, region, year) @@ -201,7 +201,7 @@ def calculate_demand( all_commodities = commodities["id"].astype(np.dtype("str")) all_regions = regions["id"].astype(np.dtype("str")) all_years = df_demand.index.get_level_values("year").unique() - all_timeslices = timeslices["id"].astype(np.dtype("str")) + all_timeslices = timeslices["id"].astype(np.dtype("int")) # CHECK: all years are specified for each commodity/region combination check_all_values_specified(df_demand, ["commodity", "region"], "year", all_years) diff --git a/tests/test_new_readers.py b/tests/test_new_readers.py index 483d42627..68f715d55 100644 --- a/tests/test_new_readers.py +++ b/tests/test_new_readers.py @@ -57,7 +57,12 @@ def populate_demand(default_new_input, con, populate_regions, populate_commoditi @fixture def populate_demand_slicing( - default_new_input, con, populate_regions, populate_commodities, populate_demand + default_new_input, + con, + populate_regions, + populate_commodities, + populate_demand, + populate_timeslices, ): from muse.new_input.readers import read_demand_slicing_csv @@ -84,7 +89,7 @@ def populate_timeslices(default_new_input, con): def test_read_timeslices_csv(populate_timeslices): data = populate_timeslices assert len(data["id"]) == 6 - assert next(iter(data["id"])) == "1" + assert next(iter(data["id"])) == 1 assert next(iter(data["season"])) == "all" assert next(iter(data["day"])) == "all" assert next(iter(data["time_of_day"])) == "night" @@ -202,7 +207,7 @@ def test_calculate_demand( assert set(data.dims) == {"year", "commodity", "region", "timeslice"} assert list(data.coords["region"].values) == ["R1"] - assert list(data.coords["timeslice"].values) == ["1", "2", "3", "4", "5", "6"] + assert set(data.coords["timeslice"].values) == set(range(1, 7)) assert list(data.coords["year"].values) == [2020, 2050] assert set(data.coords["commodity"].values) == { "electricity", @@ -212,7 +217,7 @@ def test_calculate_demand( "CO2f", } - assert data.sel(year=2020, commodity="heat", region="R1", timeslice="1") == 1 + assert data.sel(year=2020, commodity="heat", region="R1", timeslice=1) == 1 @mark.xfail From 3de85ddea6dd672eeef748cbf895a3b529d6d67b Mon Sep 17 00:00:00 2001 From: Tom Bland Date: Mon, 19 Aug 2024 14:13:27 +0100 Subject: [PATCH 17/43] Finish initial market reader --- src/muse/new_input/readers.py | 61 
+++++++++++++++++++++++++++-------- tests/test_new_readers.py | 45 ++++++++++++-------------- 2 files changed, 68 insertions(+), 38 deletions(-) diff --git a/src/muse/new_input/readers.py b/src/muse/new_input/readers.py index 67c5dd9aa..c8833e902 100644 --- a/src/muse/new_input/readers.py +++ b/src/muse/new_input/readers.py @@ -3,6 +3,8 @@ import pandas as pd import xarray as xr +from muse.timeslices import QuantityType + def read_inputs(data_dir): data = {} @@ -42,17 +44,15 @@ def read_inputs(data_dir): def read_timeslices_csv(buffer_, con): sql = """CREATE TABLE timeslices ( id BIGINT PRIMARY KEY, - season VARCHAR, + month VARCHAR, day VARCHAR, - time_of_day VARCHAR, + hour VARCHAR, fraction DOUBLE CHECK (fraction >= 0 AND fraction <= 1), ); """ con.sql(sql) rel = con.read_csv(buffer_, header=True, delimiter=",") # noqa: F841 - con.sql( - "INSERT INTO timeslices SELECT id, season, day, time_of_day, fraction FROM rel;" - ) + con.sql("INSERT INTO timeslices SELECT id, month, day, hour, fraction FROM rel;") return con.sql("SELECT * from timeslices").fetchnumpy() @@ -278,9 +278,11 @@ def calculate_initial_market( - If price data is not specified for a commodity/region combination, the price is zero - """ - from muse.timeslices import QuantityType, convert_timeslice + Todo: + - Allow data to be specified on a timeslice level (optional) + - Interpolation, missing year field, flexible timeslice specification as above + """ # Prepare dataframes df_trade = pd.DataFrame(commodity_trade).set_index(["commodity", "region", "year"]) df_costs = ( @@ -288,7 +290,7 @@ def calculate_initial_market( .set_index(["commodity", "region", "year"]) .rename(columns={"value": "prices"}) ) - df_timeslices = pd.DataFrame(timeslices).set_index(["season", "day", "time_of_day"]) + df_timeslices = pd.DataFrame(timeslices).set_index(["month", "day", "hour"]) # DataArray dimensions all_commodities = commodities["id"].astype(np.dtype("str")) @@ -320,13 +322,17 @@ def calculate_initial_market( # Calculate static trade df_trade["static_trade"] = df_trade["export"] - df_trade["import"] - # Create Data - df_full = df_costs.join(df_trade) - data = df_full.to_xarray() - ts = df_timeslices.to_xarray()["fraction"] - ts = ts.stack(timeslice=("season", "day", "time_of_day")) - convert_timeslice(data, ts, QuantityType.EXTENSIVE) + # Create xarray datasets + xr_costs = df_costs.to_xarray() + xr_trade = df_trade.to_xarray() + + # Project over timeslices + ts = df_timeslices.to_xarray()["fraction"].stack(timeslice=("month", "day", "hour")) + xr_costs = project_timeslice(xr_costs, ts, QuantityType.EXTENSIVE) + xr_trade = project_timeslice(xr_trade, ts, QuantityType.INTENSIVE) + # Combine data + data = xr.merge([xr_costs, xr_trade]) return data @@ -353,3 +359,30 @@ def check_all_values_specified( ).all(): msg = "" # TODO raise DataValidationError(msg) + + +def project_timeslice( + data: xr.Dataset, timeslices: xr.DataArray, quantity_type: QuantityType +) -> xr.Dataset: + """Project a dataset over a new timeslice dimension. + + The projection can be done in one of two ways, depending on whether the + quantity type is extensive or intensive. See `QuantityType`. + + Args: + data: Dataset to project + timeslices: DataArray of timeslice levels, with values between 0 and 1 + representing the timeslice length (fraction of the year) + quantity_type: Type of projection to perform. 
QuantityType.EXTENSIVE or + QuantityType.INTENSIVE + + Returns: + Projected dataset + """ + assert "timeslice" in timeslices.dims + assert "timeslice" not in data.dims + + if quantity_type is QuantityType.INTENSIVE: + return data * timeslices + if quantity_type is QuantityType.EXTENSIVE: + return data * xr.ones_like(timeslices) diff --git a/tests/test_new_readers.py b/tests/test_new_readers.py index 68f715d55..07a3889e3 100644 --- a/tests/test_new_readers.py +++ b/tests/test_new_readers.py @@ -3,7 +3,7 @@ import duckdb import numpy as np import xarray as xr -from pytest import approx, fixture, mark, raises +from pytest import approx, fixture, raises @fixture @@ -206,9 +206,9 @@ def test_calculate_demand( assert data.dtype == np.float64 assert set(data.dims) == {"year", "commodity", "region", "timeslice"} - assert list(data.coords["region"].values) == ["R1"] + assert set(data.coords["region"].values) == {"R1"} assert set(data.coords["timeslice"].values) == set(range(1, 7)) - assert list(data.coords["year"].values) == [2020, 2050] + assert set(data.coords["year"].values) == {2020, 2050} assert set(data.coords["commodity"].values) == { "electricity", "gas", @@ -220,7 +220,6 @@ def test_calculate_demand( assert data.sel(year=2020, commodity="heat", region="R1", timeslice=1) == 1 -@mark.xfail def test_calculate_initial_market( populate_commodities, populate_regions, @@ -240,12 +239,8 @@ def test_calculate_initial_market( assert isinstance(data, xr.Dataset) assert set(data.dims) == {"region", "year", "commodity", "timeslice"} - assert dict(data.dtypes) == dict( - prices=np.float64, - exports=np.float64, - imports=np.float64, - static_trade=np.float64, - ) + for dt in data.dtypes.values(): + assert dt == np.dtype("float64") assert set(data.coords["region"].values) == {"R1"} assert set(data.coords["year"].values) == set(range(2010, 2105, 5)) assert set(data.coords["commodity"].values) == { @@ -266,28 +261,30 @@ def test_calculate_initial_market( "evening", ] - assert list(data.coords["timeslice"].values) == list( + assert set(data.coords["timeslice"].values) == set( zip(month_values, day_values, hour_values) ) - assert list(data.coords["month"]) == month_values - assert list(data.coords["day"]) == day_values - assert list(data.coords["hour"]) == hour_values + assert set(data.coords["month"].values) == set(month_values) + assert set(data.coords["day"].values) == set(day_values) + assert set(data.coords["hour"].values) == set(hour_values) assert all(var.coords.equals(data.coords) for var in data.data_vars.values()) prices = data.data_vars["prices"] - assert approx( - prices.sel( - year=2010, - region="R1", - commodity="electricity", - timeslice=("all-year", "all-week", "night"), + assert ( + approx( + prices.sel( + year=2010, + region="R1", + commodity="electricity", + timeslice=("all-year", "all-week", "night"), + ), + abs=1e-4, ) - - 14.81481, - abs=1e-4, + == 14.81481 ) - exports = data.data_vars["exports"] + exports = data.data_vars["export"] assert ( exports.sel( year=2010, @@ -297,7 +294,7 @@ def test_calculate_initial_market( ) ) == 0 - imports = data.data_vars["imports"] + imports = data.data_vars["import"] assert ( imports.sel( year=2010, From cd3d3dc39e5cb75a95c0e4f70f2ba6fd75aad18d Mon Sep 17 00:00:00 2001 From: Tom Bland Date: Mon, 19 Aug 2024 14:24:04 +0100 Subject: [PATCH 18/43] Fix test --- tests/test_new_readers.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/test_new_readers.py b/tests/test_new_readers.py index 07a3889e3..e7d7e31d9 100644 --- 
a/tests/test_new_readers.py +++ b/tests/test_new_readers.py @@ -90,9 +90,9 @@ def test_read_timeslices_csv(populate_timeslices): data = populate_timeslices assert len(data["id"]) == 6 assert next(iter(data["id"])) == 1 - assert next(iter(data["season"])) == "all" - assert next(iter(data["day"])) == "all" - assert next(iter(data["time_of_day"])) == "night" + assert next(iter(data["month"])) == "all-year" + assert next(iter(data["day"])) == "all-week" + assert next(iter(data["hour"])) == "night" assert next(iter(data["fraction"])) == approx(0.1667) From e82cb11d644987db44126b0375cb58b1c8159151 Mon Sep 17 00:00:00 2001 From: Tom Bland Date: Fri, 8 Aug 2025 12:27:43 +0100 Subject: [PATCH 19/43] Undo rebase mistake --- tests/test_readers.py | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/tests/test_readers.py b/tests/test_readers.py index e0b4bcd63..924dcacff 100644 --- a/tests/test_readers.py +++ b/tests/test_readers.py @@ -290,3 +290,27 @@ def test_get_nan_coordinates(): dataset1 = xr.Dataset.from_dataframe(df1.set_index(["region", "year"])) nan_coords1 = get_nan_coordinates(dataset1) assert nan_coords1 == [("R1", 2021)] + + # Test 2: Missing coordinate combinations + df2 = pd.DataFrame( + { + "region": ["R1", "R1", "R2"], # Missing R2-2021 + "year": [2020, 2021, 2020], + "value": [1.0, 2.0, 3.0], + } + ) + dataset2 = xr.Dataset.from_dataframe(df2.set_index(["region", "year"])) + nan_coords2 = get_nan_coordinates(dataset2) + assert nan_coords2 == [("R2", 2021)] + + # Test 3: No NaN values + df3 = pd.DataFrame( + { + "region": ["R1", "R1", "R2", "R2"], + "year": [2020, 2021, 2020, 2021], + "value": [1.0, 2.0, 3.0, 4.0], + } + ) + dataset3 = xr.Dataset.from_dataframe(df3.set_index(["region", "year"])) + nan_coords3 = get_nan_coordinates(dataset3) + assert nan_coords3 == [] From 085a080532088155efeb2a15bcea2dd2fed82a10 Mon Sep 17 00:00:00 2001 From: Tom Bland Date: Tue, 12 Aug 2025 12:24:54 +0100 Subject: [PATCH 20/43] Delete some outdated code, get the tests passing --- src/muse/new_input/readers.py | 305 ++++------------------------------ tests/test_new_readers.py | 211 ++--------------------- 2 files changed, 46 insertions(+), 470 deletions(-) diff --git a/src/muse/new_input/readers.py b/src/muse/new_input/readers.py index c8833e902..e504bdb03 100644 --- a/src/muse/new_input/readers.py +++ b/src/muse/new_input/readers.py @@ -1,59 +1,62 @@ import duckdb import numpy as np -import pandas as pd import xarray as xr -from muse.timeslices import QuantityType - def read_inputs(data_dir): data = {} con = duckdb.connect(":memory:") - with open(data_dir / "timeslices.csv") as f: - timeslices = read_timeslices_csv(f, con) + with open(data_dir / "time_slices.csv") as f: + _time_slices = read_time_slices_csv(f, con) with open(data_dir / "commodities.csv") as f: commodities = read_commodities_csv(f, con) with open(data_dir / "regions.csv") as f: - regions = read_regions_csv(f, con) - - with open(data_dir / "commodity_trade.csv") as f: - commodity_trade = read_commodity_trade_csv(f, con) + _regions = read_regions_csv(f, con) with open(data_dir / "commodity_costs.csv") as f: - commodity_costs = read_commodity_costs_csv(f, con) + _commodity_costs = read_commodity_costs_csv(f, con) with open(data_dir / "demand.csv") as f: - demand = read_demand_csv(f, con) + _demand = read_demand_csv(f, con) with open(data_dir / "demand_slicing.csv") as f: - demand_slicing = read_demand_slicing_csv(f, con) + _demand_slicing = read_demand_slicing_csv(f, con) data["global_commodities"] = 
calculate_global_commodities(commodities) - data["demand"] = calculate_demand( - commodities, regions, timeslices, demand, demand_slicing - ) - data["initial_market"] = calculate_initial_market( - commodities, regions, timeslices, commodity_trade, commodity_costs - ) return data -def read_timeslices_csv(buffer_, con): - sql = """CREATE TABLE timeslices ( - id BIGINT PRIMARY KEY, - month VARCHAR, +def read_time_slices_csv(buffer_, con): + sql = """ + CREATE TABLE time_slices ( + id VARCHAR PRIMARY KEY, + season VARCHAR, day VARCHAR, - hour VARCHAR, - fraction DOUBLE CHECK (fraction >= 0 AND fraction <= 1), + time_of_day VARCHAR, + fraction DOUBLE CHECK (fraction >= 0 AND fraction <= 1) ); """ con.sql(sql) + + # Read CSV into a temporary relation rel = con.read_csv(buffer_, header=True, delimiter=",") # noqa: F841 - con.sql("INSERT INTO timeslices SELECT id, month, day, hour, fraction FROM rel;") - return con.sql("SELECT * from timeslices").fetchnumpy() + + # Insert into the table with computed id + con.sql(""" + INSERT INTO time_slices + SELECT + season || '.' || day || '.' || time_of_day AS id, + season, + day, + time_of_day, + fraction + FROM rel + """) + + return con.sql("SELECT * FROM time_slices").fetchnumpy() def read_commodities_csv(buffer_, con): @@ -80,23 +83,6 @@ def read_regions_csv(buffer_, con): return con.sql("SELECT * from regions").fetchnumpy() -def read_commodity_trade_csv(buffer_, con): - sql = """CREATE TABLE commodity_trade ( - commodity VARCHAR REFERENCES commodities(id), - region VARCHAR REFERENCES regions(id), - year BIGINT, - import DOUBLE, - export DOUBLE, - PRIMARY KEY (commodity, region, year) - ); - """ - con.sql(sql) - rel = con.read_csv(buffer_, header=True, delimiter=",") # noqa: F841 - con.sql("""INSERT INTO commodity_trade SELECT - commodity_id, region_id, year, import, export FROM rel;""") - return con.sql("SELECT * from commodity_trade").fetchnumpy() - - def read_commodity_costs_csv(buffer_, con): sql = """CREATE TABLE commodity_costs ( commodity VARCHAR REFERENCES commodities(id), @@ -132,17 +118,15 @@ def read_demand_slicing_csv(buffer_, con): sql = """CREATE TABLE demand_slicing ( commodity VARCHAR REFERENCES commodities(id), region VARCHAR REFERENCES regions(id), - year BIGINT, - timeslice BIGINT REFERENCES timeslices(id), + time_slice VARCHAR REFERENCES time_slices(id), fraction DOUBLE CHECK (fraction >= 0 AND fraction <= 1), - PRIMARY KEY (commodity, region, year, timeslice), - FOREIGN KEY (commodity, region, year) REFERENCES demand(commodity, region, year) + PRIMARY KEY (commodity, region, time_slice), ); """ con.sql(sql) rel = con.read_csv(buffer_, header=True, delimiter=",") # noqa: F841 con.sql("""INSERT INTO demand_slicing SELECT - commodity_id, region_id, year, timeslice_id, fraction FROM rel;""") + commodity_id, region_id, time_slice, fraction FROM rel;""") return con.sql("SELECT * from demand_slicing").fetchnumpy() @@ -161,228 +145,3 @@ def calculate_global_commodities(commodities): data = xr.Dataset(data_vars=dict(type=type_array, unit=unit_array)) return data - - -def calculate_demand( - commodities, regions, timeslices, demand, demand_slicing -) -> xr.DataArray: - """Calculate demand data for all commodities, regions, years, and timeslices. 
- - Result: A DataArray with a demand value for every combination of: - - commodity: all commodities specified in the commodities table - - region: all regions specified in the regions table - - year: all years specified in the demand table - - timeslice: all timeslices specified in the timeslices table - - Checks: - - If demand data is specified for one year, it must be specified for all years. - - If demand is nonzero, slicing data must be present. - - If slicing data is specified for a commodity/region/year, the sum of - the fractions must be 1, and all timeslices must be present. - - Fills: - - If demand data is not specified for a commodity/region combination, the demand is - 0 for all years and timeslices. - - Todo: - - Interpolation to allow for missing years in demand data. - - Ability to leave the year field blank in both tables to indicate all years - - Allow slicing data to be missing -> demand is spread equally across timeslices - - Allow more flexibility for timeslices (e.g. can specify "winter" to apply to all - winter timeslices, or "all" to apply to all timeslices) - """ - # Prepare dataframes - df_demand = pd.DataFrame(demand).set_index(["commodity", "region", "year"]) - df_slicing = pd.DataFrame(demand_slicing).set_index( - ["commodity", "region", "year", "timeslice"] - ) - - # DataArray dimensions - all_commodities = commodities["id"].astype(np.dtype("str")) - all_regions = regions["id"].astype(np.dtype("str")) - all_years = df_demand.index.get_level_values("year").unique() - all_timeslices = timeslices["id"].astype(np.dtype("int")) - - # CHECK: all years are specified for each commodity/region combination - check_all_values_specified(df_demand, ["commodity", "region"], "year", all_years) - - # CHECK: if slicing data is present, all timeslices must be specified - check_all_values_specified( - df_slicing, ["commodity", "region", "year"], "timeslice", all_timeslices - ) - - # CHECK: timeslice fractions sum to 1 - check_timeslice_sum = df_slicing.groupby(["commodity", "region", "year"]).apply( - lambda x: np.isclose(x["fraction"].sum(), 1) - ) - if not check_timeslice_sum.all(): - raise DataValidationError - - # CHECK: if demand data >0, fraction data must be specified - check_fraction_data_present = ( - df_demand[df_demand["demand"] > 0] - .index.isin(df_slicing.droplevel("timeslice").index) - .all() - ) - if not check_fraction_data_present.all(): - raise DataValidationError - - # FILL: demand is zero if unspecified - df_demand = df_demand.reindex( - pd.MultiIndex.from_product( - [all_commodities, all_regions, all_years], - names=["commodity", "region", "year"], - ), - fill_value=0, - ) - - # FILL: slice data is zero if unspecified - df_slicing = df_slicing.reindex( - pd.MultiIndex.from_product( - [all_commodities, all_regions, all_years, all_timeslices], - names=["commodity", "region", "year", "timeslice"], - ), - fill_value=0, - ) - - # Create DataArray - da_demand = df_demand.to_xarray()["demand"] - da_slicing = df_slicing.to_xarray()["fraction"] - data = da_demand * da_slicing - return data - - -def calculate_initial_market( - commodities, regions, timeslices, commodity_trade, commodity_costs -) -> xr.Dataset: - """Calculate trade and price data for all commodities, regions and years. 
- - Result: A Dataset with variables: - - prices - - exports - - imports - - static_trade - For every combination of: - - commodity: all commodities specified in the commodities table - - region: all regions specified in the regions table - - year: all years specified in the commodity_costs table - - timeslice (multiindex): all timeslices specified in the timeslices table - - Checks: - - If trade data is specified for one year, it must be specified for all years. - - If price data is specified for one year, it must be specified for all years. - - Fills: - - If trade data is not specified for a commodity/region combination, imports and - exports are both zero - - If price data is not specified for a commodity/region combination, the price is - zero - - Todo: - - Allow data to be specified on a timeslice level (optional) - - Interpolation, missing year field, flexible timeslice specification as above - - """ - # Prepare dataframes - df_trade = pd.DataFrame(commodity_trade).set_index(["commodity", "region", "year"]) - df_costs = ( - pd.DataFrame(commodity_costs) - .set_index(["commodity", "region", "year"]) - .rename(columns={"value": "prices"}) - ) - df_timeslices = pd.DataFrame(timeslices).set_index(["month", "day", "hour"]) - - # DataArray dimensions - all_commodities = commodities["id"].astype(np.dtype("str")) - all_regions = regions["id"].astype(np.dtype("str")) - all_years = df_costs.index.get_level_values("year").unique() - - # CHECK: all years are specified for each commodity/region combination - check_all_values_specified(df_trade, ["commodity", "region"], "year", all_years) - check_all_values_specified(df_costs, ["commodity", "region"], "year", all_years) - - # FILL: price is zero if unspecified - df_costs = df_costs.reindex( - pd.MultiIndex.from_product( - [all_commodities, all_regions, all_years], - names=["commodity", "region", "year"], - ), - fill_value=0, - ) - - # FILL: trade is zero if unspecified - df_trade = df_trade.reindex( - pd.MultiIndex.from_product( - [all_commodities, all_regions, all_years], - names=["commodity", "region", "year"], - ), - fill_value=0, - ) - - # Calculate static trade - df_trade["static_trade"] = df_trade["export"] - df_trade["import"] - - # Create xarray datasets - xr_costs = df_costs.to_xarray() - xr_trade = df_trade.to_xarray() - - # Project over timeslices - ts = df_timeslices.to_xarray()["fraction"].stack(timeslice=("month", "day", "hour")) - xr_costs = project_timeslice(xr_costs, ts, QuantityType.EXTENSIVE) - xr_trade = project_timeslice(xr_trade, ts, QuantityType.INTENSIVE) - - # Combine data - data = xr.merge([xr_costs, xr_trade]) - return data - - -class DataValidationError(ValueError): - pass - - -def check_all_values_specified( - df: pd.DataFrame, group_by_cols: list[str], column_name: str, values: list -) -> None: - """Check that the required values are specified in a dataframe. - - Checks that a row exists for all specified values of column_name for each - group in the grouped dataframe. - """ - if not ( - df.groupby(group_by_cols) - .apply( - lambda x: ( - set(x.index.get_level_values(column_name).unique()) == set(values) - ) - ) - .all() - ).all(): - msg = "" # TODO - raise DataValidationError(msg) - - -def project_timeslice( - data: xr.Dataset, timeslices: xr.DataArray, quantity_type: QuantityType -) -> xr.Dataset: - """Project a dataset over a new timeslice dimension. - - The projection can be done in one of two ways, depending on whether the - quantity type is extensive or intensive. See `QuantityType`. 
- - Args: - data: Dataset to project - timeslices: DataArray of timeslice levels, with values between 0 and 1 - representing the timeslice length (fraction of the year) - quantity_type: Type of projection to perform. QuantityType.EXTENSIVE or - QuantityType.INTENSIVE - - Returns: - Projected dataset - """ - assert "timeslice" in timeslices.dims - assert "timeslice" not in data.dims - - if quantity_type is QuantityType.INTENSIVE: - return data * timeslices - if quantity_type is QuantityType.EXTENSIVE: - return data * xr.ones_like(timeslices) diff --git a/tests/test_new_readers.py b/tests/test_new_readers.py index e7d7e31d9..f8c81f89e 100644 --- a/tests/test_new_readers.py +++ b/tests/test_new_readers.py @@ -1,9 +1,7 @@ -from io import StringIO - import duckdb import numpy as np import xarray as xr -from pytest import approx, fixture, raises +from pytest import approx, fixture @fixture @@ -27,16 +25,6 @@ def populate_commodities(default_new_input, con): return read_commodities_csv(f, con) -@fixture -def populate_commodity_trade( - default_new_input, con, populate_commodities, populate_regions -): - from muse.new_input.readers import read_commodity_trade_csv - - with open(default_new_input / "commodity_trade.csv") as f: - return read_commodity_trade_csv(f, con) - - @fixture def populate_commodity_costs( default_new_input, con, populate_commodities, populate_regions @@ -62,7 +50,7 @@ def populate_demand_slicing( populate_regions, populate_commodities, populate_demand, - populate_timeslices, + populate_time_slices, ): from muse.new_input.readers import read_demand_slicing_csv @@ -79,21 +67,19 @@ def populate_regions(default_new_input, con): @fixture -def populate_timeslices(default_new_input, con): - from muse.new_input.readers import read_timeslices_csv +def populate_time_slices(default_new_input, con): + from muse.new_input.readers import read_time_slices_csv - with open(default_new_input / "timeslices.csv") as f: - return read_timeslices_csv(f, con) + with open(default_new_input / "time_slices.csv") as f: + return read_time_slices_csv(f, con) -def test_read_timeslices_csv(populate_timeslices): - data = populate_timeslices - assert len(data["id"]) == 6 - assert next(iter(data["id"])) == 1 - assert next(iter(data["month"])) == "all-year" +def test_read_time_slices_csv(populate_time_slices): + data = populate_time_slices + assert next(iter(data["season"])) == "all-year" assert next(iter(data["day"])) == "all-week" - assert next(iter(data["hour"])) == "night" - assert next(iter(data["fraction"])) == approx(0.1667) + assert next(iter(data["time_of_day"])) == "night" + assert next(iter(data["fraction"])) == approx(0.166667) def test_read_regions_csv(populate_regions): @@ -107,22 +93,13 @@ def test_read_commodities_csv(populate_commodities): assert list(data["unit"]) == ["PJ"] * 4 + ["kt"] -def test_read_commodity_trade_csv(populate_commodity_trade): - data = populate_commodity_trade - assert data["commodity"].size == 0 - assert data["region"].size == 0 - assert data["year"].size == 0 - assert data["import"].size == 0 - assert data["export"].size == 0 - - def test_read_commodity_costs_csv(populate_commodity_costs): data = populate_commodity_costs # Only checking the first element of each array, as the table is large assert next(iter(data["commodity"])) == "electricity" assert next(iter(data["region"])) == "R1" - assert next(iter(data["year"])) == 2010 - assert next(iter(data["value"])) == approx(14.81481) + assert next(iter(data["year"])) == 2020 + assert next(iter(data["value"])) == approx(19.5) 
def test_read_demand_csv(populate_demand): @@ -137,37 +114,7 @@ def test_read_demand_slicing_csv(populate_demand_slicing): data = populate_demand_slicing assert np.all(data["commodity"] == "heat") assert np.all(data["region"] == "R1") - # assert np.all(data["timeslice"] == np.array([0, 1])) - assert np.all( - data["fraction"] - == np.array([0.1, 0.15, 0.1, 0.15, 0.3, 0.2, 0.1, 0.15, 0.1, 0.15, 0.3, 0.2]) - ) - - -def test_read_commodities_csv_type_constraint(con): - from muse.new_input.readers import read_commodities_csv - - csv = StringIO("id,type,unit\nfoo,invalid,bar\n") - with raises(duckdb.ConstraintException): - read_commodities_csv(csv, con) - - -def test_read_demand_csv_commodity_constraint( - con, populate_commodities, populate_regions -): - from muse.new_input.readers import read_demand_csv - - csv = StringIO("year,commodity_id,region_id,demand\n2020,invalid,R1,0\n") - with raises(duckdb.ConstraintException, match=".*foreign key.*"): - read_demand_csv(csv, con) - - -def test_read_demand_csv_region_constraint(con, populate_commodities, populate_regions): - from muse.new_input.readers import read_demand_csv - - csv = StringIO("year,commodity_id,region_id,demand\n2020,heat,invalid,0\n") - with raises(duckdb.ConstraintException, match=".*foreign key.*"): - read_demand_csv(csv, con) + assert np.all(data["fraction"] == np.array([0.1, 0.15, 0.1, 0.15, 0.3, 0.2])) def test_calculate_global_commodities(populate_commodities): @@ -183,133 +130,3 @@ def test_calculate_global_commodities(populate_commodities): assert list(data.coords["commodity"].values) == list(populate_commodities["id"]) assert list(data.data_vars["type"].values) == list(populate_commodities["type"]) assert list(data.data_vars["unit"].values) == list(populate_commodities["unit"]) - - -def test_calculate_demand( - populate_commodities, - populate_regions, - populate_timeslices, - populate_demand, - populate_demand_slicing, -): - from muse.new_input.readers import calculate_demand - - data = calculate_demand( - populate_commodities, - populate_regions, - populate_timeslices, - populate_demand, - populate_demand_slicing, - ) - - assert isinstance(data, xr.DataArray) - assert data.dtype == np.float64 - - assert set(data.dims) == {"year", "commodity", "region", "timeslice"} - assert set(data.coords["region"].values) == {"R1"} - assert set(data.coords["timeslice"].values) == set(range(1, 7)) - assert set(data.coords["year"].values) == {2020, 2050} - assert set(data.coords["commodity"].values) == { - "electricity", - "gas", - "heat", - "wind", - "CO2f", - } - - assert data.sel(year=2020, commodity="heat", region="R1", timeslice=1) == 1 - - -def test_calculate_initial_market( - populate_commodities, - populate_regions, - populate_timeslices, - populate_commodity_trade, - populate_commodity_costs, -): - from muse.new_input.readers import calculate_initial_market - - data = calculate_initial_market( - populate_commodities, - populate_regions, - populate_timeslices, - populate_commodity_trade, - populate_commodity_costs, - ) - - assert isinstance(data, xr.Dataset) - assert set(data.dims) == {"region", "year", "commodity", "timeslice"} - for dt in data.dtypes.values(): - assert dt == np.dtype("float64") - assert set(data.coords["region"].values) == {"R1"} - assert set(data.coords["year"].values) == set(range(2010, 2105, 5)) - assert set(data.coords["commodity"].values) == { - "electricity", - "gas", - "heat", - "CO2f", - "wind", - } - month_values = ["all-year"] * 6 - day_values = ["all-week"] * 6 - hour_values = [ - "night", - 
"morning", - "afternoon", - "early-peak", - "late-peak", - "evening", - ] - - assert set(data.coords["timeslice"].values) == set( - zip(month_values, day_values, hour_values) - ) - assert set(data.coords["month"].values) == set(month_values) - assert set(data.coords["day"].values) == set(day_values) - assert set(data.coords["hour"].values) == set(hour_values) - - assert all(var.coords.equals(data.coords) for var in data.data_vars.values()) - - prices = data.data_vars["prices"] - assert ( - approx( - prices.sel( - year=2010, - region="R1", - commodity="electricity", - timeslice=("all-year", "all-week", "night"), - ), - abs=1e-4, - ) - == 14.81481 - ) - - exports = data.data_vars["export"] - assert ( - exports.sel( - year=2010, - region="R1", - commodity="electricity", - timeslice=("all-year", "all-week", "night"), - ) - ) == 0 - - imports = data.data_vars["import"] - assert ( - imports.sel( - year=2010, - region="R1", - commodity="electricity", - timeslice=("all-year", "all-week", "night"), - ) - ) == 0 - - static_trade = data.data_vars["static_trade"] - assert ( - static_trade.sel( - year=2010, - region="R1", - commodity="electricity", - timeslice=("all-year", "all-week", "night"), - ) - ) == 0 From 5c5b7c23c059463fdb5cbc44be2510188e3de53f Mon Sep 17 00:00:00 2001 From: Tom Bland Date: Wed, 13 Aug 2025 09:24:24 +0100 Subject: [PATCH 21/43] Small touch ups --- .../process_availabilities.csv | 2 +- src/muse/new_input/readers.py | 31 +++---------------- 2 files changed, 6 insertions(+), 27 deletions(-) diff --git a/src/muse/data/example/default_new_input/process_availabilities.csv b/src/muse/data/example/default_new_input/process_availabilities.csv index 6c6901e07..a1c598cf6 100644 --- a/src/muse/data/example/default_new_input/process_availabilities.csv +++ b/src/muse/data/example/default_new_input/process_availabilities.csv @@ -1,4 +1,4 @@ -process_id,region_id,year,timeslice,limit_type,value +process_id,region_id,year,time_slice,limit_type,value gassupply1,R1,2020,annual,up,0.9 gasCCGT,R1,2020,annual,up,0.9 windturbine,R1,2020,annual,up,0.4 diff --git a/src/muse/new_input/readers.py b/src/muse/new_input/readers.py index e504bdb03..015edf1ff 100644 --- a/src/muse/new_input/readers.py +++ b/src/muse/new_input/readers.py @@ -1,21 +1,18 @@ import duckdb -import numpy as np -import xarray as xr -def read_inputs(data_dir): - data = {} +def read_inputs(data_dir) -> duckdb.DuckDBPyConnection: con = duckdb.connect(":memory:") with open(data_dir / "time_slices.csv") as f: _time_slices = read_time_slices_csv(f, con) - with open(data_dir / "commodities.csv") as f: - commodities = read_commodities_csv(f, con) - with open(data_dir / "regions.csv") as f: _regions = read_regions_csv(f, con) + with open(data_dir / "commodities.csv") as f: + _commodities = read_commodities_csv(f, con) + with open(data_dir / "commodity_costs.csv") as f: _commodity_costs = read_commodity_costs_csv(f, con) @@ -25,8 +22,7 @@ def read_inputs(data_dir): with open(data_dir / "demand_slicing.csv") as f: _demand_slicing = read_demand_slicing_csv(f, con) - data["global_commodities"] = calculate_global_commodities(commodities) - return data + return con def read_time_slices_csv(buffer_, con): @@ -128,20 +124,3 @@ def read_demand_slicing_csv(buffer_, con): con.sql("""INSERT INTO demand_slicing SELECT commodity_id, region_id, time_slice, fraction FROM rel;""") return con.sql("SELECT * from demand_slicing").fetchnumpy() - - -def calculate_global_commodities(commodities): - names = commodities["id"].astype(np.dtype("str")) - types = 
commodities["type"].astype(np.dtype("str")) - units = commodities["unit"].astype(np.dtype("str")) - - type_array = xr.DataArray( - data=types, dims=["commodity"], coords=dict(commodity=names) - ) - - unit_array = xr.DataArray( - data=units, dims=["commodity"], coords=dict(commodity=names) - ) - - data = xr.Dataset(data_vars=dict(type=type_array, unit=unit_array)) - return data From 36e958ad498f0b8b82d1d440b6ca60ef755a2930 Mon Sep 17 00:00:00 2001 From: Tom Bland Date: Wed, 13 Aug 2025 09:42:40 +0100 Subject: [PATCH 22/43] Add new readers and tests --- .../example/default_new_input/processes.csv | 2 +- src/muse/new_input/readers.py | 185 ++++++++++++++++++ tests/test_new_readers.py | 125 ++++++++++-- 3 files changed, 299 insertions(+), 13 deletions(-) diff --git a/src/muse/data/example/default_new_input/processes.csv b/src/muse/data/example/default_new_input/processes.csv index e68ad288c..ae686798f 100644 --- a/src/muse/data/example/default_new_input/processes.csv +++ b/src/muse/data/example/default_new_input/processes.csv @@ -1,5 +1,5 @@ id,description,sector_id -gassupply1,Gas supply,energy +gassupply1,Gas supply,gas gasCCGT,Gas CCGT,power windturbine,Wind turbine,power gasboiler,Gas boiler,residential diff --git a/src/muse/new_input/readers.py b/src/muse/new_input/readers.py index 015edf1ff..5b85421fa 100644 --- a/src/muse/new_input/readers.py +++ b/src/muse/new_input/readers.py @@ -10,9 +10,30 @@ def read_inputs(data_dir) -> duckdb.DuckDBPyConnection: with open(data_dir / "regions.csv") as f: _regions = read_regions_csv(f, con) + with open(data_dir / "sectors.csv") as f: + _sectors = read_sectors_csv(f, con) + with open(data_dir / "commodities.csv") as f: _commodities = read_commodities_csv(f, con) + with open(data_dir / "processes.csv") as f: + _processes = read_processes_csv(f, con) + + with open(data_dir / "process_parameters.csv") as f: + _process_parameters = read_process_parameters_csv(f, con) + + with open(data_dir / "process_flows.csv") as f: + _process_flows = read_process_flows_csv(f, con) + + with open(data_dir / "agents.csv") as f: + _agents = read_agents_csv(f, con) + + with open(data_dir / "agent_objectives.csv") as f: + _agent_objectives = read_agent_objectives_csv(f, con) + + with open(data_dir / "assets.csv") as f: + _assets = read_assets_csv(f, con) + with open(data_dir / "commodity_costs.csv") as f: _commodity_costs = read_commodity_costs_csv(f, con) @@ -124,3 +145,167 @@ def read_demand_slicing_csv(buffer_, con): con.sql("""INSERT INTO demand_slicing SELECT commodity_id, region_id, time_slice, fraction FROM rel;""") return con.sql("SELECT * from demand_slicing").fetchnumpy() + + +def read_sectors_csv(buffer_, con): + sql = """CREATE TABLE sectors ( + id VARCHAR PRIMARY KEY, + ); + """ + con.sql(sql) + rel = con.read_csv(buffer_, header=True, delimiter=",") # noqa: F841 + con.sql("INSERT INTO sectors SELECT id FROM rel;") + return con.sql("SELECT * from sectors").fetchnumpy() + + +def read_processes_csv(buffer_, con): + sql = """CREATE TABLE processes ( + id VARCHAR PRIMARY KEY, + sector VARCHAR REFERENCES sectors(id) + ); + """ + con.sql(sql) + rel = con.read_csv(buffer_, header=True, delimiter=",") # noqa: F841 + con.sql("INSERT INTO processes SELECT id, sector_id FROM rel;") + return con.sql("SELECT * from processes").fetchnumpy() + + +def read_process_parameters_csv(buffer_, con): + sql = """CREATE TABLE process_parameters ( + process VARCHAR REFERENCES processes(id), + region VARCHAR REFERENCES regions(id), + year BIGINT, + cap_par DOUBLE, + fix_par DOUBLE, + 
var_par DOUBLE, + max_capacity_addition DOUBLE, + max_capacity_growth DOUBLE, + total_capacity_limit DOUBLE, + lifetime DOUBLE, + discount_rate DOUBLE, + PRIMARY KEY (process, region, year) + ); + """ + con.sql(sql) + rel = con.read_csv(buffer_, header=True, delimiter=",") # noqa: F841 + con.sql( + """ + INSERT INTO process_parameters SELECT + process_id, + region_id, + year, + cap_par, + fix_par, + var_par, + max_capacity_addition, + max_capacity_growth, + total_capacity_limit, + lifetime, + discount_rate + FROM rel; + """ + ) + return con.sql("SELECT * from process_parameters").fetchnumpy() + + +def read_process_flows_csv(buffer_, con): + sql = """CREATE TABLE process_flows ( + process VARCHAR REFERENCES processes(id), + commodity VARCHAR REFERENCES commodities(id), + region VARCHAR REFERENCES regions(id), + year BIGINT, + coeff DOUBLE, + PRIMARY KEY (process, commodity, region, year) + ); + """ + con.sql(sql) + rel = con.read_csv(buffer_, header=True, delimiter=",") # noqa: F841 + con.sql( + """ + INSERT INTO process_flows SELECT + process_id, + commodity_id, + region_id, + year, + coeff + FROM rel; + """ + ) + return con.sql("SELECT * from process_flows").fetchnumpy() + + +def read_agents_csv(buffer_, con): + sql = """CREATE TABLE agents ( + id VARCHAR PRIMARY KEY, + region VARCHAR REFERENCES regions(id), + sector VARCHAR REFERENCES sectors(id), + search_rule VARCHAR, + decision_rule VARCHAR, + quantity DOUBLE + ); + """ + con.sql(sql) + rel = con.read_csv(buffer_, header=True, delimiter=",") # noqa: F841 + con.sql( + """ + INSERT INTO agents SELECT + id, + region_id, + sector_id, + search_rule, + decision_rule, + quantity + FROM rel; + """ + ) + return con.sql("SELECT * from agents").fetchnumpy() + + +def read_agent_objectives_csv(buffer_, con): + sql = """CREATE TABLE agent_objectives ( + agent VARCHAR REFERENCES agents(id), + objective_type VARCHAR, + decision_weight DOUBLE, + objective_sort BOOLEAN, + PRIMARY KEY (agent, objective_type) + ); + """ + con.sql(sql) + rel = con.read_csv(buffer_, header=True, delimiter=",") # noqa: F841 + con.sql( + """ + INSERT INTO agent_objectives SELECT + agent_id, + objective_type, + decision_weight, + objective_sort + FROM rel; + """ + ) + return con.sql("SELECT * from agent_objectives").fetchnumpy() + + +def read_assets_csv(buffer_, con): + sql = """CREATE TABLE assets ( + agent VARCHAR REFERENCES agents(id), + process VARCHAR REFERENCES processes(id), + region VARCHAR REFERENCES regions(id), + commission_year BIGINT, + capacity DOUBLE, + PRIMARY KEY (agent, process, region, commission_year) + ); + """ + con.sql(sql) + rel = con.read_csv(buffer_, header=True, delimiter=",") # noqa: F841 + con.sql( + """ + INSERT INTO assets SELECT + agent_id, + process_id, + region_id, + commission_year, + capacity + FROM rel; + """ + ) + return con.sql("SELECT * from assets").fetchnumpy() diff --git a/tests/test_new_readers.py b/tests/test_new_readers.py index f8c81f89e..ccabb1666 100644 --- a/tests/test_new_readers.py +++ b/tests/test_new_readers.py @@ -1,6 +1,5 @@ import duckdb import numpy as np -import xarray as xr from pytest import approx, fixture @@ -74,6 +73,68 @@ def populate_time_slices(default_new_input, con): return read_time_slices_csv(f, con) +@fixture +def populate_sectors(default_new_input, con): + from muse.new_input.readers import read_sectors_csv + + with open(default_new_input / "sectors.csv") as f: + return read_sectors_csv(f, con) + + +@fixture +def populate_processes(default_new_input, con, populate_sectors): + from muse.new_input.readers 
import read_processes_csv + + with open(default_new_input / "processes.csv") as f: + return read_processes_csv(f, con) + + +@fixture +def populate_process_parameters( + default_new_input, con, populate_regions, populate_processes +): + from muse.new_input.readers import read_process_parameters_csv + + with open(default_new_input / "process_parameters.csv") as f: + return read_process_parameters_csv(f, con) + + +@fixture +def populate_process_flows( + default_new_input, con, populate_processes, populate_commodities, populate_regions +): + from muse.new_input.readers import read_process_flows_csv + + with open(default_new_input / "process_flows.csv") as f: + return read_process_flows_csv(f, con) + + +@fixture +def populate_agents(default_new_input, con, populate_regions, populate_sectors): + from muse.new_input.readers import read_agents_csv + + with open(default_new_input / "agents.csv") as f: + return read_agents_csv(f, con) + + +@fixture +def populate_agent_objectives(default_new_input, con, populate_agents): + from muse.new_input.readers import read_agent_objectives_csv + + with open(default_new_input / "agent_objectives.csv") as f: + return read_agent_objectives_csv(f, con) + + +@fixture +def populate_assets( + default_new_input, con, populate_agents, populate_processes, populate_regions +): + from muse.new_input.readers import read_assets_csv + + with open(default_new_input / "assets.csv") as f: + return read_assets_csv(f, con) + + def test_read_time_slices_csv(populate_time_slices): data = populate_time_slices assert next(iter(data["season"])) == "all-year" @@ -95,7 +156,6 @@ def test_read_commodities_csv(populate_commodities): def test_read_commodity_costs_csv(populate_commodity_costs): data = populate_commodity_costs - # Only checking the first element of each array, as the table is large assert next(iter(data["commodity"])) == "electricity" assert next(iter(data["region"])) == "R1" assert next(iter(data["year"])) == 2020 @@ -117,16 +177,57 @@ def test_read_demand_slicing_csv(populate_demand_slicing): assert np.all(data["fraction"] == np.array([0.1, 0.15, 0.1, 0.15, 0.3, 0.2])) -def test_calculate_global_commodities(populate_commodities): - from muse.new_input.readers import calculate_global_commodities +def test_read_sectors_csv(populate_sectors): + data = populate_sectors + assert next(iter(data["id"])) == "gas" + - data = calculate_global_commodities(populate_commodities) +def test_read_processes_csv(populate_processes): + data = populate_processes + assert next(iter(data["id"])) == "gassupply1" + assert next(iter(data["sector"])) == "gas" - assert isinstance(data, xr.Dataset) - assert set(data.dims) == {"commodity"} - for dt in data.dtypes.values(): - assert np.issubdtype(dt, np.dtype("str")) - assert list(data.coords["commodity"].values) == list(populate_commodities["id"]) - assert list(data.data_vars["type"].values) == list(populate_commodities["type"]) - assert list(data.data_vars["unit"].values) == list(populate_commodities["unit"]) +def test_read_process_parameters_csv(populate_process_parameters): + data = populate_process_parameters + assert next(iter(data["process"])) == "gassupply1" + assert next(iter(data["region"])) == "R1" + assert next(iter(data["year"])) == 2020 + assert next(iter(data["cap_par"])) == approx(0) + assert next(iter(data["discount_rate"])) == approx(0.1) + + +def test_read_process_flows_csv(populate_process_flows): + data = populate_process_flows + assert next(iter(data["process"])) == "gassupply1" + assert next(iter(data["commodity"])) == "gas" + 
assert next(iter(data["region"])) == "R1" + assert next(iter(data["year"])) == 2020 + assert next(iter(data["coeff"])) == approx(1) + + +def test_read_agents_csv(populate_agents): + data = populate_agents + assert next(iter(data["id"])) == "A1_RES" + assert next(iter(data["region"])) == "R1" + assert next(iter(data["sector"])) == "residential" + assert next(iter(data["search_rule"])) == "all" + assert next(iter(data["decision_rule"])) == "single" + assert next(iter(data["quantity"])) == approx(1) + + +def test_read_agent_objectives_csv(populate_agent_objectives): + data = populate_agent_objectives + assert next(iter(data["agent"])) == "A1_RES" + assert next(iter(data["objective_type"])) == "LCOE" + assert next(iter(data["decision_weight"])) == approx(1) + assert next(iter(data["objective_sort"])) is np.True_ + + +def test_read_assets_csv(populate_assets): + data = populate_assets + assert next(iter(data["agent"])) == "A1_GAS" + assert next(iter(data["process"])) == "gassupply1" + assert next(iter(data["region"])) == "R1" + assert next(iter(data["commission_year"])) == 1995 + assert next(iter(data["capacity"])) == approx(7.5) From d5474acfe1a8f82803f1757bb57d21e8ce5ef2c4 Mon Sep 17 00:00:00 2001 From: Tom Bland Date: Wed, 13 Aug 2025 11:14:55 +0100 Subject: [PATCH 23/43] Simplify code --- src/muse/new_input/readers.py | 78 +++++---------- tests/test_new_readers.py | 180 +++++++--------------------------- 2 files changed, 55 insertions(+), 203 deletions(-) diff --git a/src/muse/new_input/readers.py b/src/muse/new_input/readers.py index 5b85421fa..692783bac 100644 --- a/src/muse/new_input/readers.py +++ b/src/muse/new_input/readers.py @@ -3,45 +3,25 @@ def read_inputs(data_dir) -> duckdb.DuckDBPyConnection: con = duckdb.connect(":memory:") - - with open(data_dir / "time_slices.csv") as f: - _time_slices = read_time_slices_csv(f, con) - - with open(data_dir / "regions.csv") as f: - _regions = read_regions_csv(f, con) - - with open(data_dir / "sectors.csv") as f: - _sectors = read_sectors_csv(f, con) - - with open(data_dir / "commodities.csv") as f: - _commodities = read_commodities_csv(f, con) - - with open(data_dir / "processes.csv") as f: - _processes = read_processes_csv(f, con) - - with open(data_dir / "process_parameters.csv") as f: - _process_parameters = read_process_parameters_csv(f, con) - - with open(data_dir / "process_flows.csv") as f: - _process_flows = read_process_flows_csv(f, con) - - with open(data_dir / "agents.csv") as f: - _agents = read_agents_csv(f, con) - - with open(data_dir / "agent_objectives.csv") as f: - _agent_objectives = read_agent_objectives_csv(f, con) - - with open(data_dir / "assets.csv") as f: - _assets = read_assets_csv(f, con) - - with open(data_dir / "commodity_costs.csv") as f: - _commodity_costs = read_commodity_costs_csv(f, con) - - with open(data_dir / "demand.csv") as f: - _demand = read_demand_csv(f, con) - - with open(data_dir / "demand_slicing.csv") as f: - _demand_slicing = read_demand_slicing_csv(f, con) + load_order = [ + ("time_slices.csv", read_time_slices_csv), + ("regions.csv", read_regions_csv), + ("sectors.csv", read_sectors_csv), + ("commodities.csv", read_commodities_csv), + ("processes.csv", read_processes_csv), + ("process_parameters.csv", read_process_parameters_csv), + ("process_flows.csv", read_process_flows_csv), + ("agents.csv", read_agents_csv), + ("agent_objectives.csv", read_agent_objectives_csv), + ("assets.csv", read_assets_csv), + ("commodity_costs.csv", read_commodity_costs_csv), + ("demand.csv", read_demand_csv), + 
("demand_slicing.csv", read_demand_slicing_csv), + ] + + for filename, reader in load_order: + with open(data_dir / filename) as f: + reader(f, con) return con @@ -73,31 +53,27 @@ def read_time_slices_csv(buffer_, con): FROM rel """) - return con.sql("SELECT * FROM time_slices").fetchnumpy() - def read_commodities_csv(buffer_, con): sql = """CREATE TABLE commodities ( id VARCHAR PRIMARY KEY, type VARCHAR CHECK (type IN ('energy', 'service', 'material', 'environmental')), - unit VARCHAR, + unit VARCHAR ); """ con.sql(sql) rel = con.read_csv(buffer_, header=True, delimiter=",") # noqa: F841 con.sql("INSERT INTO commodities SELECT id, type, unit FROM rel;") - return con.sql("select * from commodities").fetchnumpy() def read_regions_csv(buffer_, con): sql = """CREATE TABLE regions ( - id VARCHAR PRIMARY KEY, + id VARCHAR PRIMARY KEY ); """ con.sql(sql) rel = con.read_csv(buffer_, header=True, delimiter=",") # noqa: F841 con.sql("INSERT INTO regions SELECT id FROM rel;") - return con.sql("SELECT * from regions").fetchnumpy() def read_commodity_costs_csv(buffer_, con): @@ -113,7 +89,6 @@ def read_commodity_costs_csv(buffer_, con): rel = con.read_csv(buffer_, header=True, delimiter=",") # noqa: F841 con.sql("""INSERT INTO commodity_costs SELECT commodity_id, region_id, year, value FROM rel;""") - return con.sql("SELECT * from commodity_costs").fetchnumpy() def read_demand_csv(buffer_, con): @@ -128,7 +103,6 @@ def read_demand_csv(buffer_, con): con.sql(sql) rel = con.read_csv(buffer_, header=True, delimiter=",") # noqa: F841 con.sql("INSERT INTO demand SELECT commodity_id, region_id, year, demand FROM rel;") - return con.sql("SELECT * from demand").fetchnumpy() def read_demand_slicing_csv(buffer_, con): @@ -137,14 +111,13 @@ def read_demand_slicing_csv(buffer_, con): region VARCHAR REFERENCES regions(id), time_slice VARCHAR REFERENCES time_slices(id), fraction DOUBLE CHECK (fraction >= 0 AND fraction <= 1), - PRIMARY KEY (commodity, region, time_slice), + PRIMARY KEY (commodity, region, time_slice) ); """ con.sql(sql) rel = con.read_csv(buffer_, header=True, delimiter=",") # noqa: F841 con.sql("""INSERT INTO demand_slicing SELECT commodity_id, region_id, time_slice, fraction FROM rel;""") - return con.sql("SELECT * from demand_slicing").fetchnumpy() def read_sectors_csv(buffer_, con): @@ -155,7 +128,6 @@ def read_sectors_csv(buffer_, con): con.sql(sql) rel = con.read_csv(buffer_, header=True, delimiter=",") # noqa: F841 con.sql("INSERT INTO sectors SELECT id FROM rel;") - return con.sql("SELECT * from sectors").fetchnumpy() def read_processes_csv(buffer_, con): @@ -167,7 +139,6 @@ def read_processes_csv(buffer_, con): con.sql(sql) rel = con.read_csv(buffer_, header=True, delimiter=",") # noqa: F841 con.sql("INSERT INTO processes SELECT id, sector_id FROM rel;") - return con.sql("SELECT * from processes").fetchnumpy() def read_process_parameters_csv(buffer_, con): @@ -205,7 +176,6 @@ def read_process_parameters_csv(buffer_, con): FROM rel; """ ) - return con.sql("SELECT * from process_parameters").fetchnumpy() def read_process_flows_csv(buffer_, con): @@ -231,7 +201,6 @@ def read_process_flows_csv(buffer_, con): FROM rel; """ ) - return con.sql("SELECT * from process_flows").fetchnumpy() def read_agents_csv(buffer_, con): @@ -258,7 +227,6 @@ def read_agents_csv(buffer_, con): FROM rel; """ ) - return con.sql("SELECT * from agents").fetchnumpy() def read_agent_objectives_csv(buffer_, con): @@ -282,7 +250,6 @@ def read_agent_objectives_csv(buffer_, con): FROM rel; """ ) - return con.sql("SELECT * 
from agent_objectives").fetchnumpy() def read_assets_csv(buffer_, con): @@ -308,4 +275,3 @@ def read_assets_csv(buffer_, con): FROM rel; """ ) - return con.sql("SELECT * from assets").fetchnumpy() diff --git a/tests/test_new_readers.py b/tests/test_new_readers.py index ccabb1666..dfbfc9d4c 100644 --- a/tests/test_new_readers.py +++ b/tests/test_new_readers.py @@ -1,10 +1,12 @@ +from pathlib import Path + import duckdb import numpy as np from pytest import approx, fixture @fixture -def default_new_input(tmp_path): +def default_new_input(tmp_path) -> Path: from muse.examples import copy_model copy_model("default_new_input", tmp_path) @@ -12,184 +14,68 @@ def default_new_input(tmp_path): @fixture -def con(): - return duckdb.connect(":memory:") - - -@fixture -def populate_commodities(default_new_input, con): - from muse.new_input.readers import read_commodities_csv - - with open(default_new_input / "commodities.csv") as f: - return read_commodities_csv(f, con) - - -@fixture -def populate_commodity_costs( - default_new_input, con, populate_commodities, populate_regions -): - from muse.new_input.readers import read_commodity_costs_csv - - with open(default_new_input / "commodity_costs.csv") as f: - return read_commodity_costs_csv(f, con) - - -@fixture -def populate_demand(default_new_input, con, populate_regions, populate_commodities): - from muse.new_input.readers import read_demand_csv - - with open(default_new_input / "demand.csv") as f: - return read_demand_csv(f, con) - - -@fixture -def populate_demand_slicing( - default_new_input, - con, - populate_regions, - populate_commodities, - populate_demand, - populate_time_slices, -): - from muse.new_input.readers import read_demand_slicing_csv - - with open(default_new_input / "demand_slicing.csv") as f: - return read_demand_slicing_csv(f, con) - - -@fixture -def populate_regions(default_new_input, con): - from muse.new_input.readers import read_regions_csv - - with open(default_new_input / "regions.csv") as f: - return read_regions_csv(f, con) - - -@fixture -def populate_time_slices(default_new_input, con): - from muse.new_input.readers import read_time_slices_csv - - with open(default_new_input / "time_slices.csv") as f: - return read_time_slices_csv(f, con) - - -@fixture -def populate_sectors(default_new_input, con): - from muse.new_input.readers import read_sectors_csv - - with open(default_new_input / "sectors.csv") as f: - return read_sectors_csv(f, con) - - -@fixture -def populate_processes(default_new_input, con, populate_sectors): - from muse.new_input.readers import read_processes_csv - - with open(default_new_input / "processes.csv") as f: - return read_processes_csv(f, con) - - -@fixture -def populate_process_parameters( - default_new_input, con, populate_regions, populate_processes -): - from muse.new_input.readers import read_process_parameters_csv - - with open(default_new_input / "process_parameters.csv") as f: - return read_process_parameters_csv(f, con) - - -@fixture -def populate_process_flows( - default_new_input, con, populate_processes, populate_commodities, populate_regions -): - from muse.new_input.readers import read_process_flows_csv - - with open(default_new_input / "process_flows.csv") as f: - return read_process_flows_csv(f, con) - - -@fixture -def populate_agents(default_new_input, con, populate_regions, populate_sectors): - from muse.new_input.readers import read_agents_csv - - with open(default_new_input / "agents.csv") as f: - return read_agents_csv(f, con) - - -@fixture -def 
populate_agent_objectives(default_new_input, con, populate_agents): - from muse.new_input.readers import read_agent_objectives_csv - - with open(default_new_input / "agent_objectives.csv") as f: - return read_agent_objectives_csv(f, con) - - -@fixture -def populate_assets( - default_new_input, con, populate_agents, populate_processes, populate_regions -): - from muse.new_input.readers import read_assets_csv +def con(default_new_input) -> duckdb.DuckDBPyConnection: + from muse.new_input.readers import read_inputs - with open(default_new_input / "assets.csv") as f: - return read_assets_csv(f, con) + return read_inputs(default_new_input) -def test_read_time_slices_csv(populate_time_slices): - data = populate_time_slices +def test_read_time_slices_csv(con): + data = con.sql("SELECT * FROM time_slices").fetchnumpy() assert next(iter(data["season"])) == "all-year" assert next(iter(data["day"])) == "all-week" assert next(iter(data["time_of_day"])) == "night" assert next(iter(data["fraction"])) == approx(0.166667) -def test_read_regions_csv(populate_regions): - assert populate_regions["id"] == np.array(["R1"]) +def test_read_regions_csv(con): + data = con.sql("SELECT * FROM regions").fetchnumpy() + assert data["id"] == np.array(["R1"]) -def test_read_commodities_csv(populate_commodities): - data = populate_commodities +def test_read_commodities_csv(con): + data = con.sql("SELECT * FROM commodities").fetchnumpy() assert list(data["id"]) == ["electricity", "gas", "heat", "wind", "CO2f"] assert list(data["type"]) == ["energy"] * 5 assert list(data["unit"]) == ["PJ"] * 4 + ["kt"] -def test_read_commodity_costs_csv(populate_commodity_costs): - data = populate_commodity_costs +def test_read_commodity_costs_csv(con): + data = con.sql("SELECT * FROM commodity_costs").fetchnumpy() assert next(iter(data["commodity"])) == "electricity" assert next(iter(data["region"])) == "R1" assert next(iter(data["year"])) == 2020 assert next(iter(data["value"])) == approx(19.5) -def test_read_demand_csv(populate_demand): - data = populate_demand +def test_read_demand_csv(con): + data = con.sql("SELECT * FROM demand").fetchnumpy() assert np.all(data["year"] == np.array([2020, 2050])) assert np.all(data["commodity"] == np.array(["heat", "heat"])) assert np.all(data["region"] == np.array(["R1", "R1"])) assert np.all(data["demand"] == np.array([10, 30])) -def test_read_demand_slicing_csv(populate_demand_slicing): - data = populate_demand_slicing +def test_read_demand_slicing_csv(con): + data = con.sql("SELECT * FROM demand_slicing").fetchnumpy() assert np.all(data["commodity"] == "heat") assert np.all(data["region"] == "R1") assert np.all(data["fraction"] == np.array([0.1, 0.15, 0.1, 0.15, 0.3, 0.2])) -def test_read_sectors_csv(populate_sectors): - data = populate_sectors +def test_read_sectors_csv(con): + data = con.sql("SELECT * FROM sectors").fetchnumpy() assert next(iter(data["id"])) == "gas" -def test_read_processes_csv(populate_processes): - data = populate_processes +def test_read_processes_csv(con): + data = con.sql("SELECT * FROM processes").fetchnumpy() assert next(iter(data["id"])) == "gassupply1" assert next(iter(data["sector"])) == "gas" -def test_read_process_parameters_csv(populate_process_parameters): - data = populate_process_parameters +def test_read_process_parameters_csv(con): + data = con.sql("SELECT * FROM process_parameters").fetchnumpy() assert next(iter(data["process"])) == "gassupply1" assert next(iter(data["region"])) == "R1" assert next(iter(data["year"])) == 2020 @@ -197,8 +83,8 @@ def 
test_read_process_parameters_csv(populate_process_parameters): assert next(iter(data["discount_rate"])) == approx(0.1) -def test_read_process_flows_csv(populate_process_flows): - data = populate_process_flows +def test_read_process_flows_csv(con): + data = con.sql("SELECT * FROM process_flows").fetchnumpy() assert next(iter(data["process"])) == "gassupply1" assert next(iter(data["commodity"])) == "gas" assert next(iter(data["region"])) == "R1" @@ -206,8 +92,8 @@ def test_read_process_flows_csv(populate_process_flows): assert next(iter(data["coeff"])) == approx(1) -def test_read_agents_csv(populate_agents): - data = populate_agents +def test_read_agents_csv(con): + data = con.sql("SELECT * FROM agents").fetchnumpy() assert next(iter(data["id"])) == "A1_RES" assert next(iter(data["region"])) == "R1" assert next(iter(data["sector"])) == "residential" @@ -216,16 +102,16 @@ def test_read_agents_csv(populate_agents): assert next(iter(data["quantity"])) == approx(1) -def test_read_agent_objectives_csv(populate_agent_objectives): - data = populate_agent_objectives +def test_read_agent_objectives_csv(con): + data = con.sql("SELECT * FROM agent_objectives").fetchnumpy() assert next(iter(data["agent"])) == "A1_RES" assert next(iter(data["objective_type"])) == "LCOE" assert next(iter(data["decision_weight"])) == approx(1) assert next(iter(data["objective_sort"])) is np.True_ -def test_read_assets_csv(populate_assets): - data = populate_assets +def test_read_assets_csv(con): + data = con.sql("SELECT * FROM assets").fetchnumpy() assert next(iter(data["agent"])) == "A1_GAS" assert next(iter(data["process"])) == "gassupply1" assert next(iter(data["region"])) == "R1" From 4d08bc6850af5a9297e4ebe7635194480e0667f0 Mon Sep 17 00:00:00 2001 From: Tom Bland Date: Wed, 13 Aug 2025 13:18:14 +0100 Subject: [PATCH 24/43] Add functions for global_commodities and technodictionary xarrays --- src/muse/new_input/readers.py | 62 +++++++++++++++++++++++++++++++++++ tests/test_new_readers.py | 12 +++++++ 2 files changed, 74 insertions(+) diff --git a/src/muse/new_input/readers.py b/src/muse/new_input/readers.py index 692783bac..f0b96cc9b 100644 --- a/src/muse/new_input/readers.py +++ b/src/muse/new_input/readers.py @@ -1,4 +1,10 @@ import duckdb +import xarray as xr + +from muse.readers.csv import ( + create_multiindex, + create_xarray_dataset, +) def read_inputs(data_dir) -> duckdb.DuckDBPyConnection: @@ -275,3 +281,59 @@ def read_assets_csv(buffer_, con): FROM rel; """ ) + + +def process_global_commodities(con: duckdb.DuckDBPyConnection) -> xr.Dataset: + """Create an xarray Dataset of global commodities from the `commodities` table.""" + df = con.sql( + """ + SELECT + id AS commodity, + type AS commodity_type, + unit + FROM commodities + """ + ).df() + + df.index = df["commodity"] + df = df.drop(columns=["commodity"]) + df.index.name = "commodity" + return create_xarray_dataset(df) + + +def process_technodictionary(con: duckdb.DuckDBPyConnection, sector: str) -> xr.Dataset: + """Create an xarray Dataset analogous to technodictionary from DB tables. + + Uses `processes` and `process_parameters` to build variables over + dimensions (technology, region, year). + """ + df = con.execute( + """ + SELECT + p.id AS technology, + pp.region, + pp.year, + pp.cap_par, + pp.fix_par, + pp.var_par, + pp.max_capacity_addition, + pp.max_capacity_growth, + pp.total_capacity_limit, + pp.lifetime AS technical_life, + pp.discount_rate AS interest_rate + FROM process_parameters pp + JOIN processes p ON p.id = pp.process + WHERE p.sector = ? 
+ """, + [sector], + ).fetchdf() + + df = create_multiindex( + df, + index_columns=["technology", "region", "year"], + index_names=["technology", "region", "year"], + drop_columns=True, + ) + + result = create_xarray_dataset(df) + return result diff --git a/tests/test_new_readers.py b/tests/test_new_readers.py index dfbfc9d4c..6a8cce3f7 100644 --- a/tests/test_new_readers.py +++ b/tests/test_new_readers.py @@ -117,3 +117,15 @@ def test_read_assets_csv(con): assert next(iter(data["region"])) == "R1" assert next(iter(data["commission_year"])) == 1995 assert next(iter(data["capacity"])) == approx(7.5) + + +def test_process_global_commodities(con): + from muse.new_input.readers import process_global_commodities + + process_global_commodities(con) + + +def test_process_technodictionary(con): + from muse.new_input.readers import process_technodictionary + + process_technodictionary(con, sector="power") From fd2f711413327fcc5c996451f056fd3c61bb7384 Mon Sep 17 00:00:00 2001 From: Tom Bland Date: Wed, 13 Aug 2025 13:26:15 +0100 Subject: [PATCH 25/43] process_agent_parameters --- src/muse/new_input/readers.py | 70 +++++++++++++++++++++++++++++++++++ tests/test_new_readers.py | 6 +++ 2 files changed, 76 insertions(+) diff --git a/src/muse/new_input/readers.py b/src/muse/new_input/readers.py index f0b96cc9b..465d35268 100644 --- a/src/muse/new_input/readers.py +++ b/src/muse/new_input/readers.py @@ -337,3 +337,73 @@ def process_technodictionary(con: duckdb.DuckDBPyConnection, sector: str) -> xr. result = create_xarray_dataset(df) return result + + +def process_agent_parameters(con: duckdb.DuckDBPyConnection, sector: str) -> list[dict]: + """Create a list of agent dictionaries for a sector from DB tables. + + The result matches the structure returned by the legacy CSV-based + process_agent_parameters, but only includes the required fields: + - name, region, objectives, search_rules, decision, quantity + + The following legacy fields are intentionally omitted: agent_type, + share, maturity_threshold, spend_limit. + """ + # Gather agent base data for the sector + agents_df = con.execute( + """ + SELECT id AS name, + region AS region, + search_rule, + decision_rule, + quantity + FROM agents + WHERE sector = ? + """, + [sector], + ).fetchdf() + + # Gather objectives per agent + objectives_df = con.execute( + """ + SELECT agent AS name, + objective_type, + objective_sort, + decision_weight + FROM agent_objectives + WHERE agent IN (SELECT id FROM agents WHERE sector = ?) 
+ ORDER BY name + """, + [sector], + ).fetchdf() + + # Assemble result + result: list[dict] = [] + for _, row in agents_df.iterrows(): + agent_name = row["name"] + agent_objectives = objectives_df[objectives_df["name"] == agent_name] + + # Objectives list: in legacy, these are strings like 'LCOE' + objectives = agent_objectives["objective_type"].tolist() + + # Decision parameters: tuples of + # (objective_type, objective_sort, decision_weight) + decision_params = list( + zip( + agent_objectives["objective_type"].tolist(), + agent_objectives["objective_sort"].tolist(), + agent_objectives["decision_weight"].tolist(), + ) + ) + + agent_dict = { + "name": agent_name, + "region": row["region"], + "objectives": objectives, + "search_rules": row["search_rule"], + "decision": {"name": row["decision_rule"], "parameters": decision_params}, + "quantity": row["quantity"], + } + result.append(agent_dict) + + return result diff --git a/tests/test_new_readers.py b/tests/test_new_readers.py index 6a8cce3f7..f6d1d3d7d 100644 --- a/tests/test_new_readers.py +++ b/tests/test_new_readers.py @@ -129,3 +129,9 @@ def test_process_technodictionary(con): from muse.new_input.readers import process_technodictionary process_technodictionary(con, sector="power") + + +def test_process_agent_parameters(con): + from muse.new_input.readers import process_agent_parameters + + process_agent_parameters(con, sector="power") From ab69b28d3fe503b9aa6cf8c90a59353f0f30ea04 Mon Sep 17 00:00:00 2001 From: Tom Bland Date: Wed, 13 Aug 2025 13:45:14 +0100 Subject: [PATCH 26/43] process_initial_market --- src/muse/new_input/readers.py | 58 +++++++++++++++++++++++++++++++++++ tests/test_new_readers.py | 8 +++++ 2 files changed, 66 insertions(+) diff --git a/src/muse/new_input/readers.py b/src/muse/new_input/readers.py index 465d35268..614640661 100644 --- a/src/muse/new_input/readers.py +++ b/src/muse/new_input/readers.py @@ -339,6 +339,64 @@ def process_technodictionary(con: duckdb.DuckDBPyConnection, sector: str) -> xr. return result +def process_initial_market( + con: duckdb.DuckDBPyConnection, currency: str, years: list[int] +) -> xr.Dataset: + """Create initial market dataset with prices and zero trade variables. + + Args: + con: DuckDB connection with tables loaded. + currency: Currency string, e.g. "USD". Mandatory. + years: List of years to cover. Missing combinations are filled with zero. + + Returns: + xr.Dataset with dims (region, year, commodity) and variables + prices, exports, imports, static_trade. Adds coordinate + units_prices = f"{currency}/{unit}" per commodity. + """ + if not isinstance(currency, str) or not currency.strip(): + raise ValueError("currency must be a non-empty string") + + years_sql = ", ".join(f"({y})" for y in years) + df = con.execute( + f""" + WITH years(year) AS (VALUES {years_sql}) + SELECT + r.id AS region, + y.year AS year, + c.id AS commodity, + COALESCE(cc.value, 0) AS prices, + (? 
|| '/' || c.unit) AS units_prices + FROM regions r + CROSS JOIN years y + CROSS JOIN commodities c + LEFT JOIN commodity_costs cc + ON cc.region = r.id AND cc.year = y.year AND cc.commodity = c.id + """, + [currency], + ).fetchdf() + + if df.empty: + raise ValueError("No commodity cost data found to build initial market.") + + # Build dataset from prices + prices_df = create_multiindex( + df, + index_columns=["region", "year", "commodity"], + index_names=["region", "year", "commodity"], + drop_columns=True, + ) + result = create_xarray_dataset(prices_df) + + # Add zero trade variables (legacy) + result["exports"] = xr.zeros_like(result["prices"]).rename("exports") + result["imports"] = xr.zeros_like(result["prices"]).rename("imports") + result["static_trade"] = (result["imports"] - result["exports"]).rename( + "static_trade" + ) + return result + + def process_agent_parameters(con: duckdb.DuckDBPyConnection, sector: str) -> list[dict]: """Create a list of agent dictionaries for a sector from DB tables. diff --git a/tests/test_new_readers.py b/tests/test_new_readers.py index f6d1d3d7d..ca835f922 100644 --- a/tests/test_new_readers.py +++ b/tests/test_new_readers.py @@ -135,3 +135,11 @@ def test_process_agent_parameters(con): from muse.new_input.readers import process_agent_parameters process_agent_parameters(con, sector="power") + + +def test_process_initial_market(con): + from muse.new_input.readers import process_initial_market + + process_initial_market( + con, currency="EUR", years=[2020, 2025, 2030, 2035, 2040, 2045, 2050] + ) From 1ca038ec3a8d53bf6575bbd29bc142359064f946 Mon Sep 17 00:00:00 2001 From: Tom Bland Date: Wed, 13 Aug 2025 14:22:12 +0100 Subject: [PATCH 27/43] process_initial_capacity --- src/muse/new_input/readers.py | 82 +++++++++++++++++++++++++++++++++-- tests/test_new_readers.py | 8 ++++ 2 files changed, 86 insertions(+), 4 deletions(-) diff --git a/src/muse/new_input/readers.py b/src/muse/new_input/readers.py index 614640661..56c964d4a 100644 --- a/src/muse/new_input/readers.py +++ b/src/muse/new_input/readers.py @@ -1,10 +1,8 @@ import duckdb +import pandas as pd import xarray as xr -from muse.readers.csv import ( - create_multiindex, - create_xarray_dataset, -) +from muse.readers.csv import create_assets, create_multiindex, create_xarray_dataset def read_inputs(data_dir) -> duckdb.DuckDBPyConnection: @@ -465,3 +463,79 @@ def process_agent_parameters(con: duckdb.DuckDBPyConnection, sector: str) -> lis result.append(agent_dict) return result + + +def process_initial_capacity( + con: duckdb.DuckDBPyConnection, sector: str, years: list[int] +) -> xr.DataArray: + """Create existing capacity over time from assets and lifetimes. + + Args: + con: DuckDB connection + sector: Sector name to filter processes + years: List of years to include (no interpolation) + + Returns: + xr.DataArray with dims (asset) and coordinates (asset, technology, region, year) + showing capacity available in each year based on commission year and lifetime. 
+ """ + years_sql = ", ".join(f"({y})" for y in years) + + # Compute capacity trajectory per technology/region/year + # Note: this sums up the capacity of all assets in the same technology/region + # I think ideally we wouldn't do that and would keep these as separate assets + # Also, this isn't taking into account agent ownership + assets_df = con.execute( + f""" + WITH years(year) AS (VALUES {years_sql}), + lifetimes AS ( + SELECT DISTINCT pp.process, pp.region, pp.lifetime + FROM process_parameters pp + JOIN processes p ON p.id = pp.process + WHERE p.sector = ? + ), + assets_enriched AS ( + SELECT + a.process AS technology, + a.region, + a.commission_year, + a.capacity, + lt.lifetime + FROM assets a + JOIN lifetimes lt + ON lt.process = a.process AND lt.region = a.region + ) + SELECT + ae.technology, + ae.region, + y.year, + SUM( + CASE + WHEN y.year >= ae.commission_year AND + y.year < (ae.commission_year + ae.lifetime) + THEN ae.capacity ELSE 0 + END + ) AS value + FROM assets_enriched ae + CROSS JOIN years y + GROUP BY ae.technology, ae.region, y.year + ORDER BY ae.technology, ae.region, y.year + """, + [sector], + ).fetchdf() + + # If no assets, return an empty DataArray + if assets_df.empty: + return xr.DataArray([], dims=("asset",)) + + df = pd.DataFrame(assets_df) + df = create_multiindex( + df, + index_columns=["technology", "region", "year"], + index_names=["technology", "region", "year"], + drop_columns=True, + ) + da = create_xarray_dataset(df).value.astype(float) + + da = create_assets(da) + return da diff --git a/tests/test_new_readers.py b/tests/test_new_readers.py index ca835f922..c5c956300 100644 --- a/tests/test_new_readers.py +++ b/tests/test_new_readers.py @@ -143,3 +143,11 @@ def test_process_initial_market(con): process_initial_market( con, currency="EUR", years=[2020, 2025, 2030, 2035, 2040, 2045, 2050] ) + + +def test_process_initial_capacity(con): + from muse.new_input.readers import process_initial_capacity + + process_initial_capacity( + con, sector="power", years=[2020, 2025, 2030, 2035, 2040, 2045, 2050] + ) From f866d7ab6a68a4d52f4098ddcbf8d91957b191b8 Mon Sep 17 00:00:00 2001 From: Tom Bland Date: Wed, 13 Aug 2025 15:26:45 +0100 Subject: [PATCH 28/43] Add functions for expanding years/regions/timeslices --- .../example/default_new_input/commodities.csv | 2 +- .../data/example/default_new_input/demand.csv | 5 + .../process_availabilities.csv | 6 +- .../default_new_input/process_flows.csv | 22 +-- .../default_new_input/process_parameters.csv | 10 +- src/muse/new_input/readers.py | 160 +++++++++++++++--- tests/test_new_readers.py | 34 ++-- 7 files changed, 181 insertions(+), 58 deletions(-) diff --git a/src/muse/data/example/default_new_input/commodities.csv b/src/muse/data/example/default_new_input/commodities.csv index b4d546a74..9fcf35cbc 100644 --- a/src/muse/data/example/default_new_input/commodities.csv +++ b/src/muse/data/example/default_new_input/commodities.csv @@ -3,4 +3,4 @@ electricity,Electricity,energy,PJ gas,Gas,energy,PJ heat,Heat,energy,PJ wind,Wind,energy,PJ -CO2f,Carbon dioxide,energy,kt +CO2f,Carbon dioxide,environmental,kt diff --git a/src/muse/data/example/default_new_input/demand.csv b/src/muse/data/example/default_new_input/demand.csv index b26c1b54d..38867e7f7 100644 --- a/src/muse/data/example/default_new_input/demand.csv +++ b/src/muse/data/example/default_new_input/demand.csv @@ -1,3 +1,8 @@ commodity_id,region_id,year,demand heat,R1,2020,10 +heat,R1,2025,13.3 +heat,R1,2030,16.7 +heat,R1,2035,20 +heat,R1,2040,23.3 +heat,R1,2045,26.7 
heat,R1,2050,30 diff --git a/src/muse/data/example/default_new_input/process_availabilities.csv b/src/muse/data/example/default_new_input/process_availabilities.csv index a1c598cf6..39824201b 100644 --- a/src/muse/data/example/default_new_input/process_availabilities.csv +++ b/src/muse/data/example/default_new_input/process_availabilities.csv @@ -1,4 +1,4 @@ process_id,region_id,year,time_slice,limit_type,value -gassupply1,R1,2020,annual,up,0.9 -gasCCGT,R1,2020,annual,up,0.9 -windturbine,R1,2020,annual,up,0.4 +gassupply1,R1,all,annual,up,0.9 +gasCCGT,R1,all,annual,up,0.9 +windturbine,R1,all,annual,up,0.4 diff --git a/src/muse/data/example/default_new_input/process_flows.csv b/src/muse/data/example/default_new_input/process_flows.csv index 76415278a..c17e4956c 100644 --- a/src/muse/data/example/default_new_input/process_flows.csv +++ b/src/muse/data/example/default_new_input/process_flows.csv @@ -1,12 +1,12 @@ process_id,commodity_id,region_id,year,coeff -gassupply1,gas,R1,2020,1 -gasCCGT,gas,R1,2020,-1.67 -gasCCGT,electricity,R1,2020,1 -gasCCGT,CO2f,R1,2020,91.67 -windturbine,wind,R1,2020,-1 -windturbine,electricity,R1,2020,1 -gasboiler,gas,R1,2020,-1.16 -gasboiler,heat,R1,2020,1 -gasboiler,CO2f,R1,2020,64.71 -heatpump,electricity,R1,2020,-0.4 -heatpump,heat,R1,2020,1 +gassupply1,gas,R1,all,1 +gasCCGT,gas,R1,all,-1.67 +gasCCGT,electricity,R1,all,1 +gasCCGT,CO2f,R1,all,91.67 +windturbine,wind,R1,all,-1 +windturbine,electricity,R1,all,1 +gasboiler,gas,R1,all,-1.16 +gasboiler,heat,R1,all,1 +gasboiler,CO2f,R1,all,64.71 +heatpump,electricity,R1,all,-0.4 +heatpump,heat,R1,all,1 diff --git a/src/muse/data/example/default_new_input/process_parameters.csv b/src/muse/data/example/default_new_input/process_parameters.csv index 4a7f294b4..737352e1a 100644 --- a/src/muse/data/example/default_new_input/process_parameters.csv +++ b/src/muse/data/example/default_new_input/process_parameters.csv @@ -1,6 +1,6 @@ process_id,region_id,year,cap_par,fix_par,var_par,max_capacity_addition,max_capacity_growth,total_capacity_limit,lifetime,discount_rate -gassupply1,R1,2020,0,0,2.55,5,1,60,35,0.1 -gasCCGT,R1,2020,23.78234399,0,0,2,1,60,35,0.1 -windturbine,R1,2020,36.30771182,0,0,2,1,60,25,0.1 -gasboiler,R1,2020,3.8,0,0,10,0.02,60,10,0.1 -heatpump,R1,2020,8.866667,0,0,10,0.02,60,10,0.1 +gassupply1,R1,all,0,0,2.55,5,1,60,35,0.1 +gasCCGT,R1,all,23.78234399,0,0,2,1,60,35,0.1 +windturbine,R1,all,36.30771182,0,0,2,1,60,25,0.1 +gasboiler,R1,all,3.8,0,0,10,0.02,60,10,0.1 +heatpump,R1,all,8.866667,0,0,10,0.02,60,10,0.1 diff --git a/src/muse/new_input/readers.py b/src/muse/new_input/readers.py index 56c964d4a..d10e8f812 100644 --- a/src/muse/new_input/readers.py +++ b/src/muse/new_input/readers.py @@ -5,8 +5,118 @@ from muse.readers.csv import create_assets, create_multiindex, create_xarray_dataset -def read_inputs(data_dir) -> duckdb.DuckDBPyConnection: +def expand_years(source_relation: str = "rel") -> str: + """Return SQL that expands 'year' values of 'all' or semicolon lists. + + - If year == 'all': duplicates for every row in `years(year)` table. + - If year contains a semicolon-separated list (e.g. '2020;2025'): + splits and duplicates for each year item. + - Otherwise: casts the single value to BIGINT. 
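+
+    For example, a row with year '2020;2025' is expanded into two rows (2020 and
+    2025), a row with year 'all' is repeated once per entry in the `years` table,
+    and a plain value such as '2020' passes through unchanged. Typical usage, as
+    in read_commodity_costs_csv below (illustrative sketch):
+
+        expansion_sql = expand_years(source_relation="rel")
+        con.sql(
+            f"INSERT INTO commodity_costs SELECT commodity_id, region_id, year, "
+            f"value FROM ({expansion_sql}) AS unioned;"
+        )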
+ """ + return f""" + WITH src AS ( + SELECT *, CAST(year AS VARCHAR) AS year_str FROM {source_relation} + ), + explicit AS ( + SELECT s.* REPLACE (CAST(s.year_str AS BIGINT) AS year) + FROM src s + WHERE lower(s.year_str) <> 'all' + AND POSITION(';' IN s.year_str) = 0 + ), + multi AS ( + SELECT s.* REPLACE (CAST(TRIM(item) AS BIGINT) AS year) + FROM src s + CROSS JOIN UNNEST(str_split(s.year_str, ';')) AS t(item) + WHERE POSITION(';' IN s.year_str) > 0 + ), + expanded AS ( + SELECT s.* REPLACE (y.year AS year) + FROM src s + JOIN years y ON lower(s.year_str) = 'all' + ), + unioned AS ( + SELECT * FROM explicit + UNION ALL + SELECT * FROM multi + UNION ALL + SELECT * FROM expanded + ) + SELECT * FROM unioned + """ + + +def expand_regions(source_relation: str = "rel") -> str: + """Return SQL that expands 'region' values of 'all' or semicolon lists. + + - If region == 'all': duplicates for every row in `regions(id)` table. + - If region contains a semicolon-separated list (e.g. 'R1;R2'): + splits and duplicates for each region item. + - Otherwise: uses the single region value as-is. + """ + return f""" + WITH src AS ( + SELECT *, CAST(region AS VARCHAR) AS region_str FROM {source_relation} + ), + explicit AS ( + SELECT s.* + FROM src s + WHERE lower(s.region_str) <> 'all' + AND POSITION(';' IN s.region_str) = 0 + ), + multi AS ( + SELECT s.* REPLACE (TRIM(item) AS region) + FROM src s + CROSS JOIN UNNEST(str_split(s.region_str, ';')) AS t(item) + WHERE POSITION(';' IN s.region_str) > 0 + ), + expanded AS ( + SELECT s.* REPLACE (r.id AS region) + FROM src s + JOIN regions r ON lower(s.region_str) = 'all' + ), + unioned AS ( + SELECT * FROM explicit + UNION ALL + SELECT * FROM multi + UNION ALL + SELECT * FROM expanded + ) + SELECT * FROM unioned + """ + + +def expand_time_slices(source_relation: str = "rel") -> str: + """Return SQL that expands 'time_slice' values of 'annual' or a specific id. + + - If time_slice == 'annual': duplicates for every row in `time_slices(id)`. + - Otherwise: passes through the provided time_slice value. 
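+
+    For example, a row with time_slice 'annual' is duplicated once per entry in
+    the `time_slices` table, so the same value is attached to every slice.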
+ """ + return f""" + WITH src AS ( + SELECT *, CAST(time_slice AS VARCHAR) AS ts_str FROM {source_relation} + ), + explicit AS ( + SELECT s.* REPLACE (s.ts_str AS time_slice) + FROM src s + WHERE lower(s.ts_str) <> 'annual' + ), + expanded AS ( + SELECT s.* REPLACE (t.id AS time_slice) + FROM src s + JOIN time_slices t ON lower(s.ts_str) = 'annual' + ), + unioned AS ( + SELECT * FROM explicit + UNION ALL + SELECT * FROM expanded + ) + SELECT * FROM unioned + """ + + +def read_inputs(data_dir, years: list[int]) -> duckdb.DuckDBPyConnection: con = duckdb.connect(":memory:") + insert_years(con, years) load_order = [ ("time_slices.csv", read_time_slices_csv), ("regions.csv", read_regions_csv), @@ -30,6 +140,11 @@ def read_inputs(data_dir) -> duckdb.DuckDBPyConnection: return con +def insert_years(con: duckdb.DuckDBPyConnection, years: list[int]): + con.sql("CREATE TABLE years(year BIGINT PRIMARY KEY);") + con.sql(f"INSERT INTO years VALUES {', '.join(f'({y})' for y in years)};") + + def read_time_slices_csv(buffer_, con): sql = """ CREATE TABLE time_slices ( @@ -91,8 +206,11 @@ def read_commodity_costs_csv(buffer_, con): """ con.sql(sql) rel = con.read_csv(buffer_, header=True, delimiter=",") # noqa: F841 - con.sql("""INSERT INTO commodity_costs SELECT - commodity_id, region_id, year, value FROM rel;""") + expansion_sql = expand_years(source_relation="rel") + con.sql( + f"""INSERT INTO commodity_costs SELECT + commodity_id, region_id, year, value FROM ({expansion_sql}) AS unioned;""" + ) def read_demand_csv(buffer_, con): @@ -106,7 +224,14 @@ def read_demand_csv(buffer_, con): """ con.sql(sql) rel = con.read_csv(buffer_, header=True, delimiter=",") # noqa: F841 - con.sql("INSERT INTO demand SELECT commodity_id, region_id, year, demand FROM rel;") + expansion_sql = expand_years(source_relation="rel") + con.sql( + f""" + INSERT INTO demand + SELECT commodity_id, region_id, year, demand + FROM ({expansion_sql}) AS unioned; + """ + ) def read_demand_slicing_csv(buffer_, con): @@ -163,8 +288,9 @@ def read_process_parameters_csv(buffer_, con): """ con.sql(sql) rel = con.read_csv(buffer_, header=True, delimiter=",") # noqa: F841 + expansion_sql = expand_years(source_relation="rel") con.sql( - """ + f""" INSERT INTO process_parameters SELECT process_id, region_id, @@ -177,7 +303,7 @@ def read_process_parameters_csv(buffer_, con): total_capacity_limit, lifetime, discount_rate - FROM rel; + FROM ({expansion_sql}) AS unioned; """ ) @@ -194,15 +320,16 @@ def read_process_flows_csv(buffer_, con): """ con.sql(sql) rel = con.read_csv(buffer_, header=True, delimiter=",") # noqa: F841 + expansion_sql = expand_years(source_relation="rel") con.sql( - """ + f""" INSERT INTO process_flows SELECT process_id, commodity_id, region_id, year, coeff - FROM rel; + FROM ({expansion_sql}) AS unioned; """ ) @@ -337,9 +464,7 @@ def process_technodictionary(con: duckdb.DuckDBPyConnection, sector: str) -> xr. return result -def process_initial_market( - con: duckdb.DuckDBPyConnection, currency: str, years: list[int] -) -> xr.Dataset: +def process_initial_market(con: duckdb.DuckDBPyConnection, currency: str) -> xr.Dataset: """Create initial market dataset with prices and zero trade variables. 
Args: @@ -355,10 +480,8 @@ def process_initial_market( if not isinstance(currency, str) or not currency.strip(): raise ValueError("currency must be a non-empty string") - years_sql = ", ".join(f"({y})" for y in years) df = con.execute( - f""" - WITH years(year) AS (VALUES {years_sql}) + """ SELECT r.id AS region, y.year AS year, @@ -466,7 +589,7 @@ def process_agent_parameters(con: duckdb.DuckDBPyConnection, sector: str) -> lis def process_initial_capacity( - con: duckdb.DuckDBPyConnection, sector: str, years: list[int] + con: duckdb.DuckDBPyConnection, sector: str ) -> xr.DataArray: """Create existing capacity over time from assets and lifetimes. @@ -479,16 +602,13 @@ def process_initial_capacity( xr.DataArray with dims (asset) and coordinates (asset, technology, region, year) showing capacity available in each year based on commission year and lifetime. """ - years_sql = ", ".join(f"({y})" for y in years) - # Compute capacity trajectory per technology/region/year # Note: this sums up the capacity of all assets in the same technology/region # I think ideally we wouldn't do that and would keep these as separate assets # Also, this isn't taking into account agent ownership assets_df = con.execute( - f""" - WITH years(year) AS (VALUES {years_sql}), - lifetimes AS ( + """ + WITH lifetimes AS ( SELECT DISTINCT pp.process, pp.region, pp.lifetime FROM process_parameters pp JOIN processes p ON p.id = pp.process diff --git a/tests/test_new_readers.py b/tests/test_new_readers.py index c5c956300..479d9cd4f 100644 --- a/tests/test_new_readers.py +++ b/tests/test_new_readers.py @@ -17,7 +17,9 @@ def default_new_input(tmp_path) -> Path: def con(default_new_input) -> duckdb.DuckDBPyConnection: from muse.new_input.readers import read_inputs - return read_inputs(default_new_input) + return read_inputs( + default_new_input, years=[2020, 2025, 2030, 2035, 2040, 2045, 2050] + ) def test_read_time_slices_csv(con): @@ -30,14 +32,14 @@ def test_read_time_slices_csv(con): def test_read_regions_csv(con): data = con.sql("SELECT * FROM regions").fetchnumpy() - assert data["id"] == np.array(["R1"]) + assert next(iter(data["id"])) == "R1" def test_read_commodities_csv(con): data = con.sql("SELECT * FROM commodities").fetchnumpy() - assert list(data["id"]) == ["electricity", "gas", "heat", "wind", "CO2f"] - assert list(data["type"]) == ["energy"] * 5 - assert list(data["unit"]) == ["PJ"] * 4 + ["kt"] + assert next(iter(data["id"])) == "electricity" + assert next(iter(data["type"])) == "energy" + assert next(iter(data["unit"])) == "PJ" def test_read_commodity_costs_csv(con): @@ -50,17 +52,17 @@ def test_read_commodity_costs_csv(con): def test_read_demand_csv(con): data = con.sql("SELECT * FROM demand").fetchnumpy() - assert np.all(data["year"] == np.array([2020, 2050])) - assert np.all(data["commodity"] == np.array(["heat", "heat"])) - assert np.all(data["region"] == np.array(["R1", "R1"])) - assert np.all(data["demand"] == np.array([10, 30])) + assert next(iter(data["year"])) == 2020 + assert next(iter(data["commodity"])) == "heat" + assert next(iter(data["region"])) == "R1" + assert next(iter(data["demand"])) == approx(10) def test_read_demand_slicing_csv(con): data = con.sql("SELECT * FROM demand_slicing").fetchnumpy() - assert np.all(data["commodity"] == "heat") - assert np.all(data["region"] == "R1") - assert np.all(data["fraction"] == np.array([0.1, 0.15, 0.1, 0.15, 0.3, 0.2])) + assert next(iter(data["commodity"])) == "heat" + assert next(iter(data["region"])) == "R1" + assert next(iter(data["fraction"])) == 
approx(0.1) def test_read_sectors_csv(con): @@ -140,14 +142,10 @@ def test_process_agent_parameters(con): def test_process_initial_market(con): from muse.new_input.readers import process_initial_market - process_initial_market( - con, currency="EUR", years=[2020, 2025, 2030, 2035, 2040, 2045, 2050] - ) + process_initial_market(con, currency="EUR") def test_process_initial_capacity(con): from muse.new_input.readers import process_initial_capacity - process_initial_capacity( - con, sector="power", years=[2020, 2025, 2030, 2035, 2040, 2045, 2050] - ) + process_initial_capacity(con, sector="power") From 17ab0cb3f387cf7f0dc6f20b0589cae7c5fe206a Mon Sep 17 00:00:00 2001 From: Tom Bland Date: Wed, 13 Aug 2025 16:54:02 +0100 Subject: [PATCH 29/43] Allow chained expansions --- src/muse/new_input/readers.py | 187 ++++++++++++---------------------- 1 file changed, 67 insertions(+), 120 deletions(-) diff --git a/src/muse/new_input/readers.py b/src/muse/new_input/readers.py index d10e8f812..a280c93e7 100644 --- a/src/muse/new_input/readers.py +++ b/src/muse/new_input/readers.py @@ -6,111 +6,52 @@ def expand_years(source_relation: str = "rel") -> str: - """Return SQL that expands 'year' values of 'all' or semicolon lists. - - - If year == 'all': duplicates for every row in `years(year)` table. - - If year contains a semicolon-separated list (e.g. '2020;2025'): - splits and duplicates for each year item. - - Otherwise: casts the single value to BIGINT. - """ + """Return a composable SQL that expands 'year' over 'all' or semicolon lists.""" return f""" - WITH src AS ( - SELECT *, CAST(year AS VARCHAR) AS year_str FROM {source_relation} - ), - explicit AS ( - SELECT s.* REPLACE (CAST(s.year_str AS BIGINT) AS year) - FROM src s - WHERE lower(s.year_str) <> 'all' - AND POSITION(';' IN s.year_str) = 0 - ), - multi AS ( - SELECT s.* REPLACE (CAST(TRIM(item) AS BIGINT) AS year) - FROM src s - CROSS JOIN UNNEST(str_split(s.year_str, ';')) AS t(item) - WHERE POSITION(';' IN s.year_str) > 0 - ), - expanded AS ( - SELECT s.* REPLACE (y.year AS year) - FROM src s - JOIN years y ON lower(s.year_str) = 'all' - ), - unioned AS ( - SELECT * FROM explicit - UNION ALL - SELECT * FROM multi - UNION ALL - SELECT * FROM expanded - ) - SELECT * FROM unioned - """ + SELECT s.* REPLACE (CAST(s.year AS BIGINT) AS year) + FROM {source_relation} s + WHERE lower(CAST(s.year AS VARCHAR)) <> 'all' AND POSITION(';' IN CAST(s.year AS VARCHAR)) = 0 + UNION ALL + SELECT s.* REPLACE (CAST(TRIM(item) AS BIGINT) AS year) + FROM {source_relation} s + CROSS JOIN UNNEST(str_split(CAST(s.year AS VARCHAR), ';')) AS t(item) + WHERE POSITION(';' IN CAST(s.year AS VARCHAR)) > 0 + UNION ALL + SELECT s.* REPLACE (y.year AS year) + FROM {source_relation} s + CROSS JOIN years y + WHERE lower(CAST(s.year AS VARCHAR)) = 'all' + """ # noqa: E501 def expand_regions(source_relation: str = "rel") -> str: - """Return SQL that expands 'region' values of 'all' or semicolon lists. - - - If region == 'all': duplicates for every row in `regions(id)` table. - - If region contains a semicolon-separated list (e.g. 'R1;R2'): - splits and duplicates for each region item. - - Otherwise: uses the single region value as-is. 
- """ + """Return a composable SQL that expands 'region_id' over 'all' or lists.""" return f""" - WITH src AS ( - SELECT *, CAST(region AS VARCHAR) AS region_str FROM {source_relation} - ), - explicit AS ( - SELECT s.* - FROM src s - WHERE lower(s.region_str) <> 'all' - AND POSITION(';' IN s.region_str) = 0 - ), - multi AS ( - SELECT s.* REPLACE (TRIM(item) AS region) - FROM src s - CROSS JOIN UNNEST(str_split(s.region_str, ';')) AS t(item) - WHERE POSITION(';' IN s.region_str) > 0 - ), - expanded AS ( - SELECT s.* REPLACE (r.id AS region) - FROM src s - JOIN regions r ON lower(s.region_str) = 'all' - ), - unioned AS ( - SELECT * FROM explicit - UNION ALL - SELECT * FROM multi - UNION ALL - SELECT * FROM expanded - ) - SELECT * FROM unioned - """ + SELECT s.* + FROM {source_relation} s + WHERE lower(CAST(s.region_id AS VARCHAR)) <> 'all' AND POSITION(';' IN CAST(s.region_id AS VARCHAR)) = 0 + UNION ALL + SELECT s.* REPLACE (TRIM(item) AS region_id) + FROM {source_relation} s + CROSS JOIN UNNEST(str_split(CAST(s.region_id AS VARCHAR), ';')) AS t(item) + WHERE POSITION(';' IN CAST(s.region_id AS VARCHAR)) > 0 + UNION ALL + SELECT s.* REPLACE (r.id AS region_id) + FROM {source_relation} s + JOIN regions r ON lower(CAST(s.region_id AS VARCHAR)) = 'all' + """ # noqa: E501 def expand_time_slices(source_relation: str = "rel") -> str: - """Return SQL that expands 'time_slice' values of 'annual' or a specific id. - - - If time_slice == 'annual': duplicates for every row in `time_slices(id)`. - - Otherwise: passes through the provided time_slice value. - """ + """Return a composable SQL that expands 'time_slice' over 'annual'.""" return f""" - WITH src AS ( - SELECT *, CAST(time_slice AS VARCHAR) AS ts_str FROM {source_relation} - ), - explicit AS ( - SELECT s.* REPLACE (s.ts_str AS time_slice) - FROM src s - WHERE lower(s.ts_str) <> 'annual' - ), - expanded AS ( - SELECT s.* REPLACE (t.id AS time_slice) - FROM src s - JOIN time_slices t ON lower(s.ts_str) = 'annual' - ), - unioned AS ( - SELECT * FROM explicit - UNION ALL - SELECT * FROM expanded - ) - SELECT * FROM unioned + SELECT s.* + FROM {source_relation} s + WHERE lower(CAST(s.time_slice AS VARCHAR)) <> 'annual' + UNION ALL + SELECT s.* REPLACE (t.id AS time_slice) + FROM {source_relation} s + JOIN time_slices t ON lower(CAST(s.time_slice AS VARCHAR)) = 'annual' """ @@ -195,6 +136,16 @@ def read_regions_csv(buffer_, con): con.sql("INSERT INTO regions SELECT id FROM rel;") +def read_sectors_csv(buffer_, con): + sql = """CREATE TABLE sectors ( + id VARCHAR PRIMARY KEY, + ); + """ + con.sql(sql) + rel = con.read_csv(buffer_, header=True, delimiter=",") # noqa: F841 + con.sql("INSERT INTO sectors SELECT id FROM rel;") + + def read_commodity_costs_csv(buffer_, con): sql = """CREATE TABLE commodity_costs ( commodity VARCHAR REFERENCES commodities(id), @@ -206,10 +157,13 @@ def read_commodity_costs_csv(buffer_, con): """ con.sql(sql) rel = con.read_csv(buffer_, header=True, delimiter=",") # noqa: F841 - expansion_sql = expand_years(source_relation="rel") + years_sql = expand_years(source_relation="rel") + regions_sql = expand_regions(source_relation=f"({years_sql})") + expansion_sql = regions_sql con.sql( f"""INSERT INTO commodity_costs SELECT - commodity_id, region_id, year, value FROM ({expansion_sql}) AS unioned;""" + commodity_id, region_id, year, value FROM ({expansion_sql}) AS unioned; + """ ) @@ -224,14 +178,7 @@ def read_demand_csv(buffer_, con): """ con.sql(sql) rel = con.read_csv(buffer_, header=True, delimiter=",") # noqa: F841 - 
expansion_sql = expand_years(source_relation="rel") - con.sql( - f""" - INSERT INTO demand - SELECT commodity_id, region_id, year, demand - FROM ({expansion_sql}) AS unioned; - """ - ) + con.sql("INSERT INTO demand SELECT commodity_id, region_id, year, demand FROM rel;") def read_demand_slicing_csv(buffer_, con): @@ -245,18 +192,14 @@ def read_demand_slicing_csv(buffer_, con): """ con.sql(sql) rel = con.read_csv(buffer_, header=True, delimiter=",") # noqa: F841 - con.sql("""INSERT INTO demand_slicing SELECT - commodity_id, region_id, time_slice, fraction FROM rel;""") - - -def read_sectors_csv(buffer_, con): - sql = """CREATE TABLE sectors ( - id VARCHAR PRIMARY KEY, - ); - """ - con.sql(sql) - rel = con.read_csv(buffer_, header=True, delimiter=",") # noqa: F841 - con.sql("INSERT INTO sectors SELECT id FROM rel;") + regions_sql = expand_regions(source_relation="rel") + ts_sql = expand_time_slices(source_relation=f"({regions_sql})") + expansion_sql = ts_sql + con.sql( + f"""INSERT INTO demand_slicing SELECT + commodity_id, region_id, time_slice, fraction FROM ({expansion_sql}) AS unioned; + """ # noqa: E501 + ) def read_processes_csv(buffer_, con): @@ -288,7 +231,9 @@ def read_process_parameters_csv(buffer_, con): """ con.sql(sql) rel = con.read_csv(buffer_, header=True, delimiter=",") # noqa: F841 - expansion_sql = expand_years(source_relation="rel") + years_sql = expand_years(source_relation="rel") + regions_sql = expand_regions(source_relation=f"({years_sql})") + expansion_sql = regions_sql con.sql( f""" INSERT INTO process_parameters SELECT @@ -320,7 +265,9 @@ def read_process_flows_csv(buffer_, con): """ con.sql(sql) rel = con.read_csv(buffer_, header=True, delimiter=",") # noqa: F841 - expansion_sql = expand_years(source_relation="rel") + years_sql = expand_years(source_relation="rel") + regions_sql = expand_regions(source_relation=f"({years_sql})") + expansion_sql = regions_sql con.sql( f""" INSERT INTO process_flows SELECT From 2ec61a5755894802a61cdbd754cc87b16dd80cef Mon Sep 17 00:00:00 2001 From: Tom Bland Date: Wed, 13 Aug 2025 18:21:31 +0100 Subject: [PATCH 30/43] Add some validation checks and filling missing data --- src/muse/new_input/readers.py | 213 +++++++++++++++++++++++++++++++--- 1 file changed, 199 insertions(+), 14 deletions(-) diff --git a/src/muse/new_input/readers.py b/src/muse/new_input/readers.py index a280c93e7..b668a1e9b 100644 --- a/src/muse/new_input/readers.py +++ b/src/muse/new_input/readers.py @@ -55,6 +55,159 @@ def expand_time_slices(source_relation: str = "rel") -> str: """ +def check_years_for_region_commodity(con: duckdb.DuckDBPyConnection, table) -> None: + """Validate that commodities present have data for all regions/years. + + Raises ValueError if any (commodity, region, year) combination is missing. 
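+
+    For example, if 'heat' has a row for (R1, 2020) but the `years` table also
+    contains 2025, the missing (heat, R1, 2025) combination triggers the error;
+    commodities that never appear in the table are not checked.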
+ """ + query = f""" + WITH present_commodities AS ( + SELECT DISTINCT commodity AS commodity FROM {table} + ), + full_grid AS ( + SELECT pc.commodity, r.id AS region, y.year AS year + FROM present_commodities pc + CROSS JOIN regions r + CROSS JOIN years y + ) + SELECT COUNT(*) AS missing_count + FROM full_grid fg + LEFT JOIN {table} t + ON t.commodity = fg.commodity + AND t.region = fg.region + AND t.year = fg.year + WHERE t.commodity IS NULL + """ + missing_count = con.execute(query).fetchone()[0] + if missing_count: + raise ValueError( + "commodity_costs must include all regions/years for any mentioned commodity" + ) + + +def fill_missing_commodity_region_year( + con: duckdb.DuckDBPyConnection, table: str, value_column: str, fill_value: float +) -> None: + """Insert fill_value for any missing (commodity, region, year) combinations. + + Builds the full grid from tables `commodities`, `regions`, and `years` and + inserts rows only where the given table lacks a record. Existing rows are + not modified. + """ + con.execute( + f""" + WITH full_grid AS ( + SELECT c.id AS commodity, r.id AS region, y.year AS year + FROM commodities c + CROSS JOIN regions r + CROSS JOIN years y + ), + missing AS ( + SELECT fg.commodity, fg.region, fg.year + FROM full_grid fg + LEFT JOIN {table} t + ON t.commodity = fg.commodity + AND t.region = fg.region + AND t.year = fg.year + WHERE t.commodity IS NULL + ) + INSERT INTO {table} (commodity, region, year, {value_column}) + SELECT commodity, region, year, ? AS {value_column} + FROM missing + """, + [fill_value], + ) + + +def check_process_region_year_coverage( + con: duckdb.DuckDBPyConnection, table: str +) -> None: + """Validate that all combinations of process/region/year exist in table. + + Raises ValueError if any (process, region, year) combination is missing. + """ + query = f""" + WITH full_grid AS ( + SELECT p.id AS process, r.id AS region, y.year AS year + FROM processes p + CROSS JOIN regions r + CROSS JOIN years y + ) + SELECT COUNT(*) AS missing_count + FROM full_grid fg + LEFT JOIN {table} t + ON t.process = fg.process + AND t.region = fg.region + AND t.year = fg.year + WHERE t.process IS NULL + """ + missing_count = con.execute(query).fetchone()[0] + if missing_count: + raise ValueError( + "process_parameters must include all combinations of process/region/year" + ) + + +def ensure_agents_region_sector_coverage( + con: duckdb.DuckDBPyConnection, table: str = "agents" +) -> None: + """Validate there is at least one agent for every (region, sector).""" + query = f""" + WITH full_grid AS ( + SELECT r.id AS region, s.id AS sector + FROM regions r + CROSS JOIN sectors s + ), + present AS ( + SELECT DISTINCT region, sector FROM {table} + ) + SELECT COUNT(*) AS missing_count + FROM full_grid fg + LEFT JOIN present p + ON p.region = fg.region AND p.sector = fg.sector + WHERE p.region IS NULL + """ + missing_count = con.execute(query).fetchone()[0] + if missing_count: + raise ValueError("agents must include at least one agent per (region, sector)") + + +def ensure_full_process_commodity_region_year( + con: duckdb.DuckDBPyConnection, table: str = "process_flows" +) -> None: + """Validate that each present (process, commodity) has all (region, year). + + Raises ValueError if any required combinations are missing. 
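+
+    For example, if (gasboiler, heat) has a coefficient for (R1, 2020) but not
+    for (R1, 2025) while 2025 is in the `years` table, the check fails;
+    (process, commodity) pairs absent from the table are not checked.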
+ """ + query = f""" + WITH present AS ( + SELECT DISTINCT process, commodity FROM {table} + ), + full_grid AS ( + SELECT p.process, p.commodity, r.id AS region, y.year AS year + FROM present p + CROSS JOIN regions r + CROSS JOIN years y + ), + missing AS ( + SELECT fg.process, fg.commodity, fg.region, fg.year + FROM full_grid fg + LEFT JOIN {table} t + ON t.process = fg.process + AND t.commodity = fg.commodity + AND t.region = fg.region + AND t.year = fg.year + WHERE t.process IS NULL + ) + SELECT COUNT(*) AS missing_count FROM missing + """ + missing_count = con.execute(query).fetchone()[0] + if missing_count: + raise ValueError( + "process_flows must include all regions/years for any present (process, commodity)" # noqa: E501 + ) + + def read_inputs(data_dir, years: list[int]) -> duckdb.DuckDBPyConnection: con = duckdb.connect(":memory:") insert_years(con, years) @@ -161,11 +314,21 @@ def read_commodity_costs_csv(buffer_, con): regions_sql = expand_regions(source_relation=f"({years_sql})") expansion_sql = regions_sql con.sql( - f"""INSERT INTO commodity_costs SELECT - commodity_id, region_id, year, value FROM ({expansion_sql}) AS unioned; + f""" + INSERT INTO commodity_costs + SELECT commodity_id, region_id, year, value + FROM ({expansion_sql}) AS unioned; """ ) + # Validate coverage + check_years_for_region_commodity(con, table="commodity_costs") + + # Insert data for missing commodities + fill_missing_commodity_region_year( + con, table="commodity_costs", value_column="value", fill_value=0.0 + ) + def read_demand_csv(buffer_, con): sql = """CREATE TABLE demand ( @@ -180,6 +343,14 @@ def read_demand_csv(buffer_, con): rel = con.read_csv(buffer_, header=True, delimiter=",") # noqa: F841 con.sql("INSERT INTO demand SELECT commodity_id, region_id, year, demand FROM rel;") + # Validate coverage + check_years_for_region_commodity(con, table="demand") + + # Insert data for missing commodities + fill_missing_commodity_region_year( + con, table="demand", value_column="demand", fill_value=0.0 + ) + def read_demand_slicing_csv(buffer_, con): sql = """CREATE TABLE demand_slicing ( @@ -252,6 +423,9 @@ def read_process_parameters_csv(buffer_, con): """ ) + # Validate coverage + check_process_region_year_coverage(con, table="process_parameters") + def read_process_flows_csv(buffer_, con): sql = """CREATE TABLE process_flows ( @@ -280,6 +454,9 @@ def read_process_flows_csv(buffer_, con): """ ) + # Validate coverage + ensure_full_process_commodity_region_year(con) + def read_agents_csv(buffer_, con): sql = """CREATE TABLE agents ( @@ -306,6 +483,9 @@ def read_agents_csv(buffer_, con): """ ) + # Validate coverage across region/sector + ensure_agents_region_sector_coverage(con) + def read_agent_objectives_csv(buffer_, con): sql = """CREATE TABLE agent_objectives ( @@ -329,6 +509,17 @@ def read_agent_objectives_csv(buffer_, con): """ ) + # Validate: each agent must have at least one objective + if con.execute( + """ + SELECT EXISTS ( + SELECT 1 FROM agents a + WHERE a.id NOT IN (SELECT agent FROM agent_objectives) + ) + """ + ).fetchone()[0]: + raise ValueError("Each agent must have at least one objective") + def read_assets_csv(buffer_, con): sql = """CREATE TABLE assets ( @@ -430,23 +621,17 @@ def process_initial_market(con: duckdb.DuckDBPyConnection, currency: str) -> xr. df = con.execute( """ SELECT - r.id AS region, - y.year AS year, - c.id AS commodity, - COALESCE(cc.value, 0) AS prices, + cc.region AS region, + cc.year AS year, + cc.commodity AS commodity, + cc.value AS prices, (? 
|| '/' || c.unit) AS units_prices - FROM regions r - CROSS JOIN years y - CROSS JOIN commodities c - LEFT JOIN commodity_costs cc - ON cc.region = r.id AND cc.year = y.year AND cc.commodity = c.id + FROM commodity_costs cc + JOIN commodities c ON c.id = cc.commodity """, [currency], ).fetchdf() - if df.empty: - raise ValueError("No commodity cost data found to build initial market.") - # Build dataset from prices prices_df = create_multiindex( df, From 8bfc5d996cb10fc4a33e1acf228149330ed2c9fd Mon Sep 17 00:00:00 2001 From: Tom Bland Date: Wed, 13 Aug 2025 18:35:19 +0100 Subject: [PATCH 31/43] read_process_availabilities --- src/muse/new_input/readers.py | 32 ++++++++++++++ tests/test_new_readers.py | 80 ++++++++--------------------------- 2 files changed, 50 insertions(+), 62 deletions(-) diff --git a/src/muse/new_input/readers.py b/src/muse/new_input/readers.py index b668a1e9b..205ae96e3 100644 --- a/src/muse/new_input/readers.py +++ b/src/muse/new_input/readers.py @@ -219,6 +219,7 @@ def read_inputs(data_dir, years: list[int]) -> duckdb.DuckDBPyConnection: ("processes.csv", read_processes_csv), ("process_parameters.csv", read_process_parameters_csv), ("process_flows.csv", read_process_flows_csv), + ("process_availabilities.csv", read_process_availabilities_csv), ("agents.csv", read_agents_csv), ("agent_objectives.csv", read_agent_objectives_csv), ("assets.csv", read_assets_csv), @@ -458,6 +459,37 @@ def read_process_flows_csv(buffer_, con): ensure_full_process_commodity_region_year(con) +def read_process_availabilities_csv(buffer_, con): + sql = """CREATE TABLE process_availabilities ( + process VARCHAR REFERENCES processes(id), + region VARCHAR REFERENCES regions(id), + year BIGINT, + time_slice VARCHAR REFERENCES time_slices(id), + limit_type VARCHAR CHECK (limit_type IN ('up','down')), + value DOUBLE, + PRIMARY KEY (process, region, year, time_slice, limit_type) + ); + """ + con.sql(sql) + rel = con.read_csv(buffer_, header=True, delimiter=",") # noqa: F841 + years_sql = expand_years(source_relation="rel") + regions_sql = expand_regions(source_relation=f"({years_sql})") + ts_sql = expand_time_slices(source_relation=f"({regions_sql})") + expansion_sql = ts_sql + con.sql( + f""" + INSERT INTO process_availabilities SELECT + process_id, + region_id, + year, + time_slice, + limit_type, + value + FROM ({expansion_sql}) AS unioned; + """ + ) + + def read_agents_csv(buffer_, con): sql = """CREATE TABLE agents ( id VARCHAR PRIMARY KEY, diff --git a/tests/test_new_readers.py b/tests/test_new_readers.py index 479d9cd4f..57c62e0e6 100644 --- a/tests/test_new_readers.py +++ b/tests/test_new_readers.py @@ -1,8 +1,7 @@ from pathlib import Path import duckdb -import numpy as np -from pytest import approx, fixture +from pytest import fixture @fixture @@ -23,102 +22,59 @@ def con(default_new_input) -> duckdb.DuckDBPyConnection: def test_read_time_slices_csv(con): - data = con.sql("SELECT * FROM time_slices").fetchnumpy() - assert next(iter(data["season"])) == "all-year" - assert next(iter(data["day"])) == "all-week" - assert next(iter(data["time_of_day"])) == "night" - assert next(iter(data["fraction"])) == approx(0.166667) + con.sql("SELECT * FROM time_slices").fetchnumpy() def test_read_regions_csv(con): - data = con.sql("SELECT * FROM regions").fetchnumpy() - assert next(iter(data["id"])) == "R1" + con.sql("SELECT * FROM regions").fetchnumpy() def test_read_commodities_csv(con): - data = con.sql("SELECT * FROM commodities").fetchnumpy() - assert next(iter(data["id"])) == "electricity" - assert 
next(iter(data["type"])) == "energy" - assert next(iter(data["unit"])) == "PJ" + con.sql("SELECT * FROM commodities").fetchnumpy() def test_read_commodity_costs_csv(con): - data = con.sql("SELECT * FROM commodity_costs").fetchnumpy() - assert next(iter(data["commodity"])) == "electricity" - assert next(iter(data["region"])) == "R1" - assert next(iter(data["year"])) == 2020 - assert next(iter(data["value"])) == approx(19.5) + con.sql("SELECT * FROM commodity_costs").fetchnumpy() def test_read_demand_csv(con): - data = con.sql("SELECT * FROM demand").fetchnumpy() - assert next(iter(data["year"])) == 2020 - assert next(iter(data["commodity"])) == "heat" - assert next(iter(data["region"])) == "R1" - assert next(iter(data["demand"])) == approx(10) + con.sql("SELECT * FROM demand").fetchnumpy() def test_read_demand_slicing_csv(con): - data = con.sql("SELECT * FROM demand_slicing").fetchnumpy() - assert next(iter(data["commodity"])) == "heat" - assert next(iter(data["region"])) == "R1" - assert next(iter(data["fraction"])) == approx(0.1) + con.sql("SELECT * FROM demand_slicing").fetchnumpy() def test_read_sectors_csv(con): - data = con.sql("SELECT * FROM sectors").fetchnumpy() - assert next(iter(data["id"])) == "gas" + con.sql("SELECT * FROM sectors").fetchnumpy() def test_read_processes_csv(con): - data = con.sql("SELECT * FROM processes").fetchnumpy() - assert next(iter(data["id"])) == "gassupply1" - assert next(iter(data["sector"])) == "gas" + con.sql("SELECT * FROM processes").fetchnumpy() def test_read_process_parameters_csv(con): - data = con.sql("SELECT * FROM process_parameters").fetchnumpy() - assert next(iter(data["process"])) == "gassupply1" - assert next(iter(data["region"])) == "R1" - assert next(iter(data["year"])) == 2020 - assert next(iter(data["cap_par"])) == approx(0) - assert next(iter(data["discount_rate"])) == approx(0.1) + con.sql("SELECT * FROM process_parameters").fetchnumpy() def test_read_process_flows_csv(con): - data = con.sql("SELECT * FROM process_flows").fetchnumpy() - assert next(iter(data["process"])) == "gassupply1" - assert next(iter(data["commodity"])) == "gas" - assert next(iter(data["region"])) == "R1" - assert next(iter(data["year"])) == 2020 - assert next(iter(data["coeff"])) == approx(1) + con.sql("SELECT * FROM process_flows").fetchnumpy() + + +def test_read_process_availabilities_csv(con): + con.sql("SELECT * FROM process_availabilities").fetchnumpy() def test_read_agents_csv(con): - data = con.sql("SELECT * FROM agents").fetchnumpy() - assert next(iter(data["id"])) == "A1_RES" - assert next(iter(data["region"])) == "R1" - assert next(iter(data["sector"])) == "residential" - assert next(iter(data["search_rule"])) == "all" - assert next(iter(data["decision_rule"])) == "single" - assert next(iter(data["quantity"])) == approx(1) + con.sql("SELECT * FROM agents").fetchnumpy() def test_read_agent_objectives_csv(con): - data = con.sql("SELECT * FROM agent_objectives").fetchnumpy() - assert next(iter(data["agent"])) == "A1_RES" - assert next(iter(data["objective_type"])) == "LCOE" - assert next(iter(data["decision_weight"])) == approx(1) - assert next(iter(data["objective_sort"])) is np.True_ + con.sql("SELECT * FROM agent_objectives").fetchnumpy() def test_read_assets_csv(con): - data = con.sql("SELECT * FROM assets").fetchnumpy() - assert next(iter(data["agent"])) == "A1_GAS" - assert next(iter(data["process"])) == "gassupply1" - assert next(iter(data["region"])) == "R1" - assert next(iter(data["commission_year"])) == 1995 - assert 
next(iter(data["capacity"])) == approx(7.5) + con.sql("SELECT * FROM assets").fetchnumpy() def test_process_global_commodities(con): From 865c5db538f3fb5f8a341f66139a5a1f8c33e63f Mon Sep 17 00:00:00 2001 From: Tom Bland Date: Wed, 13 Aug 2025 19:04:53 +0100 Subject: [PATCH 32/43] Make helpers more generic --- src/muse/new_input/readers.py | 320 ++++++++++++++++++++-------------- 1 file changed, 191 insertions(+), 129 deletions(-) diff --git a/src/muse/new_input/readers.py b/src/muse/new_input/readers.py index 205ae96e3..e4be42892 100644 --- a/src/muse/new_input/readers.py +++ b/src/muse/new_input/readers.py @@ -4,6 +4,15 @@ from muse.readers.csv import create_assets, create_multiindex, create_xarray_dataset +# Global mapping from dimension name to (source_table, source_column) +DIM_TO_SOURCE: dict[str, tuple[str, str]] = { + "process": ("processes", "id"), + "commodity": ("commodities", "id"), + "region": ("regions", "id"), + "year": ("years", "year"), + "time_slice": ("time_slices", "id"), +} + def expand_years(source_relation: str = "rel") -> str: """Return a composable SQL that expands 'year' over 'all' or semicolon lists.""" @@ -55,157 +64,152 @@ def expand_time_slices(source_relation: str = "rel") -> str: """ -def check_years_for_region_commodity(con: duckdb.DuckDBPyConnection, table) -> None: - """Validate that commodities present have data for all regions/years. +def validate_present_full_dim_coverage( + con: duckdb.DuckDBPyConnection, + table: str, + present_cols: list[str], + dims: list[str], + error_message: str, +) -> None: + """Ensure that for each present entity (present_cols), all dims combos exist. - Raises ValueError if any (commodity, region, year) combination is missing. + The target table must use these exact column names. """ + for d in dims: + if d not in DIM_TO_SOURCE: + raise ValueError(f"Unsupported dim: {d}") + + present_cols_csv = ", ".join(present_cols) + select_present = f"SELECT DISTINCT {present_cols_csv} FROM {table}" + + fg_parts = [f"p.{c} AS {c}" for c in present_cols] + cross_joins = [] + for d in dims: + src_table, src_col = DIM_TO_SOURCE[d] + alias = f"{d[0]}src" + fg_parts.append(f"{alias}.{src_col} AS {d}") + cross_joins.append(f"CROSS JOIN {src_table} {alias}") + fg_select = ", ".join(fg_parts) + cross_join_sql = "\n ".join(cross_joins) + + join_keys = " AND ".join([f"t.{c} = fg.{c}" for c in present_cols + dims]) + null_check_col = present_cols[0] + query = f""" - WITH present_commodities AS ( - SELECT DISTINCT commodity AS commodity FROM {table} + WITH present AS ( + {select_present} ), full_grid AS ( - SELECT pc.commodity, r.id AS region, y.year AS year - FROM present_commodities pc - CROSS JOIN regions r - CROSS JOIN years y + SELECT {fg_select} + FROM present p + {cross_join_sql} ) SELECT COUNT(*) AS missing_count FROM full_grid fg LEFT JOIN {table} t - ON t.commodity = fg.commodity - AND t.region = fg.region - AND t.year = fg.year - WHERE t.commodity IS NULL + ON {join_keys} + WHERE t.{null_check_col} IS NULL """ missing_count = con.execute(query).fetchone()[0] if missing_count: - raise ValueError( - "commodity_costs must include all regions/years for any mentioned commodity" - ) + raise ValueError(error_message) -def fill_missing_commodity_region_year( - con: duckdb.DuckDBPyConnection, table: str, value_column: str, fill_value: float +def validate_full_coverage( + con: duckdb.DuckDBPyConnection, table: str, dims: list[str] ) -> None: - """Insert fill_value for any missing (commodity, region, year) combinations. 
+ """Validate that all combinations across dims exist in table.""" + for d in dims: + if d not in DIM_TO_SOURCE: + raise ValueError(f"Unsupported dim: {d}") + + # Build full grid FROM and CROSS JOINs over all dims + select_parts = [] + from_and_joins = [] + first = True + for d in dims: + src_table, src_col = DIM_TO_SOURCE[d] + alias = f"{d[0]}src" + select_parts.append(f"{alias}.{src_col} AS {d}") + if first: + from_and_joins.append(f"FROM {src_table} {alias}") + first = False + else: + from_and_joins.append(f"CROSS JOIN {src_table} {alias}") + + full_select_cols = ", ".join(select_parts) + from_clause_sql = "\n ".join(from_and_joins) + join_keys = " AND ".join([f"t.{d} = fg.{d}" for d in dims]) + first_dim = dims[0] - Builds the full grid from tables `commodities`, `regions`, and `years` and - inserts rows only where the given table lacks a record. Existing rows are - not modified. - """ - con.execute( - f""" - WITH full_grid AS ( - SELECT c.id AS commodity, r.id AS region, y.year AS year - FROM commodities c - CROSS JOIN regions r - CROSS JOIN years y - ), - missing AS ( - SELECT fg.commodity, fg.region, fg.year - FROM full_grid fg - LEFT JOIN {table} t - ON t.commodity = fg.commodity - AND t.region = fg.region - AND t.year = fg.year - WHERE t.commodity IS NULL - ) - INSERT INTO {table} (commodity, region, year, {value_column}) - SELECT commodity, region, year, ? AS {value_column} - FROM missing - """, - [fill_value], - ) - - -def check_process_region_year_coverage( - con: duckdb.DuckDBPyConnection, table: str -) -> None: - """Validate that all combinations of process/region/year exist in table. - - Raises ValueError if any (process, region, year) combination is missing. - """ query = f""" WITH full_grid AS ( - SELECT p.id AS process, r.id AS region, y.year AS year - FROM processes p - CROSS JOIN regions r - CROSS JOIN years y + SELECT {full_select_cols} + {from_clause_sql} ) SELECT COUNT(*) AS missing_count FROM full_grid fg - LEFT JOIN {table} t - ON t.process = fg.process - AND t.region = fg.region - AND t.year = fg.year - WHERE t.process IS NULL + LEFT JOIN {table} t ON {join_keys} + WHERE t.{first_dim} IS NULL """ missing_count = con.execute(query).fetchone()[0] if missing_count: - raise ValueError( - "process_parameters must include all combinations of process/region/year" - ) + raise ValueError("Missing required combinations across dims") -def ensure_agents_region_sector_coverage( - con: duckdb.DuckDBPyConnection, table: str = "agents" -) -> None: - """Validate there is at least one agent for every (region, sector).""" - query = f""" - WITH full_grid AS ( - SELECT r.id AS region, s.id AS sector - FROM regions r - CROSS JOIN sectors s - ), - present AS ( - SELECT DISTINCT region, sector FROM {table} - ) - SELECT COUNT(*) AS missing_count - FROM full_grid fg - LEFT JOIN present p - ON p.region = fg.region AND p.sector = fg.sector - WHERE p.region IS NULL - """ - missing_count = con.execute(query).fetchone()[0] - if missing_count: - raise ValueError("agents must include at least one agent per (region, sector)") - - -def ensure_full_process_commodity_region_year( - con: duckdb.DuckDBPyConnection, table: str = "process_flows" +def fill_missing_dim_combinations( + con: duckdb.DuckDBPyConnection, + table: str, + dims: list[str], + value_column: str, + fill_value: float, ) -> None: - """Validate that each present (process, commodity) has all (region, year). + """Insert fill_value for any missing combinations across the given dims. 
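+
+    For example, with dims ["commodity", "region", "year"], if 'heat' only has a
+    row for (R1, 2020), rows holding the fill value are inserted for every other
+    (region, year) combination of 'heat'; commodities that never appear in the
+    table generate nothing.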
- Raises ValueError if any required combinations are missing. - """ - query = f""" - WITH present AS ( - SELECT DISTINCT process, commodity FROM {table} - ), - full_grid AS ( - SELECT p.process, p.commodity, r.id AS region, y.year AS year - FROM present p - CROSS JOIN regions r - CROSS JOIN years y - ), - missing AS ( - SELECT fg.process, fg.commodity, fg.region, fg.year - FROM full_grid fg - LEFT JOIN {table} t - ON t.process = fg.process - AND t.commodity = fg.commodity - AND t.region = fg.region - AND t.year = fg.year - WHERE t.process IS NULL - ) - SELECT COUNT(*) AS missing_count FROM missing + The target table must use these exact column names for the dims. """ - missing_count = con.execute(query).fetchone()[0] - if missing_count: - raise ValueError( - "process_flows must include all regions/years for any present (process, commodity)" # noqa: E501 + for d in dims: + if d not in DIM_TO_SOURCE: + raise ValueError(f"Unsupported dim: {d}") + + # Build full grid anchored on present values of the first dim (e.g., commodity) + present_key = dims[0] + present_cte = f"SELECT DISTINCT {present_key} FROM {table}" + + select_parts = [f"p.{present_key} AS {present_key}"] + from_and_joins = [f"FROM ({present_cte}) p"] + for d in dims[1:]: + src_table, src_col = DIM_TO_SOURCE[d] + alias = f"{d[0]}src" + select_parts.append(f"{alias}.{src_col} AS {d}") + from_and_joins.append(f"CROSS JOIN {src_table} {alias}") + full_select_cols = ", ".join(select_parts) + from_clause_sql = "\n ".join(from_and_joins) + + # Build join keys to detect missing rows + join_keys = " AND ".join([f"t.{d} = fg.{d}" for d in dims]) + insert_cols_csv = ", ".join([*dims, value_column]) + select_cols_missing = ", ".join([f"fg.{d}" for d in dims]) + select_cols_plain = ", ".join(dims) + + con.execute( + f""" + WITH full_grid AS ( + SELECT {full_select_cols} + {from_clause_sql} + ), + missing AS ( + SELECT {select_cols_missing} + FROM full_grid fg + LEFT JOIN {table} t ON {join_keys} + WHERE t.{present_key} IS NULL ) + INSERT INTO {table} ({insert_cols_csv}) + SELECT {select_cols_plain}, ? 
AS {value_column} + FROM missing + """, + [fill_value], + ) def read_inputs(data_dir, years: list[int]) -> duckdb.DuckDBPyConnection: @@ -323,11 +327,23 @@ def read_commodity_costs_csv(buffer_, con): ) # Validate coverage - check_years_for_region_commodity(con, table="commodity_costs") + validate_present_full_dim_coverage( + con, + table="commodity_costs", + present_cols=["commodity"], + dims=["region", "year"], + error_message=( + "commodity_costs must include all regions/years for any mentioned commodity" + ), + ) # Insert data for missing commodities - fill_missing_commodity_region_year( - con, table="commodity_costs", value_column="value", fill_value=0.0 + fill_missing_dim_combinations( + con, + table="commodity_costs", + dims=["commodity", "region", "year"], + value_column="value", + fill_value=0.0, ) @@ -345,11 +361,23 @@ def read_demand_csv(buffer_, con): con.sql("INSERT INTO demand SELECT commodity_id, region_id, year, demand FROM rel;") # Validate coverage - check_years_for_region_commodity(con, table="demand") + validate_present_full_dim_coverage( + con, + table="demand", + present_cols=["commodity"], + dims=["region", "year"], + error_message=( + "commodity_costs must include all regions/years for any mentioned commodity" + ), + ) # Insert data for missing commodities - fill_missing_commodity_region_year( - con, table="demand", value_column="demand", fill_value=0.0 + fill_missing_dim_combinations( + con, + table="demand", + dims=["commodity", "region", "year"], + value_column="demand", + fill_value=0.0, ) @@ -425,7 +453,9 @@ def read_process_parameters_csv(buffer_, con): ) # Validate coverage - check_process_region_year_coverage(con, table="process_parameters") + validate_full_coverage( + con, table="process_parameters", dims=["process", "region", "year"] + ) def read_process_flows_csv(buffer_, con): @@ -456,7 +486,15 @@ def read_process_flows_csv(buffer_, con): ) # Validate coverage - ensure_full_process_commodity_region_year(con) + validate_present_full_dim_coverage( + con, + table="process_flows", + present_cols=["process", "commodity"], + dims=["region", "year"], + error_message=( + "process_flows must include all regions/years for any present (process, commodity)" # noqa: E501 + ), + ) def read_process_availabilities_csv(buffer_, con): @@ -519,6 +557,30 @@ def read_agents_csv(buffer_, con): ensure_agents_region_sector_coverage(con) +def ensure_agents_region_sector_coverage( + con: duckdb.DuckDBPyConnection, table: str = "agents" +) -> None: + """Validate there is at least one agent for every (region, sector).""" + query = f""" + WITH full_grid AS ( + SELECT r.id AS region, s.id AS sector + FROM regions r + CROSS JOIN sectors s + ), + present AS ( + SELECT DISTINCT region, sector FROM {table} + ) + SELECT COUNT(*) AS missing_count + FROM full_grid fg + LEFT JOIN present p + ON p.region = fg.region AND p.sector = fg.sector + WHERE p.region IS NULL + """ + missing_count = con.execute(query).fetchone()[0] + if missing_count: + raise ValueError("agents must include at least one agent per (region, sector)") + + def read_agent_objectives_csv(buffer_, con): sql = """CREATE TABLE agent_objectives ( agent VARCHAR REFERENCES agents(id), From a6461108592c569c45b8e5da391a45d9b46d9443 Mon Sep 17 00:00:00 2001 From: Tom Bland Date: Wed, 13 Aug 2025 19:36:54 +0100 Subject: [PATCH 33/43] Make the helper functions a bit clearer --- src/muse/new_input/readers.py | 161 +++++++++++++++------------------- 1 file changed, 72 insertions(+), 89 deletions(-) diff --git 
a/src/muse/new_input/readers.py b/src/muse/new_input/readers.py index e4be42892..2a9cb73c5 100644 --- a/src/muse/new_input/readers.py +++ b/src/muse/new_input/readers.py @@ -73,45 +73,41 @@ def validate_present_full_dim_coverage( ) -> None: """Ensure that for each present entity (present_cols), all dims combos exist. - The target table must use these exact column names. + Generates the cartesian product of present entities crossed with the dim + sources and compares to the table using EXCEPT. """ for d in dims: if d not in DIM_TO_SOURCE: raise ValueError(f"Unsupported dim: {d}") - present_cols_csv = ", ".join(present_cols) - select_present = f"SELECT DISTINCT {present_cols_csv} FROM {table}" + present_csv = ", ".join(present_cols) + proj = ", ".join([*present_cols, *dims]) - fg_parts = [f"p.{c} AS {c}" for c in present_cols] - cross_joins = [] - for d in dims: - src_table, src_col = DIM_TO_SOURCE[d] - alias = f"{d[0]}src" - fg_parts.append(f"{alias}.{src_col} AS {d}") - cross_joins.append(f"CROSS JOIN {src_table} {alias}") - fg_select = ", ".join(fg_parts) - cross_join_sql = "\n ".join(cross_joins) + # Columns from present set (aliased p.) + present_select = [f"p.{c} AS {c}" for c in present_cols] - join_keys = " AND ".join([f"t.{c} = fg.{c}" for c in present_cols + dims]) - null_check_col = present_cols[0] + # Columns from dimension sources (dim_table.dim_id AS dim_name) + dim_cols = [f"{DIM_TO_SOURCE[d][0]}.{DIM_TO_SOURCE[d][1]} AS {d}" for d in dims] + cols_sql = ", ".join([*present_select, *dim_cols]) - query = f""" - WITH present AS ( - {select_present} + # FROM present set then CROSS JOIN each dim source table to get the grid + joins = [f"(SELECT DISTINCT {present_csv} FROM {table}) p"] + joins += [DIM_TO_SOURCE[d][0] for d in dims] + joins_sql = " CROSS JOIN ".join(joins) + + sql = f""" + WITH a AS ( + SELECT {cols_sql} + FROM {joins_sql} ), - full_grid AS ( - SELECT {fg_select} - FROM present p - {cross_join_sql} + missing AS ( + SELECT {proj} FROM a + EXCEPT + SELECT {proj} FROM {table} ) - SELECT COUNT(*) AS missing_count - FROM full_grid fg - LEFT JOIN {table} t - ON {join_keys} - WHERE t.{null_check_col} IS NULL + SELECT COUNT(*) FROM missing """ - missing_count = con.execute(query).fetchone()[0] - if missing_count: + if con.execute(sql).fetchone()[0]: raise ValueError(error_message) @@ -123,36 +119,31 @@ def validate_full_coverage( if d not in DIM_TO_SOURCE: raise ValueError(f"Unsupported dim: {d}") - # Build full grid FROM and CROSS JOINs over all dims - select_parts = [] - from_and_joins = [] - first = True + # Build full grid FROM and CROSS JOINs over all dims in one compact SQL + select_cols = [] + tables = [] for d in dims: src_table, src_col = DIM_TO_SOURCE[d] - alias = f"{d[0]}src" - select_parts.append(f"{alias}.{src_col} AS {d}") - if first: - from_and_joins.append(f"FROM {src_table} {alias}") - first = False - else: - from_and_joins.append(f"CROSS JOIN {src_table} {alias}") - - full_select_cols = ", ".join(select_parts) - from_clause_sql = "\n ".join(from_and_joins) - join_keys = " AND ".join([f"t.{d} = fg.{d}" for d in dims]) - first_dim = dims[0] + select_cols.append(f"{src_table}.{src_col} AS {d}") + tables.append(src_table) - query = f""" - WITH full_grid AS ( - SELECT {full_select_cols} - {from_clause_sql} + proj = ", ".join(dims) + cols_sql = ", ".join(select_cols) + joins_sql = " CROSS JOIN ".join(tables) + + sql = f""" + WITH a AS ( + SELECT {cols_sql} + FROM {joins_sql} + ), + missing AS ( + SELECT {proj} FROM a + EXCEPT + SELECT {proj} FROM {table} ) - 
SELECT COUNT(*) AS missing_count - FROM full_grid fg - LEFT JOIN {table} t ON {join_keys} - WHERE t.{first_dim} IS NULL + SELECT COUNT(*) FROM missing """ - missing_count = con.execute(query).fetchone()[0] + missing_count = con.execute(sql).fetchone()[0] if missing_count: raise ValueError("Missing required combinations across dims") @@ -166,50 +157,42 @@ def fill_missing_dim_combinations( ) -> None: """Insert fill_value for any missing combinations across the given dims. + Anchors on the first dim's present values to avoid generating rows for + completely absent entities, then uses an EXCEPT comparison to find and + insert missing keys. The target table must use these exact column names for the dims. """ for d in dims: if d not in DIM_TO_SOURCE: raise ValueError(f"Unsupported dim: {d}") - # Build full grid anchored on present values of the first dim (e.g., commodity) present_key = dims[0] - present_cte = f"SELECT DISTINCT {present_key} FROM {table}" - - select_parts = [f"p.{present_key} AS {present_key}"] - from_and_joins = [f"FROM ({present_cte}) p"] - for d in dims[1:]: - src_table, src_col = DIM_TO_SOURCE[d] - alias = f"{d[0]}src" - select_parts.append(f"{alias}.{src_col} AS {d}") - from_and_joins.append(f"CROSS JOIN {src_table} {alias}") - full_select_cols = ", ".join(select_parts) - from_clause_sql = "\n ".join(from_and_joins) - - # Build join keys to detect missing rows - join_keys = " AND ".join([f"t.{d} = fg.{d}" for d in dims]) - insert_cols_csv = ", ".join([*dims, value_column]) - select_cols_missing = ", ".join([f"fg.{d}" for d in dims]) - select_cols_plain = ", ".join(dims) - - con.execute( - f""" - WITH full_grid AS ( - SELECT {full_select_cols} - {from_clause_sql} - ), - missing AS ( - SELECT {select_cols_missing} - FROM full_grid fg - LEFT JOIN {table} t ON {join_keys} - WHERE t.{present_key} IS NULL - ) - INSERT INTO {table} ({insert_cols_csv}) - SELECT {select_cols_plain}, ? AS {value_column} - FROM missing - """, - [fill_value], + proj = ", ".join(dims) + # Build column list: present key from p, other dims from their sources + present_cols_sql = f"p.{present_key} AS {present_key}" + dim_cols_sql = ", ".join( + [f"{DIM_TO_SOURCE[d][0]}.{DIM_TO_SOURCE[d][1]} AS {d}" for d in dims[1:]] ) + cols_sql = ", ".join([c for c in [present_cols_sql, dim_cols_sql] if c]) + # Build CROSS JOIN chain: present set, then each dim source table + joins = [f"(SELECT DISTINCT {present_key} FROM {table}) p"] + joins += [DIM_TO_SOURCE[d][0] for d in dims[1:]] + joins_sql = " CROSS JOIN ".join(joins) + + sql = f""" + WITH a AS ( + SELECT {cols_sql} + FROM {joins_sql} + ), + missing AS ( + SELECT {proj} FROM a + EXCEPT + SELECT {proj} FROM {table} + ) + INSERT INTO {table} ({proj}, {value_column}) + SELECT {proj}, ? 
FROM missing + """ + con.execute(sql, [fill_value]) def read_inputs(data_dir, years: list[int]) -> duckdb.DuckDBPyConnection: From dead01120bbfecd3f17c6b25ec50c784b9e1345c Mon Sep 17 00:00:00 2001 From: Tom Bland Date: Wed, 13 Aug 2025 19:54:39 +0100 Subject: [PATCH 34/43] Add more constraints --- src/muse/new_input/readers.py | 36 +++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/src/muse/new_input/readers.py b/src/muse/new_input/readers.py index 2a9cb73c5..3e80b05de 100644 --- a/src/muse/new_input/readers.py +++ b/src/muse/new_input/readers.py @@ -291,7 +291,7 @@ def read_commodity_costs_csv(buffer_, con): sql = """CREATE TABLE commodity_costs ( commodity VARCHAR REFERENCES commodities(id), region VARCHAR REFERENCES regions(id), - year BIGINT, + year BIGINT REFERENCES years(year), value DOUBLE, PRIMARY KEY (commodity, region, year) ); @@ -334,8 +334,8 @@ def read_demand_csv(buffer_, con): sql = """CREATE TABLE demand ( commodity VARCHAR REFERENCES commodities(id), region VARCHAR REFERENCES regions(id), - year BIGINT, - demand DOUBLE, + year BIGINT REFERENCES years(year), + demand DOUBLE CHECK (demand >= 0), PRIMARY KEY (commodity, region, year) ); """ @@ -350,7 +350,7 @@ def read_demand_csv(buffer_, con): present_cols=["commodity"], dims=["region", "year"], error_message=( - "commodity_costs must include all regions/years for any mentioned commodity" + "demand must include all regions/years for any mentioned commodity" ), ) @@ -400,15 +400,15 @@ def read_process_parameters_csv(buffer_, con): sql = """CREATE TABLE process_parameters ( process VARCHAR REFERENCES processes(id), region VARCHAR REFERENCES regions(id), - year BIGINT, - cap_par DOUBLE, - fix_par DOUBLE, - var_par DOUBLE, - max_capacity_addition DOUBLE, - max_capacity_growth DOUBLE, - total_capacity_limit DOUBLE, - lifetime DOUBLE, - discount_rate DOUBLE, + year BIGINT REFERENCES years(year), + cap_par DOUBLE CHECK (cap_par >= 0), + fix_par DOUBLE CHECK (fix_par >= 0), + var_par DOUBLE CHECK (var_par >= 0), + max_capacity_addition DOUBLE CHECK (max_capacity_addition >= 0), + max_capacity_growth DOUBLE CHECK (max_capacity_growth >= 0), + total_capacity_limit DOUBLE CHECK (total_capacity_limit >= 0), + lifetime DOUBLE CHECK (lifetime > 0), + discount_rate DOUBLE CHECK (discount_rate >= 0), PRIMARY KEY (process, region, year) ); """ @@ -446,7 +446,7 @@ def read_process_flows_csv(buffer_, con): process VARCHAR REFERENCES processes(id), commodity VARCHAR REFERENCES commodities(id), region VARCHAR REFERENCES regions(id), - year BIGINT, + year BIGINT REFERENCES years(year), coeff DOUBLE, PRIMARY KEY (process, commodity, region, year) ); @@ -484,7 +484,7 @@ def read_process_availabilities_csv(buffer_, con): sql = """CREATE TABLE process_availabilities ( process VARCHAR REFERENCES processes(id), region VARCHAR REFERENCES regions(id), - year BIGINT, + year BIGINT REFERENCES years(year), time_slice VARCHAR REFERENCES time_slices(id), limit_type VARCHAR CHECK (limit_type IN ('up','down')), value DOUBLE, @@ -518,7 +518,7 @@ def read_agents_csv(buffer_, con): sector VARCHAR REFERENCES sectors(id), search_rule VARCHAR, decision_rule VARCHAR, - quantity DOUBLE + quantity DOUBLE CHECK (quantity >= 0 AND quantity <= 1) ); """ con.sql(sql) @@ -568,7 +568,7 @@ def read_agent_objectives_csv(buffer_, con): sql = """CREATE TABLE agent_objectives ( agent VARCHAR REFERENCES agents(id), objective_type VARCHAR, - decision_weight DOUBLE, + decision_weight DOUBLE CHECK (decision_weight >= 0 AND 
decision_weight <= 1), objective_sort BOOLEAN, PRIMARY KEY (agent, objective_type) ); @@ -604,7 +604,7 @@ def read_assets_csv(buffer_, con): process VARCHAR REFERENCES processes(id), region VARCHAR REFERENCES regions(id), commission_year BIGINT, - capacity DOUBLE, + capacity DOUBLE CHECK (capacity > 0), PRIMARY KEY (agent, process, region, commission_year) ); """ From c4b80a7279f750f4eefd49d6f8844641e3f65b39 Mon Sep 17 00:00:00 2001 From: Tom Bland Date: Wed, 13 Aug 2025 21:43:32 +0100 Subject: [PATCH 35/43] Small tidies --- src/muse/new_input/readers.py | 24 ++++++++++-------------- 1 file changed, 10 insertions(+), 14 deletions(-) diff --git a/src/muse/new_input/readers.py b/src/muse/new_input/readers.py index 3e80b05de..1fa46769b 100644 --- a/src/muse/new_input/readers.py +++ b/src/muse/new_input/readers.py @@ -64,7 +64,7 @@ def expand_time_slices(source_relation: str = "rel") -> str: """ -def validate_present_full_dim_coverage( +def validate_full_coverage_for_present( con: duckdb.DuckDBPyConnection, table: str, present_cols: list[str], @@ -310,7 +310,7 @@ def read_commodity_costs_csv(buffer_, con): ) # Validate coverage - validate_present_full_dim_coverage( + validate_full_coverage_for_present( con, table="commodity_costs", present_cols=["commodity"], @@ -344,7 +344,7 @@ def read_demand_csv(buffer_, con): con.sql("INSERT INTO demand SELECT commodity_id, region_id, year, demand FROM rel;") # Validate coverage - validate_present_full_dim_coverage( + validate_full_coverage_for_present( con, table="demand", present_cols=["commodity"], @@ -469,7 +469,7 @@ def read_process_flows_csv(buffer_, con): ) # Validate coverage - validate_present_full_dim_coverage( + validate_full_coverage_for_present( con, table="process_flows", present_cols=["process", "commodity"], @@ -702,7 +702,10 @@ def process_initial_market(con: duckdb.DuckDBPyConnection, currency: str) -> xr. cc.year AS year, cc.commodity AS commodity, cc.value AS prices, - (? || '/' || c.unit) AS units_prices + (? || '/' || c.unit) AS units_prices, + CAST(0.0 AS DOUBLE) AS exports, + CAST(0.0 AS DOUBLE) AS imports, + CAST(0.0 AS DOUBLE) AS static_trade FROM commodity_costs cc JOIN commodities c ON c.id = cc.commodity """, @@ -710,20 +713,13 @@ def process_initial_market(con: duckdb.DuckDBPyConnection, currency: str) -> xr. 
).fetchdf() # Build dataset from prices - prices_df = create_multiindex( + df = create_multiindex( df, index_columns=["region", "year", "commodity"], index_names=["region", "year", "commodity"], drop_columns=True, ) - result = create_xarray_dataset(prices_df) - - # Add zero trade variables (legacy) - result["exports"] = xr.zeros_like(result["prices"]).rename("exports") - result["imports"] = xr.zeros_like(result["prices"]).rename("imports") - result["static_trade"] = (result["imports"] - result["exports"]).rename( - "static_trade" - ) + result = create_xarray_dataset(df) return result From d5e279fcd8a680f06137a54e4d92dfa5a36e635a Mon Sep 17 00:00:00 2001 From: Tom Bland Date: Wed, 13 Aug 2025 21:49:19 +0100 Subject: [PATCH 36/43] Separate fields for input/output flows --- src/muse/new_input/readers.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/muse/new_input/readers.py b/src/muse/new_input/readers.py index 1fa46769b..09705e66b 100644 --- a/src/muse/new_input/readers.py +++ b/src/muse/new_input/readers.py @@ -447,7 +447,8 @@ def read_process_flows_csv(buffer_, con): commodity VARCHAR REFERENCES commodities(id), region VARCHAR REFERENCES regions(id), year BIGINT REFERENCES years(year), - coeff DOUBLE, + input_coeff DOUBLE CHECK (input_coeff >= 0), + output_coeff DOUBLE CHECK (output_coeff >= 0), PRIMARY KEY (process, commodity, region, year) ); """ @@ -463,7 +464,8 @@ def read_process_flows_csv(buffer_, con): commodity_id, region_id, year, - coeff + CASE WHEN coeff < 0 THEN -coeff ELSE 0 END AS input_coeff, + CASE WHEN coeff > 0 THEN coeff ELSE 0 END AS output_coeff FROM ({expansion_sql}) AS unioned; """ ) From 95c69c16c9833a5e5d6cad7f47046971429afafe Mon Sep 17 00:00:00 2001 From: Tom Bland Date: Wed, 13 Aug 2025 22:57:59 +0100 Subject: [PATCH 37/43] Tidy ups --- .../process_availabilities.csv | 6 +- src/muse/new_input/readers.py | 405 ++++++++---------- 2 files changed, 193 insertions(+), 218 deletions(-) diff --git a/src/muse/data/example/default_new_input/process_availabilities.csv b/src/muse/data/example/default_new_input/process_availabilities.csv index 39824201b..54b6e5906 100644 --- a/src/muse/data/example/default_new_input/process_availabilities.csv +++ b/src/muse/data/example/default_new_input/process_availabilities.csv @@ -1,4 +1,4 @@ process_id,region_id,year,time_slice,limit_type,value -gassupply1,R1,all,annual,up,0.9 -gasCCGT,R1,all,annual,up,0.9 -windturbine,R1,all,annual,up,0.4 +gassupply1,R1,all,all,up,0.9 +gasCCGT,R1,all,all,up,0.9 +windturbine,R1,all,all,up,0.4 diff --git a/src/muse/new_input/readers.py b/src/muse/new_input/readers.py index 09705e66b..4181c9dac 100644 --- a/src/muse/new_input/readers.py +++ b/src/muse/new_input/readers.py @@ -1,3 +1,5 @@ +import uuid + import duckdb import pandas as pd import xarray as xr @@ -14,127 +16,130 @@ } -def expand_years(source_relation: str = "rel") -> str: - """Return a composable SQL that expands 'year' over 'all' or semicolon lists.""" +def _expand_list_or_all( + col: str, + *, + domain_table: str, + domain_col: str, + source_relation: str = "rel", +) -> str: + """Return composable SQL that expands a column over 'all' or ';'-lists. + + - For scalar values (not 'all' and no ';'), rows are passed through. + - For lists, rows are duplicated for each trimmed item. + - For 'all', rows are joined to the full domain table; value comes from + `domain_table.domain_col`. 
+ """ + col_text = f"CAST(s.{col} AS VARCHAR)" + return f""" - SELECT s.* REPLACE (CAST(s.year AS BIGINT) AS year) + SELECT s.* REPLACE (s.{col} AS {col}) FROM {source_relation} s - WHERE lower(CAST(s.year AS VARCHAR)) <> 'all' AND POSITION(';' IN CAST(s.year AS VARCHAR)) = 0 + WHERE lower({col_text}) <> 'all' + AND POSITION(';' IN {col_text}) = 0 UNION ALL - SELECT s.* REPLACE (CAST(TRIM(item) AS BIGINT) AS year) + SELECT s.* REPLACE (TRIM(item) AS {col}) FROM {source_relation} s - CROSS JOIN UNNEST(str_split(CAST(s.year AS VARCHAR), ';')) AS t(item) - WHERE POSITION(';' IN CAST(s.year AS VARCHAR)) > 0 + CROSS JOIN UNNEST(str_split({col_text}, ';')) AS t(item) + WHERE POSITION(';' IN {col_text}) > 0 UNION ALL - SELECT s.* REPLACE (y.year AS year) + SELECT s.* REPLACE (d.{domain_col} AS {col}) FROM {source_relation} s - CROSS JOIN years y - WHERE lower(CAST(s.year AS VARCHAR)) = 'all' - """ # noqa: E501 + JOIN {domain_table} d ON lower({col_text}) = 'all' + """ + + +def expand_years(source_relation: str = "rel") -> str: + """Expand `year` over 'all' and ';'-lists.""" + return _expand_list_or_all( + "year", + domain_table="years", + domain_col="year", + source_relation=source_relation, + ) def expand_regions(source_relation: str = "rel") -> str: - """Return a composable SQL that expands 'region_id' over 'all' or lists.""" - return f""" - SELECT s.* - FROM {source_relation} s - WHERE lower(CAST(s.region_id AS VARCHAR)) <> 'all' AND POSITION(';' IN CAST(s.region_id AS VARCHAR)) = 0 - UNION ALL - SELECT s.* REPLACE (TRIM(item) AS region_id) - FROM {source_relation} s - CROSS JOIN UNNEST(str_split(CAST(s.region_id AS VARCHAR), ';')) AS t(item) - WHERE POSITION(';' IN CAST(s.region_id AS VARCHAR)) > 0 - UNION ALL - SELECT s.* REPLACE (r.id AS region_id) - FROM {source_relation} s - JOIN regions r ON lower(CAST(s.region_id AS VARCHAR)) = 'all' - """ # noqa: E501 + """Expand `region_id` over 'all' and ';'-lists.""" + return _expand_list_or_all( + "region_id", + domain_table="regions", + domain_col="id", + source_relation=source_relation, + ) def expand_time_slices(source_relation: str = "rel") -> str: - """Return a composable SQL that expands 'time_slice' over 'annual'.""" - return f""" - SELECT s.* - FROM {source_relation} s - WHERE lower(CAST(s.time_slice AS VARCHAR)) <> 'annual' - UNION ALL - SELECT s.* REPLACE (t.id AS time_slice) - FROM {source_relation} s - JOIN time_slices t ON lower(CAST(s.time_slice AS VARCHAR)) = 'annual' - """ + """Expand `time_slice` over 'all' and ';'-lists.""" + return _expand_list_or_all( + "time_slice", + domain_table="time_slices", + domain_col="id", + source_relation=source_relation, + ) + + +def chain_expanders(source: str, *expanders) -> str: + """Compose multiple expander functions over a source relation name/SQL.""" + sql = source + for i, expander in enumerate(expanders): + src = sql if i == 0 else f"({sql})" + sql = expander(source_relation=src) + return sql + +def insert_from_csv( + con: duckdb.DuckDBPyConnection, + buffer_, + insert_into: str, + select_sql: str, + expanders: tuple = (), +) -> None: + """Standardize: CSV -> unique temp view -> optional expanders -> INSERT.""" + view_name = f"rel_{uuid.uuid4().hex}" + rel = con.read_csv(buffer_, header=True, delimiter=",") + rel.create(view_name) + src_sql = chain_expanders(view_name, *expanders) if expanders else view_name + wrapped_src = src_sql if not expanders else f"({src_sql}) AS unioned" + con.sql(f"INSERT INTO {insert_into} {select_sql.format(src=wrapped_src)}") -def 
validate_full_coverage_for_present( + +def validate_coverage( con: duckdb.DuckDBPyConnection, table: str, - present_cols: list[str], dims: list[str], - error_message: str, + present: list[str] | None = None, ) -> None: - """Ensure that for each present entity (present_cols), all dims combos exist. + """Validate that required combinations exist in `table`. - Generates the cartesian product of present entities crossed with the dim - sources and compares to the table using EXCEPT. + - If `present` is None: requires full cartesian product across `dims`. + - If `present` is provided: for each distinct `present` key in `table`, + requires all combinations across `dims`. """ for d in dims: if d not in DIM_TO_SOURCE: raise ValueError(f"Unsupported dim: {d}") - present_csv = ", ".join(present_cols) - proj = ", ".join([*present_cols, *dims]) - - # Columns from present set (aliased p.) - present_select = [f"p.{c} AS {c}" for c in present_cols] - - # Columns from dimension sources (dim_table.dim_id AS dim_name) - dim_cols = [f"{DIM_TO_SOURCE[d][0]}.{DIM_TO_SOURCE[d][1]} AS {d}" for d in dims] - cols_sql = ", ".join([*present_select, *dim_cols]) - - # FROM present set then CROSS JOIN each dim source table to get the grid - joins = [f"(SELECT DISTINCT {present_csv} FROM {table}) p"] - joins += [DIM_TO_SOURCE[d][0] for d in dims] - joins_sql = " CROSS JOIN ".join(joins) - - sql = f""" - WITH a AS ( - SELECT {cols_sql} - FROM {joins_sql} - ), - missing AS ( - SELECT {proj} FROM a - EXCEPT - SELECT {proj} FROM {table} - ) - SELECT COUNT(*) FROM missing - """ - if con.execute(sql).fetchone()[0]: - raise ValueError(error_message) - + select_cols: list[str] = [] + joins: list[str] = [] -def validate_full_coverage( - con: duckdb.DuckDBPyConnection, table: str, dims: list[str] -) -> None: - """Validate that all combinations across dims exist in table.""" - for d in dims: - if d not in DIM_TO_SOURCE: - raise ValueError(f"Unsupported dim: {d}") + if present: + present_csv = ", ".join(present) + joins.append(f"(SELECT DISTINCT {present_csv} FROM {table}) p") + select_cols.extend([f"p.{c} AS {c}" for c in present]) - # Build full grid FROM and CROSS JOINs over all dims in one compact SQL - select_cols = [] - tables = [] for d in dims: src_table, src_col = DIM_TO_SOURCE[d] select_cols.append(f"{src_table}.{src_col} AS {d}") - tables.append(src_table) + joins.append(src_table) - proj = ", ".join(dims) - cols_sql = ", ".join(select_cols) - joins_sql = " CROSS JOIN ".join(tables) + proj_cols = [*(present or []), *dims] + proj = ", ".join(proj_cols) sql = f""" WITH a AS ( - SELECT {cols_sql} - FROM {joins_sql} + SELECT {", ".join(select_cols)} + FROM {" CROSS JOIN ".join(joins)} ), missing AS ( SELECT {proj} FROM a @@ -143,8 +148,7 @@ def validate_full_coverage( ) SELECT COUNT(*) FROM missing """ - missing_count = con.execute(sql).fetchone()[0] - if missing_count: + if con.execute(sql).fetchone()[0]: raise ValueError("Missing required combinations across dims") @@ -297,27 +301,20 @@ def read_commodity_costs_csv(buffer_, con): ); """ con.sql(sql) - rel = con.read_csv(buffer_, header=True, delimiter=",") # noqa: F841 - years_sql = expand_years(source_relation="rel") - regions_sql = expand_regions(source_relation=f"({years_sql})") - expansion_sql = regions_sql - con.sql( - f""" - INSERT INTO commodity_costs - SELECT commodity_id, region_id, year, value - FROM ({expansion_sql}) AS unioned; - """ + insert_from_csv( + con, + buffer_, + "commodity_costs(commodity, region, year, value)", + "SELECT commodity_id, region_id, year, 
value FROM {src}", + expanders=(expand_years, expand_regions), ) # Validate coverage - validate_full_coverage_for_present( + validate_coverage( con, table="commodity_costs", - present_cols=["commodity"], dims=["region", "year"], - error_message=( - "commodity_costs must include all regions/years for any mentioned commodity" - ), + present=["commodity"], ) # Insert data for missing commodities @@ -340,18 +337,19 @@ def read_demand_csv(buffer_, con): ); """ con.sql(sql) - rel = con.read_csv(buffer_, header=True, delimiter=",") # noqa: F841 - con.sql("INSERT INTO demand SELECT commodity_id, region_id, year, demand FROM rel;") + insert_from_csv( + con, + buffer_, + "demand(commodity, region, year, demand)", + "SELECT commodity_id, region_id, year, demand FROM {src}", + ) # Validate coverage - validate_full_coverage_for_present( + validate_coverage( con, table="demand", - present_cols=["commodity"], dims=["region", "year"], - error_message=( - "demand must include all regions/years for any mentioned commodity" - ), + present=["commodity"], ) # Insert data for missing commodities @@ -374,14 +372,12 @@ def read_demand_slicing_csv(buffer_, con): ); """ con.sql(sql) - rel = con.read_csv(buffer_, header=True, delimiter=",") # noqa: F841 - regions_sql = expand_regions(source_relation="rel") - ts_sql = expand_time_slices(source_relation=f"({regions_sql})") - expansion_sql = ts_sql - con.sql( - f"""INSERT INTO demand_slicing SELECT - commodity_id, region_id, time_slice, fraction FROM ({expansion_sql}) AS unioned; - """ # noqa: E501 + insert_from_csv( + con, + buffer_, + "demand_slicing(commodity, region, time_slice, fraction)", + "SELECT commodity_id, region_id, time_slice, fraction FROM {src}", + expanders=(expand_regions, expand_time_slices), ) @@ -413,13 +409,18 @@ def read_process_parameters_csv(buffer_, con): ); """ con.sql(sql) - rel = con.read_csv(buffer_, header=True, delimiter=",") # noqa: F841 - years_sql = expand_years(source_relation="rel") - regions_sql = expand_regions(source_relation=f"({years_sql})") - expansion_sql = regions_sql - con.sql( - f""" - INSERT INTO process_parameters SELECT + insert_from_csv( + con, + buffer_, + ( + "process_parameters(" + "process, region, year, cap_par, fix_par, var_par, " + "max_capacity_addition, max_capacity_growth, total_capacity_limit, " + "lifetime, discount_rate)" + ), + ( + """ + SELECT process_id, region_id, year, @@ -431,12 +432,14 @@ def read_process_parameters_csv(buffer_, con): total_capacity_limit, lifetime, discount_rate - FROM ({expansion_sql}) AS unioned; - """ + FROM {src} + """ + ), + expanders=(expand_years, expand_regions), ) # Validate coverage - validate_full_coverage( + validate_coverage( con, table="process_parameters", dims=["process", "region", "year"] ) @@ -453,32 +456,29 @@ def read_process_flows_csv(buffer_, con): ); """ con.sql(sql) - rel = con.read_csv(buffer_, header=True, delimiter=",") # noqa: F841 - years_sql = expand_years(source_relation="rel") - regions_sql = expand_regions(source_relation=f"({years_sql})") - expansion_sql = regions_sql - con.sql( - f""" - INSERT INTO process_flows SELECT + insert_from_csv( + con, + buffer_, + "process_flows(process, commodity, region, year, input_coeff, output_coeff)", + """ + SELECT process_id, commodity_id, region_id, year, - CASE WHEN coeff < 0 THEN -coeff ELSE 0 END AS input_coeff, - CASE WHEN coeff > 0 THEN coeff ELSE 0 END AS output_coeff - FROM ({expansion_sql}) AS unioned; - """ + CASE WHEN coeff < 0 THEN -coeff ELSE 0 END, + CASE WHEN coeff > 0 THEN coeff ELSE 0 END + FROM 
{src} + """, + expanders=(expand_years, expand_regions), ) # Validate coverage - validate_full_coverage_for_present( + validate_coverage( con, table="process_flows", - present_cols=["process", "commodity"], dims=["region", "year"], - error_message=( - "process_flows must include all regions/years for any present (process, commodity)" # noqa: E501 - ), + present=["process", "commodity"], ) @@ -494,22 +494,21 @@ def read_process_availabilities_csv(buffer_, con): ); """ con.sql(sql) - rel = con.read_csv(buffer_, header=True, delimiter=",") # noqa: F841 - years_sql = expand_years(source_relation="rel") - regions_sql = expand_regions(source_relation=f"({years_sql})") - ts_sql = expand_time_slices(source_relation=f"({regions_sql})") - expansion_sql = ts_sql - con.sql( - f""" - INSERT INTO process_availabilities SELECT + insert_from_csv( + con, + buffer_, + "process_availabilities(process, region, year, time_slice, limit_type, value)", + """ + SELECT process_id, region_id, year, time_slice, limit_type, value - FROM ({expansion_sql}) AS unioned; - """ + FROM {src} + """, + expanders=(expand_years, expand_regions, expand_time_slices), ) @@ -726,72 +725,48 @@ def process_initial_market(con: duckdb.DuckDBPyConnection, currency: str) -> xr. def process_agent_parameters(con: duckdb.DuckDBPyConnection, sector: str) -> list[dict]: - """Create a list of agent dictionaries for a sector from DB tables. - - The result matches the structure returned by the legacy CSV-based - process_agent_parameters, but only includes the required fields: - - name, region, objectives, search_rules, decision, quantity - - The following legacy fields are intentionally omitted: agent_type, - share, maturity_threshold, spend_limit. - """ - # Gather agent base data for the sector - agents_df = con.execute( - """ - SELECT id AS name, - region AS region, - search_rule, - decision_rule, - quantity - FROM agents - WHERE sector = ? - """, - [sector], - ).fetchdf() - - # Gather objectives per agent - objectives_df = con.execute( + """Create a list of agent dictionaries for a sector from DB tables.""" + df = con.execute( """ - SELECT agent AS name, - objective_type, - objective_sort, - decision_weight - FROM agent_objectives - WHERE agent IN (SELECT id FROM agents WHERE sector = ?) - ORDER BY name + SELECT + a.id AS name, + a.region AS region, + a.search_rule, + a.decision_rule, + a.quantity, + LIST(o.objective_type) + FILTER (WHERE o.objective_type IS NOT NULL) AS objectives, + LIST(struct_pack( + objective_type := o.objective_type, + objective_sort := o.objective_sort, + decision_weight := o.decision_weight + )) + FILTER (WHERE o.objective_type IS NOT NULL) AS decision_params + FROM agents a + LEFT JOIN agent_objectives o ON o.agent = a.id + WHERE a.sector = ? 
+ GROUP BY 1,2,3,4,5 + ORDER BY 1 """, [sector], ).fetchdf() - # Assemble result result: list[dict] = [] - for _, row in agents_df.iterrows(): - agent_name = row["name"] - agent_objectives = objectives_df[objectives_df["name"] == agent_name] - - # Objectives list: in legacy, these are strings like 'LCOE' - objectives = agent_objectives["objective_type"].tolist() - - # Decision parameters: tuples of - # (objective_type, objective_sort, decision_weight) - decision_params = list( - zip( - agent_objectives["objective_type"].tolist(), - agent_objectives["objective_sort"].tolist(), - agent_objectives["decision_weight"].tolist(), - ) + for _, r in df.iterrows(): + params = [ + (d["objective_type"], d["objective_sort"], d["decision_weight"]) # type: ignore[index] + for d in (r["decision_params"] or []) + ] + result.append( + { + "name": r["name"], + "region": r["region"], + "objectives": (r["objectives"] or []), + "search_rules": r["search_rule"], + "decision": {"name": r["decision_rule"], "parameters": params}, + "quantity": r["quantity"], + } ) - - agent_dict = { - "name": agent_name, - "region": row["region"], - "objectives": objectives, - "search_rules": row["search_rule"], - "decision": {"name": row["decision_rule"], "parameters": decision_params}, - "quantity": row["quantity"], - } - result.append(agent_dict) - return result From 9923ab19ca5e1a44787f1f5c55eaf71085bc1923 Mon Sep 17 00:00:00 2001 From: Tom Bland Date: Wed, 13 Aug 2025 23:21:53 +0100 Subject: [PATCH 38/43] Tidier still --- src/muse/new_input/readers.py | 128 +++++++++++++--------------------- 1 file changed, 49 insertions(+), 79 deletions(-) diff --git a/src/muse/new_input/readers.py b/src/muse/new_input/readers.py index 4181c9dac..a7504384d 100644 --- a/src/muse/new_input/readers.py +++ b/src/muse/new_input/readers.py @@ -1,5 +1,3 @@ -import uuid - import duckdb import pandas as pd import xarray as xr @@ -80,28 +78,12 @@ def expand_time_slices(source_relation: str = "rel") -> str: def chain_expanders(source: str, *expanders) -> str: - """Compose multiple expander functions over a source relation name/SQL.""" + """Compose expander SQLs and return a FROM-ready subquery alias.""" sql = source for i, expander in enumerate(expanders): src = sql if i == 0 else f"({sql})" sql = expander(source_relation=src) - return sql - - -def insert_from_csv( - con: duckdb.DuckDBPyConnection, - buffer_, - insert_into: str, - select_sql: str, - expanders: tuple = (), -) -> None: - """Standardize: CSV -> unique temp view -> optional expanders -> INSERT.""" - view_name = f"rel_{uuid.uuid4().hex}" - rel = con.read_csv(buffer_, header=True, delimiter=",") - rel.create(view_name) - src_sql = chain_expanders(view_name, *expanders) if expanders else view_name - wrapped_src = src_sql if not expanders else f"({src_sql}) AS unioned" - con.sql(f"INSERT INTO {insert_into} {select_sql.format(src=wrapped_src)}") + return f"({sql})" def validate_coverage( @@ -243,11 +225,9 @@ def read_time_slices_csv(buffer_, con): """ con.sql(sql) - # Read CSV into a temporary relation rel = con.read_csv(buffer_, header=True, delimiter=",") # noqa: F841 - - # Insert into the table with computed id - con.sql(""" + con.sql( + """ INSERT INTO time_slices SELECT season || '.' || day || '.' 
|| time_of_day AS id, @@ -256,7 +236,8 @@ def read_time_slices_csv(buffer_, con): time_of_day, fraction FROM rel - """) + """ + ) def read_commodities_csv(buffer_, con): @@ -301,12 +282,14 @@ def read_commodity_costs_csv(buffer_, con): ); """ con.sql(sql) - insert_from_csv( - con, - buffer_, - "commodity_costs(commodity, region, year, value)", - "SELECT commodity_id, region_id, year, value FROM {src}", - expanders=(expand_years, expand_regions), + rel = con.read_csv(buffer_, header=True, delimiter=",") # noqa: F841 + expansion_sql = chain_expanders("rel", expand_years, expand_regions) + con.sql( + f""" + INSERT INTO commodity_costs + SELECT commodity_id, region_id, year, value + FROM {expansion_sql}; + """ ) # Validate coverage @@ -337,12 +320,8 @@ def read_demand_csv(buffer_, con): ); """ con.sql(sql) - insert_from_csv( - con, - buffer_, - "demand(commodity, region, year, demand)", - "SELECT commodity_id, region_id, year, demand FROM {src}", - ) + rel = con.read_csv(buffer_, header=True, delimiter=",") # noqa: F841 + con.sql("INSERT INTO demand SELECT commodity_id, region_id, year, demand FROM rel;") # Validate coverage validate_coverage( @@ -372,12 +351,14 @@ def read_demand_slicing_csv(buffer_, con): ); """ con.sql(sql) - insert_from_csv( - con, - buffer_, - "demand_slicing(commodity, region, time_slice, fraction)", - "SELECT commodity_id, region_id, time_slice, fraction FROM {src}", - expanders=(expand_regions, expand_time_slices), + rel = con.read_csv(buffer_, header=True, delimiter=",") # noqa: F841 + expansion_sql = chain_expanders("rel", expand_regions, expand_time_slices) + con.sql( + f""" + INSERT INTO demand_slicing SELECT + commodity_id, region_id, time_slice, fraction + FROM {expansion_sql}; + """ ) @@ -409,18 +390,11 @@ def read_process_parameters_csv(buffer_, con): ); """ con.sql(sql) - insert_from_csv( - con, - buffer_, - ( - "process_parameters(" - "process, region, year, cap_par, fix_par, var_par, " - "max_capacity_addition, max_capacity_growth, total_capacity_limit, " - "lifetime, discount_rate)" - ), - ( - """ - SELECT + rel = con.read_csv(buffer_, header=True, delimiter=",") # noqa: F841 + expansion_sql = chain_expanders("rel", expand_years, expand_regions) + con.sql( + f""" + INSERT INTO process_parameters SELECT process_id, region_id, year, @@ -432,10 +406,8 @@ def read_process_parameters_csv(buffer_, con): total_capacity_limit, lifetime, discount_rate - FROM {src} - """ - ), - expanders=(expand_years, expand_regions), + FROM {expansion_sql}; + """ ) # Validate coverage @@ -456,21 +428,19 @@ def read_process_flows_csv(buffer_, con): ); """ con.sql(sql) - insert_from_csv( - con, - buffer_, - "process_flows(process, commodity, region, year, input_coeff, output_coeff)", - """ - SELECT + rel = con.read_csv(buffer_, header=True, delimiter=",") # noqa: F841 + expansion_sql = chain_expanders("rel", expand_years, expand_regions) + con.sql( + f""" + INSERT INTO process_flows SELECT process_id, commodity_id, region_id, year, - CASE WHEN coeff < 0 THEN -coeff ELSE 0 END, - CASE WHEN coeff > 0 THEN coeff ELSE 0 END - FROM {src} - """, - expanders=(expand_years, expand_regions), + CASE WHEN coeff < 0 THEN -coeff ELSE 0 END AS input_coeff, + CASE WHEN coeff > 0 THEN coeff ELSE 0 END AS output_coeff + FROM {expansion_sql}; + """ ) # Validate coverage @@ -494,21 +464,21 @@ def read_process_availabilities_csv(buffer_, con): ); """ con.sql(sql) - insert_from_csv( - con, - buffer_, - "process_availabilities(process, region, year, time_slice, limit_type, value)", - """ - SELECT + rel = 
con.read_csv(buffer_, header=True, delimiter=",") # noqa: F841 + expansion_sql = chain_expanders( + "rel", expand_years, expand_regions, expand_time_slices + ) + con.sql( + f""" + INSERT INTO process_availabilities SELECT process_id, region_id, year, time_slice, limit_type, value - FROM {src} - """, - expanders=(expand_years, expand_regions, expand_time_slices), + FROM {expansion_sql}; + """ ) From 79563f2c42b91c5eda4bf691e365fa73aaadb9be Mon Sep 17 00:00:00 2001 From: Tom Bland Date: Fri, 15 Aug 2025 14:37:17 +0100 Subject: [PATCH 39/43] Add more validation --- src/muse/new_input/readers.py | 114 +++++++++++++++++++++++----------- 1 file changed, 77 insertions(+), 37 deletions(-) diff --git a/src/muse/new_input/readers.py b/src/muse/new_input/readers.py index a7504384d..45ed69db8 100644 --- a/src/muse/new_input/readers.py +++ b/src/muse/new_input/readers.py @@ -138,36 +138,34 @@ def fill_missing_dim_combinations( con: duckdb.DuckDBPyConnection, table: str, dims: list[str], - value_column: str, - fill_value: float, + value_columns: dict[str, float], ) -> None: - """Insert fill_value for any missing combinations across the given dims. + """Insert fill values for any missing combinations across the given dims. - Anchors on the first dim's present values to avoid generating rows for - completely absent entities, then uses an EXCEPT comparison to find and - insert missing keys. + Generates the full cartesian product across all dimensions from their source tables, + then uses an EXCEPT comparison to find and insert missing keys. The target table must use these exact column names for the dims. """ for d in dims: if d not in DIM_TO_SOURCE: raise ValueError(f"Unsupported dim: {d}") - present_key = dims[0] proj = ", ".join(dims) - # Build column list: present key from p, other dims from their sources - present_cols_sql = f"p.{present_key} AS {present_key}" + + # Build column list: all dims from their source tables dim_cols_sql = ", ".join( - [f"{DIM_TO_SOURCE[d][0]}.{DIM_TO_SOURCE[d][1]} AS {d}" for d in dims[1:]] + [f"{DIM_TO_SOURCE[d][0]}.{DIM_TO_SOURCE[d][1]} AS {d}" for d in dims] ) - cols_sql = ", ".join([c for c in [present_cols_sql, dim_cols_sql] if c]) - # Build CROSS JOIN chain: present set, then each dim source table - joins = [f"(SELECT DISTINCT {present_key} FROM {table}) p"] - joins += [DIM_TO_SOURCE[d][0] for d in dims[1:]] + # Build CROSS JOIN chain: all dim source tables + joins = [DIM_TO_SOURCE[d][0] for d in dims] joins_sql = " CROSS JOIN ".join(joins) + value_cols = ", ".join(value_columns.keys()) + value_placeholders = ", ".join(["?" for _ in value_columns]) + sql = f""" WITH a AS ( - SELECT {cols_sql} + SELECT {dim_cols_sql} FROM {joins_sql} ), missing AS ( @@ -175,10 +173,10 @@ def fill_missing_dim_combinations( EXCEPT SELECT {proj} FROM {table} ) - INSERT INTO {table} ({proj}, {value_column}) - SELECT {proj}, ? 
FROM missing + INSERT INTO {table} ({proj}, {value_cols}) + SELECT {proj}, {value_placeholders} FROM missing """ - con.execute(sql, [fill_value]) + con.execute(sql, list(value_columns.values())) def read_inputs(data_dir, years: list[int]) -> duckdb.DuckDBPyConnection: @@ -292,7 +290,7 @@ def read_commodity_costs_csv(buffer_, con): """ ) - # Validate coverage + # Validate coverage for included commodities validate_coverage( con, table="commodity_costs", @@ -305,8 +303,14 @@ def read_commodity_costs_csv(buffer_, con): con, table="commodity_costs", dims=["commodity", "region", "year"], - value_column="value", - fill_value=0.0, + value_columns={"value": 0.0}, + ) + + # Confirm that coverage is now complete + validate_coverage( + con, + table="commodity_costs", + dims=["commodity", "region", "year"], ) @@ -323,7 +327,7 @@ def read_demand_csv(buffer_, con): rel = con.read_csv(buffer_, header=True, delimiter=",") # noqa: F841 con.sql("INSERT INTO demand SELECT commodity_id, region_id, year, demand FROM rel;") - # Validate coverage + # Validate coverage for included commodities validate_coverage( con, table="demand", @@ -336,8 +340,14 @@ def read_demand_csv(buffer_, con): con, table="demand", dims=["commodity", "region", "year"], - value_column="demand", - fill_value=0.0, + value_columns={"demand": 0.0}, + ) + + # Confirm that coverage is now complete + validate_coverage( + con, + table="demand", + dims=["commodity", "region", "year"], ) @@ -361,6 +371,38 @@ def read_demand_slicing_csv(buffer_, con): """ ) + # Validate coverage for included commodities + validate_coverage( + con, + table="demand_slicing", + dims=["region", "time_slice"], + present=["commodity"], + ) + + # Fill missing combinations with fraction values from time_slices + sql = """ + WITH missing AS ( + SELECT c.id AS commodity, r.id AS region, ts.id AS time_slice + FROM commodities c + CROSS JOIN regions r + CROSS JOIN time_slices ts + EXCEPT + SELECT commodity, region, time_slice FROM demand_slicing + ) + INSERT INTO demand_slicing (commodity, region, time_slice, fraction) + SELECT commodity, region, time_slice, ts.fraction + FROM missing m + JOIN time_slices ts ON m.time_slice = ts.id + """ + con.execute(sql) + + # Confirm that coverage is now complete + validate_coverage( + con, + table="demand_slicing", + dims=["commodity", "region", "time_slice"], + ) + def read_processes_csv(buffer_, con): sql = """CREATE TABLE processes ( @@ -410,7 +452,7 @@ def read_process_parameters_csv(buffer_, con): """ ) - # Validate coverage + # Validate that coverage is complete validate_coverage( con, table="process_parameters", dims=["process", "region", "year"] ) @@ -443,7 +485,7 @@ def read_process_flows_csv(buffer_, con): """ ) - # Validate coverage + # Validate coverage for included process/commodity combinations validate_coverage( con, table="process_flows", @@ -507,7 +549,7 @@ def read_agents_csv(buffer_, con): """ ) - # Validate coverage across region/sector + # Validate there is at least one agent for every (region, sector) ensure_agents_region_sector_coverage(con) @@ -704,19 +746,17 @@ def process_agent_parameters(con: duckdb.DuckDBPyConnection, sector: str) -> lis a.search_rule, a.decision_rule, a.quantity, - LIST(o.objective_type) - FILTER (WHERE o.objective_type IS NOT NULL) AS objectives, + LIST(o.objective_type) AS objectives, LIST(struct_pack( objective_type := o.objective_type, objective_sort := o.objective_sort, decision_weight := o.decision_weight - )) - FILTER (WHERE o.objective_type IS NOT NULL) AS decision_params + )) AS 
decision_params FROM agents a - LEFT JOIN agent_objectives o ON o.agent = a.id + JOIN agent_objectives o ON o.agent = a.id WHERE a.sector = ? - GROUP BY 1,2,3,4,5 - ORDER BY 1 + GROUP BY a.id, a.region, a.search_rule, a.decision_rule, a.quantity + ORDER BY a.id """, [sector], ).fetchdf() @@ -724,14 +764,14 @@ def process_agent_parameters(con: duckdb.DuckDBPyConnection, sector: str) -> lis result: list[dict] = [] for _, r in df.iterrows(): params = [ - (d["objective_type"], d["objective_sort"], d["decision_weight"]) # type: ignore[index] - for d in (r["decision_params"] or []) + (d["objective_type"], d["objective_sort"], d["decision_weight"]) + for d in r["decision_params"] ] result.append( { "name": r["name"], "region": r["region"], - "objectives": (r["objectives"] or []), + "objectives": r["objectives"], "search_rules": r["search_rule"], "decision": {"name": r["decision_rule"], "parameters": params}, "quantity": r["quantity"], From 2bba85d92fc6bfea77f80139b137a20c19dfc0cc Mon Sep 17 00:00:00 2001 From: Tom Bland Date: Fri, 15 Aug 2025 16:06:40 +0100 Subject: [PATCH 40/43] Proper validation for process flows and availabilities --- src/muse/new_input/readers.py | 85 +++++++++++++++++++++++++++-------- 1 file changed, 67 insertions(+), 18 deletions(-) diff --git a/src/muse/new_input/readers.py b/src/muse/new_input/readers.py index 45ed69db8..4ea0f21af 100644 --- a/src/muse/new_input/readers.py +++ b/src/muse/new_input/readers.py @@ -493,35 +493,84 @@ def read_process_flows_csv(buffer_, con): present=["process", "commodity"], ) + # Insert data for missing combinations + fill_missing_dim_combinations( + con, + table="process_flows", + dims=["process", "commodity", "region", "year"], + value_columns={"input_coeff": 0.0, "output_coeff": 0.0}, + ) + + # Confirm that coverage is now complete + validate_coverage( + con, + table="process_flows", + dims=["process", "commodity", "region", "year"], + ) + def read_process_availabilities_csv(buffer_, con): - sql = """CREATE TABLE process_availabilities ( + # Create temporary tables with shared schema + table_schema = """( process VARCHAR REFERENCES processes(id), region VARCHAR REFERENCES regions(id), year BIGINT REFERENCES years(year), time_slice VARCHAR REFERENCES time_slices(id), - limit_type VARCHAR CHECK (limit_type IN ('up','down')), value DOUBLE, - PRIMARY KEY (process, region, year, time_slice, limit_type) - ); - """ - con.sql(sql) + PRIMARY KEY (process, region, year, time_slice) + )""" + con.sql(f"CREATE TABLE process_lower_availabilities {table_schema};") + con.sql(f"CREATE TABLE process_upper_availabilities {table_schema};") + + # Read and expand data, then insert into both tables rel = con.read_csv(buffer_, header=True, delimiter=",") # noqa: F841 expansion_sql = chain_expanders( "rel", expand_years, expand_regions, expand_time_slices ) - con.sql( - f""" - INSERT INTO process_availabilities SELECT - process_id, - region_id, - year, - time_slice, - limit_type, - value - FROM {expansion_sql}; - """ - ) + for limit_type, table_name in [ + ("down", "process_lower_availabilities"), + ("up", "process_upper_availabilities"), + ]: + con.sql(f""" + INSERT INTO {table_name} SELECT + process_id, region_id, year, time_slice, value + FROM {expansion_sql} + WHERE limit_type = '{limit_type}'; + """) + + # Validate and fill missing combinations for both tables + for table_name, fill_value in [ + ("process_lower_availabilities", 0.0), + ("process_upper_availabilities", 1.0), + ]: + validate_coverage( + con, + table=table_name, + dims=["region", "year", 
"time_slice"], + present=["process"], + ) + fill_missing_dim_combinations( + con, + table=table_name, + dims=["process", "region", "year", "time_slice"], + value_columns={"value": fill_value}, + ) + validate_coverage( + con, table=table_name, dims=["process", "region", "year", "time_slice"] + ) + + # Merge into final table and cleanup + con.sql(""" + CREATE TABLE process_availabilities AS + SELECT l.process, l.region, l.year, l.time_slice, + l.value AS lower_bound, u.value AS upper_bound + FROM process_lower_availabilities l + JOIN process_upper_availabilities u USING (process, region, year, time_slice) + """) + + # Drop the temporary tables + con.sql("DROP TABLE process_lower_availabilities") + con.sql("DROP TABLE process_upper_availabilities") def read_agents_csv(buffer_, con): From 038596c24486cbbf1b0a32321be60a486243f106 Mon Sep 17 00:00:00 2001 From: Tom Bland Date: Mon, 18 Aug 2025 09:45:17 +0100 Subject: [PATCH 41/43] process_io_technodata --- src/muse/new_input/readers.py | 38 +++++++++++++++++++++++++++++++++++ tests/test_new_readers.py | 6 ++++++ 2 files changed, 44 insertions(+) diff --git a/src/muse/new_input/readers.py b/src/muse/new_input/readers.py index 4ea0f21af..ddc99d322 100644 --- a/src/muse/new_input/readers.py +++ b/src/muse/new_input/readers.py @@ -829,6 +829,44 @@ def process_agent_parameters(con: duckdb.DuckDBPyConnection, sector: str) -> lis return result +def process_io_technodata(con: duckdb.DuckDBPyConnection, sector: str) -> xr.Dataset: + """Create an xarray Dataset for IO technodata from DB tables. + + Uses `process_flows` to build input/output coefficients over + dimensions (technology, region, year, commodity) with 'fixed' and + 'flexible' variables. Since flexible inputs/outputs are eliminated, + 'flexible' is filled with zeros. + """ + # Get both input and output coefficients for the sector + df = con.execute( + """ + SELECT + p.id AS technology, + pf.commodity, + pf.region, + pf.year, + pf.input_coeff AS fixed_inputs, + pf.output_coeff AS fixed_outputs, + 0.0 AS flexible_inputs, + 0.0 AS flexible_outputs + FROM process_flows pf + JOIN processes p ON p.id = pf.process + WHERE p.sector = ? 
+ """, + [sector], + ).fetchdf() + + df = create_multiindex( + df, + index_columns=["technology", "region", "year", "commodity"], + index_names=["technology", "region", "year", "commodity"], + drop_columns=True, + ) + + result = create_xarray_dataset(df) + return result + + def process_initial_capacity( con: duckdb.DuckDBPyConnection, sector: str ) -> xr.DataArray: diff --git a/tests/test_new_readers.py b/tests/test_new_readers.py index 57c62e0e6..0a5ee509b 100644 --- a/tests/test_new_readers.py +++ b/tests/test_new_readers.py @@ -101,6 +101,12 @@ def test_process_initial_market(con): process_initial_market(con, currency="EUR") +def test_process_io_technodata(con): + from muse.new_input.readers import process_io_technodata + + process_io_technodata(con, sector="power") + + def test_process_initial_capacity(con): from muse.new_input.readers import process_initial_capacity From ad65bef9f76f579e5f02e356700df4f5c6538584 Mon Sep 17 00:00:00 2001 From: Tom Bland Date: Mon, 18 Aug 2025 10:56:30 +0100 Subject: [PATCH 42/43] process_technodata_timeslices --- src/muse/new_input/readers.py | 149 +++++++++++++++++++++++++--------- tests/test_new_readers.py | 24 ++++-- 2 files changed, 129 insertions(+), 44 deletions(-) diff --git a/src/muse/new_input/readers.py b/src/muse/new_input/readers.py index ddc99d322..e4eaf2a8b 100644 --- a/src/muse/new_input/readers.py +++ b/src/muse/new_input/readers.py @@ -203,6 +203,9 @@ def read_inputs(data_dir, years: list[int]) -> duckdb.DuckDBPyConnection: with open(data_dir / filename) as f: reader(f, con) + # Set up global TIMESLICE object + setup_timeslice_globals(con) + return con @@ -685,6 +688,29 @@ def read_assets_csv(buffer_, con): ) +def setup_timeslice_globals(con: duckdb.DuckDBPyConnection): + """Set up global TIMESLICE object from database timeslice data. + + Queries the time_slices table, assembles into settings format, + and calls timeslices.setup_module to initialize the global TIMESLICE. + """ + from muse import timeslices + + timeslice_settings = {} + for season, day, time_of_day, fraction in con.execute( + """ + SELECT season, day, time_of_day, fraction + FROM time_slices + ORDER BY season, day, time_of_day + """ + ).fetchall(): + timeslice_settings.setdefault(season, {}).setdefault(day, {})[time_of_day] = ( + fraction + ) + + timeslices.setup_module(timeslice_settings) + + def process_global_commodities(con: duckdb.DuckDBPyConnection) -> xr.Dataset: """Create an xarray Dataset of global commodities from the `commodities` table.""" df = con.sql( @@ -741,6 +767,86 @@ def process_technodictionary(con: duckdb.DuckDBPyConnection, sector: str) -> xr. return result +def process_io_technodata(con: duckdb.DuckDBPyConnection, sector: str) -> xr.Dataset: + """Create an xarray Dataset for IO technodata from DB tables. + + Uses `process_flows` to build input/output coefficients over + dimensions (technology, region, year, commodity) with 'fixed' and + 'flexible' variables. Since flexible inputs/outputs are eliminated, + 'flexible' is filled with zeros. + """ + # Get both input and output coefficients for the sector + df = con.execute( + """ + SELECT + p.id AS technology, + pf.commodity, + pf.region, + pf.year, + pf.input_coeff AS fixed_inputs, + pf.output_coeff AS fixed_outputs, + 0.0 AS flexible_inputs, + 0.0 AS flexible_outputs + FROM process_flows pf + JOIN processes p ON p.id = pf.process + WHERE p.sector = ? 
+ """, + [sector], + ).fetchdf() + + df = create_multiindex( + df, + index_columns=["technology", "region", "year", "commodity"], + index_names=["technology", "region", "year", "commodity"], + drop_columns=True, + ) + + result = create_xarray_dataset(df) + return result + + +def process_technodata_timeslices( + con: duckdb.DuckDBPyConnection, sector: str +) -> xr.Dataset: + """Create an xarray Dataset for technodata timeslices from process_availabilities. + + Maps upper_bound to utilization_factor and lower_bound to minimum_service_factor + over dimensions (technology, region, year, timeslice). + """ + from muse.timeslices import TIMESLICE, sort_timeslices + + df = con.execute( + """ + SELECT + p.id AS technology, + pa.region, + pa.year, + pa.time_slice, + pa.upper_bound AS utilization_factor, + pa.lower_bound AS minimum_service_factor + FROM process_availabilities pa + JOIN processes p ON p.id = pa.process + WHERE p.sector = ? + """, + [sector], + ).fetchdf() + + # Create dataset + df = create_multiindex( + df, + index_columns=["technology", "region", "year", "time_slice"], + index_names=["technology", "region", "year", "timeslice"], + drop_columns=True, + ) + result = create_xarray_dataset(df) + + # Stack timeslice levels (month, day, hour) into a single timeslice dimension + timeslice_levels = TIMESLICE.coords["timeslice"].indexes["timeslice"].names + if all(level in result.dims for level in timeslice_levels): + result = result.stack(timeslice=timeslice_levels) + return sort_timeslices(result) + + def process_initial_market(con: duckdb.DuckDBPyConnection, currency: str) -> xr.Dataset: """Create initial market dataset with prices and zero trade variables. @@ -754,6 +860,8 @@ def process_initial_market(con: duckdb.DuckDBPyConnection, currency: str) -> xr. prices, exports, imports, static_trade. Adds coordinate units_prices = f"{currency}/{unit}" per commodity. """ + from muse.timeslices import broadcast_timeslice + if not isinstance(currency, str) or not currency.strip(): raise ValueError("currency must be a non-empty string") @@ -782,6 +890,9 @@ def process_initial_market(con: duckdb.DuckDBPyConnection, currency: str) -> xr. drop_columns=True, ) result = create_xarray_dataset(df) + + # Broadcast over time slices + result = broadcast_timeslice(result) return result @@ -829,44 +940,6 @@ def process_agent_parameters(con: duckdb.DuckDBPyConnection, sector: str) -> lis return result -def process_io_technodata(con: duckdb.DuckDBPyConnection, sector: str) -> xr.Dataset: - """Create an xarray Dataset for IO technodata from DB tables. - - Uses `process_flows` to build input/output coefficients over - dimensions (technology, region, year, commodity) with 'fixed' and - 'flexible' variables. Since flexible inputs/outputs are eliminated, - 'flexible' is filled with zeros. - """ - # Get both input and output coefficients for the sector - df = con.execute( - """ - SELECT - p.id AS technology, - pf.commodity, - pf.region, - pf.year, - pf.input_coeff AS fixed_inputs, - pf.output_coeff AS fixed_outputs, - 0.0 AS flexible_inputs, - 0.0 AS flexible_outputs - FROM process_flows pf - JOIN processes p ON p.id = pf.process - WHERE p.sector = ? 
- """, - [sector], - ).fetchdf() - - df = create_multiindex( - df, - index_columns=["technology", "region", "year", "commodity"], - index_names=["technology", "region", "year", "commodity"], - drop_columns=True, - ) - - result = create_xarray_dataset(df) - return result - - def process_initial_capacity( con: duckdb.DuckDBPyConnection, sector: str ) -> xr.DataArray: diff --git a/tests/test_new_readers.py b/tests/test_new_readers.py index 0a5ee509b..4f1c006ac 100644 --- a/tests/test_new_readers.py +++ b/tests/test_new_readers.py @@ -77,6 +77,12 @@ def test_read_assets_csv(con): con.sql("SELECT * FROM assets").fetchnumpy() +def test_setup_timeslice_globals(con): + from muse.new_input.readers import setup_timeslice_globals + + setup_timeslice_globals(con) + + def test_process_global_commodities(con): from muse.new_input.readers import process_global_commodities @@ -89,6 +95,18 @@ def test_process_technodictionary(con): process_technodictionary(con, sector="power") +def test_process_io_technodata(con): + from muse.new_input.readers import process_io_technodata + + process_io_technodata(con, sector="power") + + +def test_process_technodata_timeslices(con): + from muse.new_input.readers import process_technodata_timeslices + + process_technodata_timeslices(con, sector="power") + + def test_process_agent_parameters(con): from muse.new_input.readers import process_agent_parameters @@ -101,12 +119,6 @@ def test_process_initial_market(con): process_initial_market(con, currency="EUR") -def test_process_io_technodata(con): - from muse.new_input.readers import process_io_technodata - - process_io_technodata(con, sector="power") - - def test_process_initial_capacity(con): from muse.new_input.readers import process_initial_capacity From b599574ea21e45c66fa67497241858abdc82afa1 Mon Sep 17 00:00:00 2001 From: Tom Bland Date: Mon, 18 Aug 2025 11:09:24 +0100 Subject: [PATCH 43/43] process_technologies --- src/muse/new_input/readers.py | 26 ++++++++++++++++++++++++++ tests/test_new_readers.py | 6 ++++++ 2 files changed, 32 insertions(+) diff --git a/src/muse/new_input/readers.py b/src/muse/new_input/readers.py index e4eaf2a8b..12750983b 100644 --- a/src/muse/new_input/readers.py +++ b/src/muse/new_input/readers.py @@ -896,6 +896,32 @@ def process_initial_market(con: duckdb.DuckDBPyConnection, currency: str) -> xr. return result +def process_technologies(con: duckdb.DuckDBPyConnection, sector: str) -> xr.Dataset: + """Create an xarray Dataset combining all technology data for a sector. + + Combines technodictionary, io_technodata, and technodata_timeslices into a + single dataset with commodity usage flags. 
+ """ + from muse.commodities import CommodityUsage + + technodata = process_technodictionary(con, sector) + io_data = process_io_technodata(con, sector) + technodata_timeslices = process_technodata_timeslices(con, sector) + technodata = technodata.merge(io_data).merge(technodata_timeslices) + + # Add commodity information + commodities = process_global_commodities(con) + technodata = technodata.merge(commodities.sel(commodity=technodata.commodity)) + + # Add commodity usage flags + technodata["comm_usage"] = ( + "commodity", + CommodityUsage.from_technologies(technodata).values, + ) + technodata = technodata.drop_vars("commodity_type") + return technodata + + def process_agent_parameters(con: duckdb.DuckDBPyConnection, sector: str) -> list[dict]: """Create a list of agent dictionaries for a sector from DB tables.""" df = con.execute( diff --git a/tests/test_new_readers.py b/tests/test_new_readers.py index 4f1c006ac..2010eff41 100644 --- a/tests/test_new_readers.py +++ b/tests/test_new_readers.py @@ -107,6 +107,12 @@ def test_process_technodata_timeslices(con): process_technodata_timeslices(con, sector="power") +def test_process_technologies(con): + from muse.new_input.readers import process_technologies + + process_technologies(con, sector="power") + + def test_process_agent_parameters(con): from muse.new_input.readers import process_agent_parameters