training metaenvs (such as time-series envs) from yaml files
felimomo committed Feb 21, 2024
1 parent 3f27ead commit 9100c3c
Showing 9 changed files with 154 additions and 9 deletions.
14 changes: 14 additions & 0 deletions hyperpars/ppo-greencrab-ts-v2-1.yml
@@ -0,0 +1,14 @@
# stable-baselines3 configuration

algo: "PPO"
meta_env_id: "TimeSeries-v2"
meta_env_kwargs: {N_mem: 3}
base_env_id: "GreenCrab-v2"
base_env_cfg: {r: 0.5, imm: 2000, problem_scale: 2000, action_reward_scale: 0.5, env_stoch: 0.1}
n_envs: 12
tensorboard: "/home/rstudio/logs"
total_timesteps: 6000000
use_sde: True
id: "N_mem_3"
repo: "cboettig/rl-ecology"
save_path: "/home/rstudio/saved_agents"
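A minimal sketch (assuming the registrations added in this commit) of how these keys are consumed by the sb3_train_metaenv helper added below in src/rl4greencrab/util.py: base_env_cfg is passed to gym.make for the base environment, and meta_env_kwargs is merged into the meta-environment's config.

# Sketch only; assumes the GreenCrab-v2 and TimeSeries-v2 registrations from this commit.
import gymnasium as gym
import rl4greencrab  # registers GreenCrab-v2 and TimeSeries-v2

base_env = gym.make(
    "GreenCrab-v2",
    config={"r": 0.5, "imm": 2000, "problem_scale": 2000,
            "action_reward_scale": 0.5, "env_stoch": 0.1},
)
meta_env = gym.make("TimeSeries-v2", config={"base_env": base_env, "N_mem": 3})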
2 changes: 1 addition & 1 deletion hyperpars/ppo-greencrab-v0-1.yml
@@ -5,7 +5,7 @@ env_id: "GreenCrab-v2"
n_envs: 12
tensorboard: "/home/rstudio/logs"
total_timesteps: 6000000
config: {}
config: {r: 0.5, imm: 2000, problem_scale: 2000, action_reward_scale: 0.5, env_stoch: 0.1}
use_sde: True
id: "1"
repo: "cboettig/rl-ecology"
14 changes: 14 additions & 0 deletions hyperpars/tqc-greencrab-ts-v2-1.yml
@@ -0,0 +1,14 @@
# stable-baselines3 configuration

algo: "TQC"
meta_env_id: "TimeSeries-v2"
meta_env_kwargs: {N_mem: 3}
base_env_id: "GreenCrab-v2"
base_env_cfg: {r: 0.5, imm: 2000, problem_scale: 2000, action_reward_scale: 0.5, env_stoch: 0.1}
n_envs: 12
tensorboard: "/home/rstudio/logs"
total_timesteps: 6000000
use_sde: True
id: "N_mem_3"
repo: "cboettig/rl-ecology"
save_path: "/home/rstudio/saved_agents"
4 changes: 2 additions & 2 deletions hyperpars/tqc-greencrab-v0-1.yml
@@ -4,8 +4,8 @@ algo: "TQC"
env_id: "GreenCrab-v2"
n_envs: 12
tensorboard: "/home/rstudio/logs"
total_timesteps: 6000000
config: {}
total_timesteps: 1000000
config: {r: 0.5, imm: 2000, problem_scale: 2000, action_reward_scale: 0.5, env_stoch: 0.1}
use_sde: True
id: "1"
repo: "cboettig/rl-ecology"
2 changes: 1 addition & 1 deletion scripts/train.py
@@ -5,6 +5,6 @@
args = parser.parse_args()

import rl4greencrab
from rl4greencrab.utils import sb3_train
from rl4greencrab import sb3_train

sb3_train(args.file)
10 changes: 10 additions & 0 deletions scripts/train_metaenv.py
@@ -0,0 +1,10 @@
#!/opt/venv/bin/python
import argparse
parser = argparse.ArgumentParser()
parser.add_argument("-f", "--file", help="Path config file", type=str)
args = parser.parse_args()

import rl4greencrab
from rl4greencrab import sb3_train_metaenv

sb3_train_metaenv(args.file)
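A short usage sketch, assuming the repository root as the working directory: the script parses -f/--file and hands the path to sb3_train_metaenv, so the equivalent call from Python is:

# Equivalent to: python scripts/train_metaenv.py -f hyperpars/ppo-greencrab-ts-v2-1.yml
from rl4greencrab import sb3_train_metaenv

sb3_train_metaenv("hyperpars/ppo-greencrab-ts-v2-1.yml")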
7 changes: 4 additions & 3 deletions src/rl4greencrab/__init__.py
@@ -1,8 +1,9 @@
from rl4greencrab.invasive_ipm import invasive_IPM, invasive_IPM_v2
from rl4greencrab.ts_model import ts_env_v1
from rl4greencrab.util import sb3_train, sb3_train_v2
from rl4greencrab.ts_model import ts_env_v1, ts_env_v2
from rl4greencrab.util import sb3_train, sb3_train_v2, sb3_train_metaenv

from gymnasium.envs.registration import register
register(id="GreenCrab-v1", entry_point="rl4greencrab.invasive_ipm:invasive_IPM")
register(id="GreenCrab-v2", entry_point="rl4greencrab.invasive_ipm:invasive_IPM_v2")
register(id="TimeSeries-v1", entry_point="rl4greencrab.ts_model:ts_env_v1")
register(id="TimeSeries-v1", entry_point="rl4greencrab.ts_model:ts_env_v1")
register(id="TimeSeries-v2", entry_point="rl4greencrab.ts_model:ts_env_v2")
66 changes: 66 additions & 0 deletions src/rl4greencrab/ts_model.py
@@ -80,6 +80,72 @@ def state_to_pop(self, state):



class ts_env_v2(gym.Env):
    """
    Takes a base environment and produces a new environment, ts_env, whose
    observations are time series of the base environment's observations.
    v2: same as v1, but the base environment is passed in directly as `base_env`
    rather than as `base_env_cls` + `base_env_cfg`.
    """
    def __init__(self, config = {}):
        self.N_mem = config.get('N_mem', 5)
        if 'base_env' not in config:
            raise Warning(
                "ts_env initializer needs a base environment "
                "out of whose dynamics the time series will be built! "
                "Try: ts_env(config = {'base_env': <<your env>>, ...}). \n\n"
                "(Here, <<your env>> should be an env instance, not a class!)"
            )
        self.base_env = config['base_env']

        self.action_space = self.base_env.action_space
        self.observation_space = spaces.Box(
            np.array(
                [
                    - np.ones(shape=self.base_env.observation_space.shape, dtype=np.float32)
                    for _ in range(self.N_mem)
                ]
            ),
            np.array(
                [
                    + np.ones(shape=self.base_env.observation_space.shape, dtype=np.float32)
                    for _ in range(self.N_mem)
                ]
            ),
        )
        #
        # [[state t], [state t-1], [state t-2], ..., [state t - (N_mem-1)]]
        # where each [state i] is a vector
        #
        _ = self.reset()

    def reset(self, *, seed=42, options=None):
        init_state, init_info = self.base_env.reset(seed=seed, options=options)
        empty_heap = - np.ones(shape=self.observation_space.shape, dtype=np.float32)
        self.heap = np.insert(
            empty_heap[0:-1],
            0,
            init_state,
            axis=0,
        )
        return self.heap, init_info

    def step(self, action):
        new_state, reward, terminated, truncated, info = self.base_env.step(action)
        self.heap = np.insert(
            self.heap[0:-1],
            0,
            new_state,
            axis=0,
        )
        return self.heap, reward, terminated, truncated, info

    def pop_to_state(self, pop):
        return self.base_env.pop_to_state(pop)

    def state_to_pop(self, state):
        return self.base_env.state_to_pop(state)
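An illustrative sketch of the stacked-observation behaviour described in the docstring, assuming GreenCrab-v2 accepts the config keyword as used elsewhere in this commit: the wrapper returns the last N_mem base observations, newest first, with -1 padding right after a reset.

import gymnasium as gym
from rl4greencrab import ts_env_v2

base = gym.make("GreenCrab-v2", config={})
env = ts_env_v2(config={"base_env": base, "N_mem": 3})

obs, info = env.reset(seed=0)
print(obs.shape)  # (3,) + base.observation_space.shape; row 0 is the newest observation
obs, reward, terminated, truncated, info = env.step(env.action_space.sample())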




44 changes: 42 additions & 2 deletions src/rl4greencrab/util.py
@@ -1,5 +1,7 @@
import yaml
import os

import gymnasium as gym
from stable_baselines3.common.env_util import make_vec_env
from stable_baselines3 import PPO, A2C, DQN, SAC, TD3
from sb3_contrib import TQC, ARS
@@ -42,9 +44,9 @@ def sb3_train(config_file):
        use_sde=options["use_sde"],
    )

    model.learn(total_timesteps=options["total_timesteps"], tb_log_name=model_id)
    model.learn(total_timesteps=options["total_timesteps"], tb_log_name=model_id, progress_bar=True)

    os.mkdirs(options["save_path"], exist_ok=True)
    os.makedirs(options["save_path"], exist_ok=True)
    model.save(save_id)
    try:
        full_path = save_id + ".zip"
@@ -72,4 +74,42 @@ def sb3_train_v2(options = dict):
    model.save(model_id)
    path = model_id + ".zip"
    # deploy_model(path, "sb3/"+path, repo=options["repo"])
    # deploy_model(config_file, "sb3/"+config_file, repo=options["repo"])

def sb3_train_metaenv(config_file):
    with open(config_file, "r") as stream:
        options = yaml.safe_load(stream)

    vec_env = make_vec_env(
        options["meta_env_id"],
        options["n_envs"],
        env_kwargs={
            "config":
            {
                "base_env": gym.make(options["base_env_id"], config=options["base_env_cfg"]),
                **options["meta_env_kwargs"],
            }
        }
    )
    ALGO = algorithm(options["algo"])
    model_id = "{}-Meta_{}-Base_{}-{}".format(
        options["algo"],
        options["meta_env_id"],
        options["base_env_id"],
        options["id"],
    )
    save_id = os.path.join(options["save_path"], model_id)

    model = ALGO(
        "MlpPolicy",
        vec_env,
        verbose=0,
        tensorboard_log=options["tensorboard"],
        use_sde=options["use_sde"],
    )
    model.learn(total_timesteps=options["total_timesteps"], tb_log_name=model_id, progress_bar=True)

    model.save(save_id)
    # path = model_id + ".zip"
    # deploy_model(path, "sb3/"+path, repo=options["repo"])
    # deploy_model(config_file, "sb3/"+config_file, repo=options["repo"])
