Refactoring and testing (#6)
* training meta-envs (such as time-series envs) from YAML files

* trial-and-error search through parameter space

* greenCrabEnv, greenCrabSimplifiedEnv, timeSeriesEnv

* GreenCrabSimplifiedEnv

* benchmark agents

* utils subdirectory
felimomo authored Feb 25, 2024
1 parent 8d0026e commit 8fc52ca
Showing 22 changed files with 303 additions and 205 deletions.
@@ -4,9 +4,10 @@ algo: "TQC"
env_id: "GreenCrab-v2"
n_envs: 12
tensorboard: "/home/rstudio/logs"
-total_timesteps: 6000000
-config: {r: 0.5, imm: 2000, problem_scale: 2000, action_reward_scale: 0.5, env_stoch: 0.1}
+total_timesteps: 1000000
+config: {r: 0.5, imm: 500, problem_scale: 2000, action_reward_scale: 0.5, env_stoch: 0.05}
use_sde: True
+id: "bmk-1"
repo: "cboettig/rl-ecology"
save_path: "/home/rstudio/saved_agents"
progress_bar: False
@@ -4,9 +4,10 @@ algo: "TQC"
env_id: "GreenCrab-v2"
n_envs: 12
tensorboard: "/home/rstudio/logs"
-total_timesteps: 6000000
-config: {r: 0.5, imm: 1000, problem_scale: 2000, action_reward_scale: 0.5, env_stoch: 0.1}
+total_timesteps: 1000000
+config: {r: 0.5, imm: 500, problem_scale: 2000, action_reward_scale: 0.5, env_stoch: 0.1}
use_sde: True
+id: "bmk-2"
repo: "cboettig/rl-ecology"
save_path: "/home/rstudio/saved_agents"
progress_bar: False
@@ -4,9 +4,10 @@ algo: "TQC"
env_id: "GreenCrab-v2"
n_envs: 12
tensorboard: "/home/rstudio/logs"
-total_timesteps: 6000000
-config: {r: 0.5, imm: 500, problem_scale: 2000, action_reward_scale: 0.5, env_stoch: 0.1}
+total_timesteps: 1000000
+config: {r: 0.5, imm: 500, problem_scale: 2000, action_reward_scale: 0.5, env_stoch: 0.15}
use_sde: True
+id: "bmk-3"
repo: "cboettig/rl-ecology"
save_path: "/home/rstudio/saved_agents"
progress_bar: False
@@ -4,9 +4,10 @@ algo: "TQC"
env_id: "GreenCrab-v2"
n_envs: 12
tensorboard: "/home/rstudio/logs"
-total_timesteps: 6000000
-config: {r: 0.5, imm: 250, problem_scale: 2000, action_reward_scale: 0.5, env_stoch: 0.1}
+total_timesteps: 1000000
+config: {r: 0.5, imm: 500, problem_scale: 2000, action_reward_scale: 0.5, env_stoch: 0.2}
use_sde: True
+id: "bmk-4"
repo: "cboettig/rl-ecology"
save_path: "/home/rstudio/saved_agents"
progress_bar: False
@@ -4,9 +4,10 @@ algo: "TQC"
env_id: "GreenCrab-v2"
n_envs: 12
tensorboard: "/home/rstudio/logs"
-total_timesteps: 6000000
-config: {r: 0.5, imm: 125, problem_scale: 2000, action_reward_scale: 0.5, env_stoch: 0.1}
+total_timesteps: 1000000
+config: {r: 0.5, imm: 500, problem_scale: 2000, action_reward_scale: 0.5, env_stoch: 0.25}
use_sde: True
+id: "bmk-5"
repo: "cboettig/rl-ecology"
save_path: "/home/rstudio/saved_agents"
progress_bar: False
13 changes: 13 additions & 0 deletions hyperpars/systematic-benchmarks/TQC-Nmem_1/6-tqc_nmem-1_bmk.yml
@@ -0,0 +1,13 @@
# stable-baselines3 configuration

algo: "TQC"
env_id: "GreenCrab-v2"
n_envs: 12
tensorboard: "/home/rstudio/logs"
total_timesteps: 1000000
config: {r: 0.7, imm: 500, problem_scale: 2000, action_reward_scale: 0.5, env_stoch: 0.1}
use_sde: True
id: "bmk-6"
repo: "cboettig/rl-ecology"
save_path: "/home/rstudio/saved_agents"
progress_bar: False
13 changes: 13 additions & 0 deletions hyperpars/systematic-benchmarks/TQC-Nmem_1/7-tqc_nmem-1_bmk.yml
@@ -0,0 +1,13 @@
# stable-baselines3 configuration

algo: "TQC"
env_id: "GreenCrab-v2"
n_envs: 12
tensorboard: "/home/rstudio/logs"
total_timesteps: 1000000
config: {r: 0.9, imm: 500, problem_scale: 2000, action_reward_scale: 0.5, env_stoch: 0.1}
use_sde: True
id: "bmk-7"
repo: "cboettig/rl-ecology"
save_path: "/home/rstudio/saved_agents"
progress_bar: False
13 changes: 13 additions & 0 deletions hyperpars/systematic-benchmarks/TQC-Nmem_1/8-tqc_nmem-1_bmk.yml
@@ -0,0 +1,13 @@
# stable-baselines3 configuration

algo: "TQC"
env_id: "GreenCrab-v2"
n_envs: 12
tensorboard: "/home/rstudio/logs"
total_timesteps: 1000000
config: {r: 1.1, imm: 500, problem_scale: 2000, action_reward_scale: 0.5, env_stoch: 0.1}
use_sde: True
id: "bmk-8"
repo: "cboettig/rl-ecology"
save_path: "/home/rstudio/saved_agents"
progress_bar: False
13 changes: 13 additions & 0 deletions hyperpars/systematic-benchmarks/TQC-Nmem_1/9-tqc_nmem-1_bmk.yml
@@ -0,0 +1,13 @@
# stable-baselines3 configuration

algo: "TQC"
env_id: "GreenCrab-v2"
n_envs: 12
tensorboard: "/home/rstudio/logs"
total_timesteps: 1000000
config: {r: 1.5, imm: 500, problem_scale: 2000, action_reward_scale: 0.5, env_stoch: 0.1}
use_sde: True
id: "bmk-9"
repo: "cboettig/rl-ecology"
save_path: "/home/rstudio/saved_agents"
progress_bar: False
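For orientation, a config in this format can be consumed roughly as follows. This is a minimal sketch, not the repository's scripts/train.py; the env_kwargs plumbing and the save-path naming are assumptions, and the field names mirror the YAML files above.

```python
import yaml
import rl4greencrab  # noqa: F401  (registers the GreenCrab environments)
from sb3_contrib import TQC
from stable_baselines3.common.env_util import make_vec_env

# any of the benchmark configs in this commit would work here
with open("hyperpars/systematic-benchmarks/TQC-Nmem_1/6-tqc_nmem-1_bmk.yml") as f:
    cfg = yaml.safe_load(f)

# note: the YAMLs still reference env_id "GreenCrab-v2", while this commit's
# register() calls use "GreenCrab" / "GreenCrabSimpl"; the sketch just uses the id as given
vec_env = make_vec_env(
    cfg["env_id"],
    n_envs=cfg["n_envs"],
    env_kwargs={"config": cfg["config"]},
)
model = TQC(
    "MlpPolicy",
    vec_env,
    use_sde=cfg["use_sde"],
    tensorboard_log=cfg["tensorboard"],
)
model.learn(total_timesteps=cfg["total_timesteps"], progress_bar=cfg["progress_bar"])
model.save(f"{cfg['save_path']}/{cfg['algo']}-{cfg['id']}")  # hypothetical naming scheme
```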
10 changes: 5 additions & 5 deletions scripts/train_benchmarks.sh
@@ -4,8 +4,8 @@
scriptdir="$(dirname "$0")"
cd "$scriptdir"

-python scripts/train.py --file ../hyperpars/systematic-benchmarks/TQC-Nmem_1/1-tqc_nmem-1_bmk.yml &
-python scripts/train.py --file ../hyperpars/systematic-benchmarks/TQC-Nmem_1/2-tqc_nmem-1_bmk.yml &
-python scripts/train.py --file ../hyperpars/systematic-benchmarks/TQC-Nmem_1/3-tqc_nmem-1_bmk.yml &
-python scripts/train.py --file ../hyperpars/systematic-benchmarks/TQC-Nmem_1/4-tqc_nmem-1_bmk.yml &
-python scripts/train.py --file ../hyperpars/systematic-benchmarks/TQC-Nmem_1/5-tqc_nmem-1_bmk.yml &
+python train.py --file ../hyperpars/systematic-benchmarks/TQC-Nmem_1/10-tqc_nmem-1_bmk.yml &
+python train.py --file ../hyperpars/systematic-benchmarks/TQC-Nmem_1/11-tqc_nmem-1_bmk.yml &
+python train.py --file ../hyperpars/systematic-benchmarks/TQC-Nmem_1/12-tqc_nmem-1_bmk.yml &
+python train.py --file ../hyperpars/systematic-benchmarks/TQC-Nmem_1/13-tqc_nmem-1_bmk.yml &
+python train.py --file ../hyperpars/systematic-benchmarks/TQC-Nmem_1/14-tqc_nmem-1_bmk.yml &
24 changes: 17 additions & 7 deletions src/rl4greencrab/__init__.py
@@ -1,9 +1,19 @@
-from rl4greencrab.invasive_ipm import invasive_IPM, invasive_IPM_v2
-from rl4greencrab.ts_model import ts_env_v1, ts_env_v2
-from rl4greencrab.util import sb3_train, sb3_train_v2, sb3_train_metaenv
+from rl4greencrab.envs.green_crab_ipm import greenCrabEnv, greenCrabSimplifiedEnv
+from rl4greencrab.envs.time_series import timeSeriesEnv
+from rl4greencrab.agents.const_action import constAction
+from rl4greencrab.agents.const_escapement import constEsc
+# from envs.util import sb3_train, sb3_train_v2, sb3_train_metaenv

from gymnasium.envs.registration import register
-register(id="GreenCrab-v1", entry_point="rl4greencrab.invasive_ipm:invasive_IPM")
-register(id="GreenCrab-v2", entry_point="rl4greencrab.invasive_ipm:invasive_IPM_v2")
-register(id="TimeSeries-v1", entry_point="rl4greencrab.ts_model:ts_env_v1")
-register(id="TimeSeries-v2", entry_point="rl4greencrab.ts_model:ts_env_v2")
+register(
+    id="GreenCrab",
+    entry_point="rl4greencrab.green_crab_ipm:greenCrabEnv",
+)
+register(
+    id="GreenCrabSimpl",
+    entry_point="rl4greencrab.green_crab_ipm:greenCrabSimplifiedEnv"
+)
+register(
+    id="TimeSeries",
+    entry_point="rl4greencrab.time_series:TimeSeriesEnv",
+)
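Once the package is imported, the registered ids go through gymnasium as usual. A minimal sketch, assuming the entry points above resolve; the config dict here just mirrors the YAML files in this commit:

```python
import gymnasium as gym
import rl4greencrab  # noqa: F401  (side effect: runs the register() calls above)

env = gym.make("GreenCrabSimpl", config={"r": 0.5, "imm": 500, "env_stoch": 0.1})
obs, info = env.reset(seed=42)
obs, reward, terminated, truncated, info = env.step(env.action_space.sample())
```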
Empty file.
11 changes: 11 additions & 0 deletions src/rl4greencrab/agents/const_action.py
@@ -0,0 +1,11 @@
class constAction:
    def __init__(self, mortality=0, env=None, **kwargs):
        self.mortality = mortality
        self.action = 2 * self.mortality - 1
        self.env = env

    def predict(self, observation):
        return self.action
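This baseline applies the same trapping mortality every step, mapped into the [-1, 1] action space. A sketch of rolling it out against an environment (hypothetical episode loop; assumes the GreenCrabSimpl registration above resolves):

```python
import numpy as np
import gymnasium as gym
import rl4greencrab  # noqa: F401
from rl4greencrab import constAction

env = gym.make("GreenCrabSimpl")
agent = constAction(mortality=0.25)   # constant effort; 2 * 0.25 - 1 = -0.5 in action space

obs, info = env.reset(seed=0)
done, total_reward = False, 0.0
while not done:
    action = np.float32([agent.predict(obs)])      # constant action every month/year
    obs, reward, terminated, truncated, info = env.step(action)
    total_reward += reward
    done = terminated or truncated
```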
23 changes: 23 additions & 0 deletions src/rl4greencrab/agents/const_escapement.py
@@ -0,0 +1,23 @@
class constEsc:
    def __init__(self, escapement, env=None):
        self.escapement = escapement
        self.env = env
        self.bound = 1
        if self.env is not None:
            self.bound = self.env.bound

    def predict(self, observation):
        obs_nat_units = self.bound * self.to_01(observation)
        if obs_nat_units <= self.escapement or obs_nat_units <= 0:
            return -1
        mortality = (obs_nat_units - self.escapement) / self.escapement
        return self.to_pm1(mortality)

    def to_01(self, val):
        return (val + 1) / 2

    def to_pm1(self, val):
        return 2 * val - 1
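The escapement rule maps the observation from [-1, 1] back to natural units and harvests only the surplus above the escapement target. A quick worked example of the arithmetic above (numbers are illustrative):

```python
from rl4greencrab import constEsc

agent = constEsc(escapement=0.4)           # bound defaults to 1 when no env is given
obs = 0.2                                   # observation in [-1, 1]
obs_nat = agent.bound * agent.to_01(obs)    # (0.2 + 1) / 2 = 0.6 in natural units
mortality = (obs_nat - agent.escapement) / agent.escapement   # (0.6 - 0.4) / 0.4 = 0.5
action = agent.predict(obs)                 # to_pm1(0.5) = 0.0, i.e. mid-range effort
```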
@@ -12,7 +12,7 @@
with ts_model.py
"""

-class invasive_IPM(gym.Env):
+class greenCrabEnv(gym.Env):
    metadata = {"render.modes": ["human"]}

    def __init__(
@@ -55,8 +55,11 @@ def __init__(

        self.w_mort_scale = config.get("w_mort_scale", 5)
        self.K = config.get("K", 25000) #carrying capacity
-        self.imm = config.get("imm", 2) #colonization/immigration rate
-        self.r = config.get("r", 2) #intrinsic rate of growth
+        self.imm = config.get("imm", 1000) #colonization/immigration rate
+        self.r = config.get("r", 0.5) #intrinsic rate of growth
+
+        self.max_action = config.get("max_action", 2000)
+        self.max_obs = config.get("max_obs", 2000)

        self.area = config.get("area", 4000)
        self.loss_a = config.get("loss_a", 0.265)
@@ -70,11 +73,11 @@ def __init__(

        self.delta_t = config.get("delta_t", 1/12)
        self.env_stoch = config.get("env_stoch", 0.1)
-        self.action_reward_scale = config.get("action_reward_scale", 0.001)
+        self.action_reward_scale = config.get("action_reward_scale", 0.5)
        self.config = config

        # Preserve these for reset
-        self.observations = np.array([0,0,0,0,0,0,0,0,0], dtype=np.float32)
+        self.observations = np.zeros(shape=9, dtype=np.float32)
        self.reward = 0
        self.years_passed = 0
        self.Tmax = config.get("Tmax", 100)
@@ -92,14 +95,14 @@ def __init__(
        # action -- # traps per month
        self.action_space = spaces.Box(
            np.array([0], dtype=np.float32),
-            np.array([2000], dtype=np.float32),
+            np.array([self.max_action], dtype=np.float32),
            dtype=np.float32,
        )

        # Observation space
        self.observation_space = spaces.Box(
-            np.array([0,0,0,0,0,0,0,0,0], dtype=np.float32),
-            np.array([2000,2000,2000,2000,2000,2000,2000,2000,2000], dtype=np.float32),
+            np.zeros(shape=9, dtype=np.float32),
+            self.max_obs * np.ones(shape=9, dtype=np.float32),
            dtype=np.float32,
        )

@@ -141,7 +144,7 @@ def step(self,action):
            size_freq[:,j+1] = [np.random.binomial(n=n_j[k], p=self.pmort) for k in range(self.nsize)]
            removed[:,j+1] = [np.random.binomial(size_freq[k,j+1], harvest_rate[k]) for k in range(self.nsize)]

-        self.observations = np.array([np.sum(removed[:,j]) for j in range(self.ntime)])
+        self.observations = np.array([np.sum(removed[:,j]) for j in range(self.ntime)], dtype = np.float32)

        # for k in range(21):
        #    #project to next size frequency
@@ -186,15 +189,15 @@ def step(self,action):

        return self.observations, self.reward, done, done, {}

-    def reset(self, seed=42, options=None):
+    def reset(self, *, seed=42, options=None):
        self.state = self.init_state()
        self.years_passed = 0

        # for tracking only
        self.reward = 0

        # self.observations = np.zeros(shape=self.ntime)
-        self.observations = np.random.randint(0,100, size=self.ntime)
+        self.observations = np.float32(np.random.randint(0,100, size=self.ntime))

        return self.observations, {}

@@ -253,12 +256,12 @@ def dd_growth(self,popsize):
    # 1. impact on environment (function of crab density)
    # 2. penalty for how much effort we expended (function of action)
    def reward_func(self,action):
-        reward = -self.loss_a/(1+np.exp(-self.loss_b*(np.sum(self.state)/self.area-self.loss_c)))-self.action_reward_scale*action/2000
+        reward = -self.loss_a/(1+np.exp(-self.loss_b*(np.sum(self.state)/self.area-self.loss_c)))-self.action_reward_scale*action/self.max_action
        return reward


-class invasive_IPM_v2(invasive_IPM):
-    """ like invasive_IPM but with simplified observations. """
+class greenCrabSimplifiedEnv(greenCrabEnv):
+    """ like invasive_IPM but with simplified observations and normalized to -1, 1 space. """
    def __init__(self, config={}):
        super().__init__(config=config)
        self.observation_space = spaces.Box(
@@ -271,32 +274,30 @@ def __init__(self, config={}):
            np.float32([1]),
            dtype=np.float32,
        )
-        # self.r = 0.5
-        # self.imm = 1
-        # self.K=25_000
-        self.problem_scale = config.get('problem_scale', 50) # ad hoc based on previous values
+        self.max_action = config.get('max_action', 2000) # ad hoc based on previous values
+        self.cpue_normalization = config.get('cpue_normalization', 100)

    def step(self, action):
-        scaled_action = np.maximum( self.problem_scale * (1 + action)/2 , 0.)
+        action_natural_units = np.maximum( self.max_action * (1 + action)/2 , 0.)
        obs, rew, term, trunc, info = super().step(
-            np.float32(scaled_action)
+            np.float32(action_natural_units)
        )
-        normalized_cpue = 2 * self.cpue_2(obs, scaled_action) - 1
+        normalized_cpue = 2 * self.cpue_2(obs, action_natural_units) - 1
        observation = np.float32(np.append(normalized_cpue, action))
        return observation, rew, term, trunc, info

-    def reset(self, seed=42, options=None):
+    def reset(self, *, seed=42, options=None):
        _, info = super().reset(seed=seed, options=options)

        # completely new obs
        return - np.ones(shape=self.observation_space.shape, dtype=np.float32), info

-    def cpue_2(self, obs, scaled_action):
-        if any(scaled_action <= 0):
+    def cpue_2(self, obs, action_natural_units):
+        if any(action_natural_units <= 0):
            return np.float32([0,0])
        cpue_2 = np.float32([
-            np.sum(obs[0:5]) / (100 * scaled_action[0]),
-            np.sum(obs[5:]) / (100 * scaled_action[0])
+            np.sum(obs[0:5]) / (self.cpue_normalization * action_natural_units[0]),
+            np.sum(obs[5:]) / (self.cpue_normalization * action_natural_units[0])
        ])
        return cpue_2

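To make the renaming concrete: greenCrabSimplifiedEnv maps agent actions from [-1, 1] to trap counts in [0, max_action] and compresses the monthly removal counts into two CPUE-like observations. A sketch of that round trip with illustrative numbers (not taken from a real rollout):

```python
import numpy as np

max_action, cpue_normalization = 2000, 100

action = np.float32([-0.5])                                              # policy output in [-1, 1]
action_natural_units = np.maximum(max_action * (1 + action) / 2, 0.0)    # -> 500 traps

monthly_removals = np.float32([30, 25, 20, 15, 10, 8, 6, 4, 2])          # hypothetical obs from greenCrabEnv.step
cpue_2 = np.float32([
    monthly_removals[0:5].sum() / (cpue_normalization * action_natural_units[0]),
    monthly_removals[5:].sum() / (cpue_normalization * action_natural_units[0]),
])
normalized_cpue = 2 * cpue_2 - 1                                         # back into [-1, 1]
observation = np.float32(np.append(normalized_cpue, action))             # shape (3,): early CPUE, late CPUE, last action
```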