adding multiprocessing
Jason Peng committed Sep 17, 2024
1 parent fff25c7 commit d3e608e
Showing 18 changed files with 513 additions and 120 deletions.
5 changes: 3 additions & 2 deletions README.md
@@ -19,19 +19,20 @@ and it should be good to go.

To train a policy, run the following command:

``python run.py --env_config data/envs/dm_cheetah.yaml --agent_config a2/dm_cheetah_cem_agent.yaml --mode train --log_file output/log.txt --out_model_file output/model.pt --visualize``
``python run.py --env_config data/envs/dm_cheetah.yaml --agent_config a2/dm_cheetah_cem_agent.yaml --mode train --log_file output/log.txt --out_model_file output/model.pt --visualize --num_workers 1``

- `--env_config` specifies the configuration file for the environment.
- `--agent_config` specifies the configuration file for the agent.
- `--visualize` enables visualization. Rendering should be disabled for faster training.
- `--log_file` specifies the output log file, which will keep track of statistics during training.
- `--out_model_file` specifies the output model file, which contains the model parameters.
- `--num_workers` specifies the number of worker processes used to parallelize training.
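The diff does not show how `run.py` consumes `--num_workers`, but a minimal sketch of the usual pattern with `torch.multiprocessing` is given below; `worker_main`, the rendezvous address, and the `gloo` backend are illustrative assumptions, not the repository's actual code.

```python
# Hypothetical sketch of launching --num_workers training processes.
import torch.distributed as dist
import torch.multiprocessing as mp

def worker_main(rank, num_workers):
    # Each worker joins a process group so results and gradients can be
    # gathered or averaged across processes during training.
    dist.init_process_group(backend="gloo",
                            init_method="tcp://127.0.0.1:23456",
                            rank=rank, world_size=num_workers)
    # ... build the env and agent, then run training for this worker ...
    dist.destroy_process_group()

if __name__ == "__main__":
    num_workers = 2  # value that --num_workers would supply
    mp.spawn(worker_main, args=(num_workers,), nprocs=num_workers)
```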

## Test Models

To load a trained model, run the following command:

``python run.py --env_config data/envs/dm_cheetah_env.yaml --agent_config a2/dm_cheetah_cem_agent.yaml --mode test --model_file data/models/dm_cheetah_ppo_model.pt --visualize``
``python run.py --env_config data/envs/dm_cheetah_env.yaml --agent_config a2/dm_cheetah_cem_agent.yaml --mode test --model_file data/models/dm_cheetah_ppo_model.pt --visualize --num_workers 1``

- `--model_file` specifies the `.pt` file that contains parameters for the trained model. Pretrained models are available in `data/models/`.

16 changes: 10 additions & 6 deletions a1/bc_agent.py
@@ -4,6 +4,7 @@
import learning.agent_builder as agent_builder
import learning.base_agent as base_agent
import learning.bc_model as bc_model
import util.mp_util as mp_util
import util.torch_util as torch_util

class BCAgent(base_agent.BaseAgent):
@@ -15,11 +16,16 @@ def __init__(self, config, env, device):

def _load_params(self, config):
super()._load_params(config)

buffer_size = config["exp_buffer_size"]
self._exp_buffer_length = max(buffer_size, self._steps_per_iter)

num_procs = mp_util.get_num_procs()

self._batch_size = config["batch_size"]
self._batch_size = int(np.ceil(self._batch_size / num_procs))

buffer_size = config["exp_buffer_size"]
self._exp_buffer_length = int(np.ceil(buffer_size / num_procs))
self._exp_buffer_length = max(self._exp_buffer_length, self._steps_per_iter)

self._update_epochs = config["update_epochs"]
return
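The ceiling division above splits the configured totals across workers, so the combined batch and buffer sizes stay roughly the same regardless of `--num_workers`. A quick worked example with illustrative values:

```python
import numpy as np

batch_size = 256          # from the agent config (illustrative value)
exp_buffer_size = 1000000
num_procs = 4             # number of worker processes

per_proc_batch = int(np.ceil(batch_size / num_procs))        # 64 samples per worker
per_proc_buffer = int(np.ceil(exp_buffer_size / num_procs))  # 250000 transitions per worker
# 4 workers * 64 = 256 samples per update, matching the single-process setting.
```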

@@ -81,10 +87,8 @@ def _update_model(self):
batch = self._exp_buffer.sample(batch_size)
loss_info = self._compute_loss(batch)

self._optimizer.zero_grad()
loss = loss_info["loss"]
loss.backward()
self._optimizer.step()
self._optimizer.step(loss)

torch_util.add_torch_dict(loss_info, train_info)

5 changes: 4 additions & 1 deletion a1/dm_cheetah_bc_agent.yaml
@@ -6,6 +6,10 @@ model:
actor_std_type: "FIXED"
action_std: 0.2

optimizer:
type: "SGD"
learning_rate: 5e-4

expert_config: "data/agents/bc_expert_agent.yaml"
expert_model_file: "data/models/dm_cheetah_expert_model.pt"

@@ -14,7 +18,6 @@ steps_per_iter: 100
iters_per_output: 10
test_episodes: 10

learning_rate: 5e-4
update_epochs: 20

exp_buffer_size: 1000000
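The learning rate now lives under an `optimizer:` block instead of a top-level `learning_rate` key. A minimal sketch of reading that block and constructing a matching torch optimizer (the stand-in model and the `momentum` value are assumptions):

```python
import torch
import yaml

model = torch.nn.Linear(4, 2)  # stand-in model for illustration

with open("a1/dm_cheetah_bc_agent.yaml") as f:
    config = yaml.safe_load(f)

opt_cfg = config["optimizer"]  # {"type": "SGD", "learning_rate": 5e-4}
assert opt_cfg["type"] == "SGD"
# YAML may parse "5e-4" as a string, so cast explicitly before building the optimizer.
optimizer = torch.optim.SGD(model.parameters(),
                            lr=float(opt_cfg["learning_rate"]),
                            momentum=0.9)
```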
7 changes: 5 additions & 2 deletions a1/dm_walker_bc_agent.yaml
@@ -5,7 +5,11 @@ model:
actor_init_output_scale: 0.01
actor_std_type: "FIXED"
action_std: 0.2


optimizer:
type: "SGD"
learning_rate: 5e-4

expert_config: "data/agents/bc_expert_agent.yaml"
expert_model_file: "data/models/dm_walker_run_expert_model.pt"

@@ -14,7 +18,6 @@ steps_per_iter: 100
iters_per_output: 10
test_episodes: 10

learning_rate: 5e-4
update_epochs: 20

exp_buffer_size: 1000000
40 changes: 36 additions & 4 deletions a2/cem_agent.py
@@ -3,6 +3,7 @@

import learning.base_agent as base_agent
import learning.cem_model as cem_model
import util.mp_util as mp_util

class CEMAgent(base_agent.BaseAgent):
NAME = "CEM"
@@ -57,10 +58,16 @@ def _update_sample_count(self):
return self._sample_count

def _train_iter(self):
candidates = self._sample_candidates(self._population_size)
num_procs = mp_util.get_num_procs()
num_candidates = int(np.ceil(self._population_size / num_procs))
candidates = self._sample_candidates(num_candidates)

rets, ep_lens = self._eval_candidates(candidates)

candidates, rets, ep_lens = self._gather_candidates(candidates, rets, ep_lens)

rets = rets.cpu().numpy()
ep_lens = ep_lens.cpu().numpy()

curr_best_idx = np.argmax(rets)
curr_best_ret = rets[curr_best_idx]

@@ -82,6 +89,8 @@ def _train_iter(self):
num_eps = self._population_size * self._eps_per_candidate
mean_param_std = torch.mean(new_std)

assert(self._check_synced()), "Network parameters desynchronized"

train_info = {
"mean_return": train_return,
"mean_ep_len": train_ep_len,
@@ -90,6 +99,29 @@
}
return train_info

def _gather_candidates(self, candidates, rets, ep_lens):
candidates = mp_util.all_gather(candidates)
rets = mp_util.all_gather(rets)
ep_lens = mp_util.all_gather(ep_lens)

candidates = torch.cat(candidates, dim=0)
rets = torch.cat(rets, dim=0)
ep_lens = torch.cat(ep_lens, dim=0)

return candidates, rets, ep_lens

def _check_synced(self):
global_param_mean = mp_util.broadcast(self._param_mean)
global_param_std = mp_util.broadcast(self._param_std)
global_best_params = mp_util.broadcast(self._best_params)

synced = torch.equal(global_param_mean, self._param_mean)
synced &= torch.equal(global_param_std, self._param_std)
synced &= torch.equal(global_best_params, self._best_params)

return synced



def _sample_candidates(self, n):
'''
@@ -118,8 +150,8 @@ def _eval_candidates(self, candidates):
n = candidates.shape[0]

# placeholder
rets = np.zeros(n)
ep_lens = np.zeros(n)
rets = torch.zeros(n, device=self._device)
ep_lens = torch.zeros(n, device=self._device)

return rets, ep_lens
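`mp_util.get_num_procs`, `all_gather`, and `broadcast` are used above to split candidate generation across workers, pool their evaluations, and verify that every process ends the iteration with identical parameters. A plausible implementation on top of `torch.distributed` (an assumption about `util/mp_util.py`, not its actual contents):

```python
import torch
import torch.distributed as dist

def get_num_procs():
    return dist.get_world_size() if dist.is_initialized() else 1

def all_gather(tensor):
    # Returns a list with one tensor per process (including this one).
    if not dist.is_initialized():
        return [tensor]
    out = [torch.zeros_like(tensor) for _ in range(dist.get_world_size())]
    dist.all_gather(out, tensor)
    return out

def broadcast(tensor, src=0):
    # Every process receives rank `src`'s copy; returning a clone lets callers
    # such as _check_synced compare it against their own local tensor.
    buf = tensor.clone()
    if dist.is_initialized():
        dist.broadcast(buf, src=src)
    return buf
```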

13 changes: 8 additions & 5 deletions a2/dm_cheetah_pg_agent.yaml
@@ -8,17 +8,20 @@ model:

critic_net: "fc_2layers_128units"


actor_optimizer:
type: "SGD"
learning_rate: 2e-3

critic_optimizer:
type: "SGD"
learning_rate: 2e-3

discount: 0.99
steps_per_iter: 4096
iters_per_output: 50
test_episodes: 32
critic_update_epoch: 5

# optimizer parameters
actor_learning_rate: 2e-3
critic_learning_rate: 2e-3

batch_size: 512
norm_adv_clip: 4.0
action_bound_weight: 10.0
29 changes: 16 additions & 13 deletions a2/pg_agent.py
@@ -3,7 +3,9 @@

import envs.base_env as base_env
import learning.base_agent as base_agent
import learning.mp_optimizer as mp_optimizer
import learning.pg_model as pg_model
import util.mp_util as mp_util
import util.torch_util as torch_util

class PGAgent(base_agent.BaseAgent):
@@ -16,7 +18,11 @@ def __init__(self, config, env, device):
def _load_params(self, config):
super()._load_params(config)

num_procs = mp_util.get_num_procs()

self._batch_size = config["batch_size"]
self._batch_size = int(np.ceil(self._batch_size / num_procs))

self._critic_update_epoch = config["critic_update_epoch"]
self._norm_adv_clip = config["norm_adv_clip"]
self._action_bound_weight = config["action_bound_weight"]
@@ -56,16 +62,15 @@ def _init_iter(self):
return

def _build_optimizer(self, config):
actor_lr = float(config["actor_learning_rate"])
critic_lr = float(config["critic_learning_rate"])

actor_opt_config = config["actor_optimizer"]
actor_params = list(self._model._actor_layers.parameters())+list(self._model._action_dist.parameters())
actor_params_grad = [p for p in actor_params if p.requires_grad]
self._actor_optimizer = torch.optim.SGD(actor_params_grad, actor_lr, momentum=0.9)

self._actor_optimizer = mp_optimizer.MPOptimizer(actor_opt_config, actor_params_grad)

critic_opt_config = config["critic_optimizer"]
critic_params = list(self._model._critic_layers.parameters())+list(self._model._critic_out.parameters())
critic_params_grad = [p for p in critic_params if p.requires_grad]
self._critic_optimizer = torch.optim.SGD(critic_params_grad, critic_lr, momentum=0.9)
self._critic_optimizer = mp_optimizer.MPOptimizer(critic_opt_config, critic_params_grad)

return

@@ -152,9 +157,7 @@ def _update_critic(self, batch):

loss = self._calc_critic_loss(norm_obs, tar_val)

self._critic_optimizer.zero_grad()
loss.backward()
self._critic_optimizer.step()
self._critic_optimizer.step(loss)

info = {
"critic_loss": loss
@@ -179,13 +182,13 @@ def _update_actor(self, batch):
action_bound_loss = torch.mean(action_bound_loss)
loss += self._action_bound_weight * action_bound_loss
info["action_bound_loss"] = action_bound_loss.detach()

self._actor_optimizer.zero_grad()
loss.backward()
self._actor_optimizer.step()

self._actor_optimizer.step(loss)

return info



def _calc_return(self, r, done):
'''
TODO 2.1: Given a tensor of per-timestep rewards (r), and a tensor (done)
5 changes: 3 additions & 2 deletions a3/atari_breakout_dqn_agent.yaml
@@ -10,8 +10,9 @@ iters_per_output: 50000
test_episodes: 10
normalizer_samples: 0

# optimizer parameters
learning_rate: 5e-4
optimizer:
type: "SGD"
learning_rate: 5e-4

exp_buffer_size: 200000
updates_per_iter: 1
5 changes: 3 additions & 2 deletions a3/atari_pong_dqn_agent.yaml
@@ -10,8 +10,9 @@ iters_per_output: 50000
test_episodes: 10
normalizer_samples: 0

# optimizer parameters
learning_rate: 5e-4
optimizer:
type: "SGD"
learning_rate: 5e-4

exp_buffer_size: 200000
updates_per_iter: 1
18 changes: 12 additions & 6 deletions a3/dqn_agent.py
@@ -5,25 +5,34 @@
import envs.base_env as base_env
import learning.base_agent as base_agent
import learning.dqn_model as dqn_model
import util.mp_util as mp_util
import util.torch_util as torch_util

class DQNAgent(base_agent.BaseAgent):
NAME = "DQN"

def __init__(self, config, env, device):
super().__init__(config, env, device)

self._sync_tar_model()
return

def _load_params(self, config):
super()._load_params(config)

num_procs = mp_util.get_num_procs()

buffer_size = config["exp_buffer_size"]
self._exp_buffer_length = int(buffer_size)
self._exp_buffer_length = int(np.ceil(buffer_size / (num_procs)))
self._exp_buffer_length = max(self._exp_buffer_length, self._steps_per_iter)

self._updates_per_iter = config["updates_per_iter"]
self._batch_size = config["batch_size"]
self._batch_size = int(np.ceil(self._batch_size / num_procs))

self._init_samples = config["init_samples"]
self._init_samples = int(np.ceil(self._init_samples / num_procs))

self._updates_per_iter = config["updates_per_iter"]
self._tar_net_update_iters = config["tar_net_update_iters"]

self._exp_anneal_samples = config.get("exp_anneal_samples", np.inf)
@@ -40,7 +49,6 @@ def _build_model(self, config):
for param in self._tar_model.parameters():
param.requires_grad = False

self._sync_tar_model()
return
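Moving `self._sync_tar_model()` out of `_build_model` and into `__init__` means the target network is copied only after the base constructor finishes; with multiple workers, this presumably ensures the copy happens after the online network's parameters have been synchronized across processes. A hedged sketch of that ordering (the broadcast helper and stand-in networks are assumptions):

```python
import torch
import torch.distributed as dist

def sync_model_params(model, src=0):
    # Hypothetical helper: broadcast rank `src`'s parameters so every worker
    # starts from identical weights.
    if dist.is_initialized():
        for p in model.parameters():
            dist.broadcast(p.data, src=src)

def sync_tar_model(model, tar_model):
    # Snapshot the (already synchronized) online weights into the target network.
    tar_model.load_state_dict(model.state_dict())

# Ordering that matches the diff: synchronize the online network first,
# then copy it into the target network.
q_net = torch.nn.Linear(8, 4)    # stand-in networks for illustration
tar_net = torch.nn.Linear(8, 4)
sync_model_params(q_net)
sync_tar_model(q_net, tar_net)
```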

def _get_exp_buffer_length(self):
@@ -80,10 +88,8 @@ def _update_model(self):
batch = self._exp_buffer.sample(self._batch_size)
loss_info = self._compute_loss(batch)

self._optimizer.zero_grad()
loss = loss_info["loss"]
loss.backward()
self._optimizer.step()
self._optimizer.step(loss)

torch_util.add_torch_dict(loss_info, train_info)

10 changes: 5 additions & 5 deletions data/agents/bc_expert_agent.yaml
@@ -8,11 +8,11 @@ model:

critic_net: "fc_2layers_128units"


optimizer:
type: "SGD"
learning_rate: 1e-3

discount: 0.99
steps_per_iter: 4096
iters_per_output: 10
test_episodes: 32

# optimizer parameters
learning_rate: 1e-3
test_episodes: 32