Skip to content

Commit

Permalink
update minigrid_envs.py
Browse files Browse the repository at this point in the history
  • Loading branch information
zenglingqi647 committed Dec 7, 2023
1 parent c33bd05 commit 05479a7
Show file tree
Hide file tree
Showing 9 changed files with 87 additions and 14 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
.vscode
setup.sh
rl-starter-files/storage
rl-starter-files/storag
ctrl.sh
rl-starter-files/evaluate/
rl-starter-files/log/
Expand Down
11 changes: 11 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,17 @@ python -m scripts.train --algo ppo --env BabyAI-GoToImpUnlock-v0 --model GoToImp
```
The problem is that an ask probability of 0.0005 still performs very poorly: training takes a really long time.

# TODO
### Baselines
Basic:
> PPO, A2C only
Exploration(?):
> RND: https://opendilab.github.io/DI-engine/12_policies/rnd.html
> BeBold, NovelD: https://github.com/tianjunz/NovelD
> Deir

### **Update**
- Bash scripts for running the experiments on the different BabyAI and MiniGrid environments can be found in `babyai.sh` and `minigrid.sh`.

Expand Down
63 changes: 63 additions & 0 deletions experimental-code/vocab.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
def get_minigrid_words():
    """Build the word -> index vocabulary used for MiniGrid text.

    The vocabulary is the concatenation, in this order, of the colour
    names, the object names, the verbs and a list of extra words. Each
    word is mapped to its position in that concatenated list, so indices
    are contiguous and start at 0.

    Returns:
        dict: mapping from each word (str) to its integer index.

    Raises:
        AssertionError: if the same word appears more than once across the
            word lists (indices would otherwise be non-contiguous).
    """
    colors = ["red", "green", "blue", "yellow", "purple", "grey"]
    objects = [
        "unseen",
        "empty",
        "wall",
        "floor",
        "box",
        "key",
        "ball",
        "door",
        "goal",
        "agent",
        "lava",
    ]

    verbs = [
        "pick",
        "avoid",
        "get",
        "find",
        "put",
        "use",
        "open",
        "go",
        "fetch",
        "reach",
        "unlock",
        "traverse",
    ]

    extra_words = [
        "up",
        "the",
        "a",
        "at",
        ",",
        "square",
        "and",
        "then",
        "to",
        "of",
        "rooms",
        "near",
        "opening",
        "must",
        "you",
        "matching",
        "end",
        "hallway",
        "object",
        "from",
        "room",
        "maze",
    ]

    all_words = colors + objects + verbs + extra_words
    # Every word must be unique; a repeated word would silently overwrite
    # its earlier index in the dict below and leave a gap in the index range.
    # The message is only evaluated when the check fails.
    assert len(all_words) == len(set(all_words)), (
        "duplicate vocabulary words: "
        f"{sorted({w for w in all_words if all_words.count(w) > 1})}"
    )
    return {word: i for i, word in enumerate(all_words)}

if __name__ == "__main__":
    # Manual smoke test: print the MiniGrid word -> index vocabulary.
    print(get_minigrid_words())
8 changes: 4 additions & 4 deletions rl-starter-files/envs/minigrid_envs.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,10 @@
"""

from minigrid.envs.doorkey import DoorKeyEnv
from minigrid.minigrid_env import MiniGridEnv, Grid, Door, Key, Wall, COLOR_NAMES, DIR_TO_VEC, Ball, Box
from minigrid.core.world_object import Goal
from gym_minigrid.register import register
from gym_minigrid.roomgrid import RoomGrid
from minigrid.minigrid_env import MiniGridEnv, Grid, COLOR_NAMES, DIR_TO_VEC
from minigrid.core.world_object import Goal, Door, Key, Wall, Ball, Box
from gymnasium.envs.registration import register
from minigrid.core.roomgrid import RoomGrid


class CustomDoorKeyEnv(MiniGridEnv):
Expand Down
Binary file removed rl-starter-files/results.xlsx
Binary file not shown.
Binary file removed rl-starter-files/results_.xlsx
Binary file not shown.
3 changes: 0 additions & 3 deletions rl-starter-files/utils/gpt_interface.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,3 @@ def interact_with_gpt(prompt):
request_timeout=10
)
return output.choices[0].message['content']



8 changes: 3 additions & 5 deletions rl-starter-files/utils/planner_policy.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ def __init__(self, obs_space, action_space, vocab, llm_variant, ask_cooldown, us
self.current_skill : int = 0
self.vocab : Vocabulary = vocab
self.llm_variant = llm_variant
# load the skill models
for i in range(num_skills):
self.ac_models.append(self.load_model(i))

Expand All @@ -71,7 +72,7 @@ def load_model(self, index):
p.requires_grad = True
return mdl

def get_skill_distr(self, obs, memory):
def get_skill(self, obs, memory):
with self.lock:
if self.timer == 0:
invert_vocab = {v: k for k, v in self.vocab.vocab.items()}
Expand Down Expand Up @@ -99,14 +100,11 @@ def get_skill_distr(self, obs, memory):
def forward(self, obs, memory):
# for network in self.ac_models:
# network.zero_grad()
skill_network_idx = self.get_skill_distr(obs, memory)
skill_network_idx = self.get_skill(obs, memory)
result = self.ac_models[skill_network_idx](obs, memory)
for j in range(len(self.ac_models)):
if j != skill_network_idx:
model = self.ac_models[j]
for p in model.parameters():
p.grad = torch.zeros_like(p)
return result



7 changes: 5 additions & 2 deletions scripts/run.sh
Original file line number Diff line number Diff line change
Expand Up @@ -26,5 +26,8 @@ python3 -m scripts.evaluate --env MiniGrid-DoorKey-5x5-v0 --model DoorKey



On MiniGrid-BlockedUnlockPickup, all methods perform badly; the training returns are almost all zero. On LavaCrossing,
MiniGrid-DistShift1 and MiniGrid-SimpleCrossing, a2c outperforms a2c with the reshaped reward.


Train with llama
cd ../rl-starter-files
python -m scripts.train --algo ppo --env BabyAI-GoToImpUnlock-v0 --text --frames 1000000 --recurrence 20 --obs-size 11 --frames-per-proc 40 --procs 64 --batch-size 200 --ask-every 500

0 comments on commit 05479a7

Please sign in to comment.