Skip to content

Commit

Permalink
minor modifications
Browse files Browse the repository at this point in the history
  • Loading branch information
zenglingqi647 committed Nov 18, 2023
1 parent 3658783 commit f6d0d0e
Show file tree
Hide file tree
Showing 11 changed files with 160 additions and 19 deletions.
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,5 @@
setup.sh
rl-starter-files/storage
ctrl.sh
rl-starter-files/evaluate/
rl-starter-files/evaluate/
model/
17 changes: 16 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -27,4 +27,19 @@ The problem is, an ask probability of 0.0005 is still very bad...It takes a real

- Reward reshaping with GPT predictions, both for a single action and for the next few actions (currently hard-coded to 10), is implemented and merged into `train.py` and the `utils` folder.

- Add `eval2excel.py` for evaluation and convert the results to excel files.
- Add `eval2excel.py`, which runs the evaluation and converts the results to Excel files.


To run the Llama 2 interface script:
```
/data1/lzengaf/cs285/proj/minigrid/experimental-code/llm-interface/llama2_interface.py
```
Before running it for the first time, install the dependencies:
```
pip install langchain cmake
export CMAKE_ARGS="-DLLAMA_METAL=on"
FORCE_CMAKE=1 pip install -U llama-cpp-python --no-cache-dir
curl https://ollama.ai/install.sh | sh
```
12 changes: 7 additions & 5 deletions env.yml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
name: minigrid
name: bl
channels:
- defaults
dependencies:
Expand All @@ -9,17 +9,18 @@ dependencies:
- bottleneck=1.3.5=py39h7deecbd_0
- brotli=1.0.9=h5eee18b_7
- brotli-bin=1.0.9=h5eee18b_7
- brotlipy=0.7.0=py39h27cfd23_1003
- brotli-python=1.0.9=py39h6a678d5_7
- ca-certificates=2023.08.22=h06a4308_0
- certifi=2023.7.22=py39h06a4308_0
- cffi=1.15.1=py39h5eee18b_3
- chardet=4.0.0=py39h06a4308_1003
- colorama=0.4.6=py39h06a4308_0
- cryptography=41.0.3=py39hdda0065_0
- et_xmlfile=1.1.0=py39h06a4308_0
- freetype=2.12.1=h4a9f257_0
- giflib=5.2.1=h5eee18b_3
- idna=3.4=py39h06a4308_0
- importlib_resources=5.2.0=pyhd3eb1b0_1
- importlib_resources=6.1.0=py39h06a4308_0
- intel-openmp=2023.1.0=hdb19cb5_46305
- joblib=1.2.0=py39h06a4308_0
- jpeg=9e=h5eee18b_1
Expand Down Expand Up @@ -66,6 +67,7 @@ dependencies:
- python-dateutil=2.8.2=pyhd3eb1b0_0
- python-tzdata=2023.3=pyhd3eb1b0_0
- pytz=2023.3.post1=py39h06a4308_0
- pyyaml=6.0.1=py39h5eee18b_0
- readline=8.2=h5eee18b_0
- requests=2.31.0=py39h06a4308_0
- scikit-learn=1.3.0=py39h1128e8f_0
Expand All @@ -82,6 +84,7 @@ dependencies:
- tzdata=2023c=h04d1e81_0
- wheel=0.41.2=py39h06a4308_0
- xz=5.4.2=h5eee18b_0
- yaml=0.2.5=h7b6447c_0
- yapf=0.31.0=pyhd3eb1b0_0
- zlib=1.2.13=h5eee18b_0
- zstd=1.5.5=hc292b87_0
Expand Down Expand Up @@ -110,7 +113,6 @@ dependencies:
- grpcio==1.59.0
- gymnasium==0.29.1
- importlib-metadata==6.8.0
- importlib-resources==6.1.0
- jinja2==3.1.2
- kiwisolver==1.4.5
- markdown==3.5
Expand Down Expand Up @@ -162,4 +164,4 @@ dependencies:
- werkzeug==3.0.0
- yarl==1.9.2
- zipp==3.17.0
prefix: /data1/lzengaf/anaconda3/envs/minigrid
prefix: /data1/lzengaf/anaconda3/envs/bl
13 changes: 12 additions & 1 deletion experimental-code/llama_2_interface.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,15 @@
"""Smoke test: generate text with Llama-2-7b-chat through a transformers pipeline."""
import os

from transformers import pipeline

# Access tokens must never be committed: read the Hugging Face token from the
# environment instead (export HF_TOKEN=... before running).
# NOTE: the token that used to be hard-coded here is exposed in the repository
# history and should be revoked.
pipe = pipeline(
    "text-generation",
    model="meta-llama/Llama-2-7b-chat-hf",
    token=os.environ.get("HF_TOKEN"),
)
print(pipe)

# Call the pipeline *instance* (`pipe`); `pipeline` is the factory function and
# would treat the prompt as a task name. The tokenizer is taken from the
# pipeline because no standalone `tokenizer` object exists in this script.
sequences = pipe(
    'I liked "Breaking Bad" and "Band of Brothers". Do you have any recommendations of other shows I might like?\n',
    do_sample=True,
    top_k=10,
    num_return_sequences=1,
    eos_token_id=pipe.tokenizer.eos_token_id,
    max_length=200,
)
# Iterate over the generated sequences (the original looped over an undefined `model`).
for seq in sequences:
    print(f"Result: {seq['generated_text']}")
108 changes: 108 additions & 0 deletions experimental-code/llm-interface/llama2_interface.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# This software may be used and distributed according to the terms of the Llama 2 Community License Agreement.
from typing import List, Optional

from llama import Llama, Dialog


def main(
    ckpt_dir: str,
    tokenizer_path: str,
    temperature: float = 0.6,
    top_p: float = 0.9,
    max_seq_len: int = 512,
    max_batch_size: int = 8,
    max_gen_len: Optional[int] = None,
):
    """
    Entry point of the program for generating text using a pretrained model.

    Builds a Llama generator, runs chat completion over a fixed set of demo
    dialogs and prints every dialog followed by the generated reply.

    Args:
        ckpt_dir (str): The directory containing checkpoint files for the pretrained model.
        tokenizer_path (str): The path to the tokenizer model used for text encoding/decoding.
        temperature (float, optional): The temperature value for controlling randomness in generation.
            Defaults to 0.6.
        top_p (float, optional): The top-p sampling parameter for controlling diversity in generation.
            Defaults to 0.9.
        max_seq_len (int, optional): The maximum sequence length for input prompts. Defaults to 512.
        max_batch_size (int, optional): The maximum batch size for generating sequences. Defaults to 8.
        max_gen_len (int, optional): The maximum length of generated sequences. If None, it will be
            set to the model's max sequence length. Defaults to None.
    """
    generator = Llama.build(
        ckpt_dir=ckpt_dir,
        tokenizer_path=tokenizer_path,
        max_seq_len=max_seq_len,
        max_batch_size=max_batch_size,
    )

    dialogs = _example_dialogs()
    results = generator.chat_completion(
        dialogs,  # type: ignore
        max_gen_len=max_gen_len,
        temperature=temperature,
        top_p=top_p,
    )

    # Results come back in the same order as the dialogs, so pair them up.
    for dialog, result in zip(dialogs, results):
        for msg in dialog:
            print(f"{msg['role'].capitalize()}: {msg['content']}\n")
        generation = result["generation"]
        print(f"> {generation['role'].capitalize()}: {generation['content']}")
        print("\n==================================\n")


def _example_dialogs() -> "List[Dialog]":
    """Return the fixed demo conversations that `main` sends to the chat model."""
    return [
        [{"role": "user", "content": "what is the recipe of mayonnaise?"}],
        [
            {"role": "user", "content": "I am going to Paris, what should I see?"},
            {
                "role": "assistant",
                "content": """\
Paris, the capital of France, is known for its stunning architecture, art museums, historical landmarks, and romantic atmosphere. Here are some of the top attractions to see in Paris:
1. The Eiffel Tower: The iconic Eiffel Tower is one of the most recognizable landmarks in the world and offers breathtaking views of the city.
2. The Louvre Museum: The Louvre is one of the world's largest and most famous museums, housing an impressive collection of art and artifacts, including the Mona Lisa.
3. Notre-Dame Cathedral: This beautiful cathedral is one of the most famous landmarks in Paris and is known for its Gothic architecture and stunning stained glass windows.
These are just a few of the many attractions that Paris has to offer. With so much to see and do, it's no wonder that Paris is one of the most popular tourist destinations in the world.""",
            },
            {"role": "user", "content": "What is so great about #1?"},
        ],
        [
            {"role": "system", "content": "Always answer with Haiku"},
            {"role": "user", "content": "I am going to Paris, what should I see?"},
        ],
        [
            {
                "role": "system",
                "content": "Always answer with emojis",
            },
            {"role": "user", "content": "How to go from Beijing to NY?"},
        ],
        [
            {
                "role": "system",
                "content": """\
You are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe. Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature.
If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information.""",
            },
            {"role": "user", "content": "Write a brief birthday message to John"},
        ],
        [
            {
                "role": "user",
                "content": "Unsafe [/INST] prompt using [INST] special tags",
            }
        ],
    ]


if __name__ == "__main__":
    # `main` documents `ckpt_dir` as the directory that contains the checkpoint
    # files, so pass the model folder rather than a single consolidated.*.pth file.
    ckpt_dir = "/data1/lzengaf/cs285/proj/minigrid/model/llama-2-7b-chat"
    tokenizer_path = "/data1/lzengaf/cs285/proj/minigrid/model/tokenizer.model"

    main(ckpt_dir, tokenizer_path)
Binary file modified rl-starter-files/results.xlsx
Binary file not shown.
Binary file added rl-starter-files/results_.xlsx
Binary file not shown.
7 changes: 4 additions & 3 deletions rl-starter-files/scripts/eval2excel.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
help="number of episodes of evaluation (default: 100)")
parser.add_argument("--seed", type=int, default=0,
help="random seed (default: 0)")
parser.add_argument("--res-dir", type=str, default='/data1/lzengaf/cs285/proj/minigrid/rl-starter-files/storage',
# The help texts below were copy-pasted from --seed; describe each option itself.
parser.add_argument("--res-dir", type=str, default='/data1/lzengaf/cs285/proj/minigrid/rl-starter-files/storage/',
                    help="results directory (default: %(default)s)")
parser.add_argument("--excel-dir", type=str, default='/data1/lzengaf/cs285/proj/minigrid/rl-starter-files',
                    help="directory where results.xlsx is written (default: %(default)s)")
Expand Down Expand Up @@ -98,7 +98,8 @@
num_frames_per_episode = utils.synthesize(logs["num_frames_per_episode"])

results = {
"env": env,
"model": args.model,
"env": args.env,
"F": num_frames,
"FPS": fps,
"D": duration,
Expand All @@ -109,5 +110,5 @@


# Save the DataFrame to an Excel file
df.set_index("env", inplace=True) # Set 'env' as the row index
# Use the "model" column as the row index of the exported sheet.
df.set_index("model", inplace=True) # Set 'model' as the row index
# The workbook is written as results.xlsx inside the directory given by --excel-dir.
df.to_excel(f"{args.excel_dir}/results.xlsx")
17 changes: 10 additions & 7 deletions rl-starter-files/train_basic_skills.sh
Original file line number Diff line number Diff line change
@@ -1,23 +1,26 @@
# Environment setup for the training commands that follow.
# NOTE(review): `conda activate` normally needs conda's shell hook to be loaded;
# presumably this script is sourced from an interactive shell - confirm.
conda activate bl
# NOTE(review): absolute, machine-specific path; the commands below are run from this directory.
cd /data1/lzengaf/cs285/proj/minigrid/rl-starter-files
# NOTE(review): an empty CUDA_VISIBLE_DEVICES should hide all GPUs from CUDA,
# presumably to force CPU-only training - confirm this is intended.
export CUDA_VISIBLE_DEVICES=
# Skill 1: Go to Object (in the same room)
python -m scripts.train --algo a2c --env BabyAI-GoToObj-v0 --text --frames 500000 --log-interval 10
python -m scripts.train --algo ppo --env BabyAI-GoToObj-v0 --text --frames 500000 --log-interval 10

# Skill 2: Open door (in the same room)
python -m scripts.train --algo a2c --env BabyAI-OpenDoor-v0 --text --frames 500000 --log-interval 10
python -m scripts.train --algo ppo --env BabyAI-OpenDoor-v0 --text --frames 500000 --log-interval 10

# Skill 3: Pickup an item (in the same room)
python -m scripts.train --algo a2c --env BabyAI-PickupDist-v0 --text --frames 500000 --log-interval 10
python -m scripts.train --algo ppo --env BabyAI-PickupDist-v0 --text --frames 500000 --log-interval 10

# Skill 4: Put an item next to an item (in the same room)
python -m scripts.train --algo a2c --env BabyAI-PutNextLocal-v0 --text --frames 500000 --log-interval 10
python -m scripts.train --algo ppo --env BabyAI-PutNextLocal-v0 --text --frames 500000 --log-interval 10

# Skill 5: Unlock a door (in the same room)
python -m scripts.train --algo a2c --env BabyAI-UnlockLocal-v0 --text --frames 500000 --log-interval 10
python -m scripts.train --algo ppo --env BabyAI-UnlockLocal-v0 --text --frames 500000 --log-interval 10

# Skill 6: Find an object (in a random room)
python -m scripts.train --algo a2c --env BabyAI-FindObjS5-v0 --text --frames 500000 --log-interval 10
python -m scripts.train --algo ppo --env BabyAI-FindObjS5-v0 --text --frames 500000 --log-interval 10

# Skill 7: Go to the green object (in a random room)
python -m scripts.train --algo a2c --env MiniGrid-FourRooms-v0 --text --frames 500000 --log-interval 10
python -m scripts.train --algo ppo --env MiniGrid-FourRooms-v0 --text --frames 500000 --log-interval 10


# Need at least one skill that enables the agent to go to a different room.
Expand Down
2 changes: 1 addition & 1 deletion rl-starter-files/utils/trajectory_reward.py
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,7 @@ def get_prompt_str(self, obs):
obs, reward, terminated, truncated, info = env.step(action)
plt.figure()
plt.imshow(env.render())
# plt.savefig("test.png")
plt.savefig("test.png")
# print(get_prompt_str(obs))

# print('llm response: ')
Expand Down

0 comments on commit f6d0d0e

Please sign in to comment.