minor modifications

zenglingqi647 · Nov 18, 2023 · f6d0d0e · f6d0d0e
1 parent 3658783
commit f6d0d0e
Show file tree

Hide file tree

Showing 11 changed files with 160 additions and 19 deletions.
diff --git a/.gitignore b/.gitignore
@@ -4,4 +4,5 @@
 setup.sh
 rl-starter-files/storage
 ctrl.sh
-rl-starter-files/evaluate/
+rl-starter-files/evaluate/
+model/
diff --git a/README.md b/README.md
@@ -27,4 +27,19 @@ The problem is, an ask probability of 0.0005 is still very bad...It takes a real
 
 - The reshaped reward with gpt predicting for a single action and for the next few actions (currently hardcoded as 10) are implemented and merged in the `train.py` and the `utils` folder.
 
-- Add `eval2excel.py` for evaluation and convert the results to excel files.
+- Add `eval2excel.py` for evaluation and convert the results to excel files.
+
+
+To run the Llama 2 interface script:
+```
+/data1/lzengaf/cs285/proj/minigrid/experimental-code/llm-interface/llama2_interface.py
+```
+Before the first run, install the dependencies:
+```
+pip install langchain cmake
+export CMAKE_ARGS="-DLLAMA_METAL=on"
+FORCE_CMAKE=1 pip install -U llama-cpp-python --no-cache-dir
+
+curl https://ollama.ai/install.sh | sh
+
+```
diff --git a/env.yml b/env.yml
@@ -1,4 +1,4 @@
-name: minigrid
+name: bl
 channels:
   - defaults
 dependencies:
@@ -9,17 +9,18 @@ dependencies:
   - bottleneck=1.3.5=py39h7deecbd_0
   - brotli=1.0.9=h5eee18b_7
   - brotli-bin=1.0.9=h5eee18b_7
-  - brotlipy=0.7.0=py39h27cfd23_1003
+  - brotli-python=1.0.9=py39h6a678d5_7
   - ca-certificates=2023.08.22=h06a4308_0
   - certifi=2023.7.22=py39h06a4308_0
   - cffi=1.15.1=py39h5eee18b_3
+  - chardet=4.0.0=py39h06a4308_1003
   - colorama=0.4.6=py39h06a4308_0
   - cryptography=41.0.3=py39hdda0065_0
   - et_xmlfile=1.1.0=py39h06a4308_0
   - freetype=2.12.1=h4a9f257_0
   - giflib=5.2.1=h5eee18b_3
   - idna=3.4=py39h06a4308_0
-  - importlib_resources=5.2.0=pyhd3eb1b0_1
+  - importlib_resources=6.1.0=py39h06a4308_0
   - intel-openmp=2023.1.0=hdb19cb5_46305
   - joblib=1.2.0=py39h06a4308_0
   - jpeg=9e=h5eee18b_1
@@ -66,6 +67,7 @@ dependencies:
   - python-dateutil=2.8.2=pyhd3eb1b0_0
   - python-tzdata=2023.3=pyhd3eb1b0_0
   - pytz=2023.3.post1=py39h06a4308_0
+  - pyyaml=6.0.1=py39h5eee18b_0
   - readline=8.2=h5eee18b_0
   - requests=2.31.0=py39h06a4308_0
   - scikit-learn=1.3.0=py39h1128e8f_0
@@ -82,6 +84,7 @@ dependencies:
   - tzdata=2023c=h04d1e81_0
   - wheel=0.41.2=py39h06a4308_0
   - xz=5.4.2=h5eee18b_0
+  - yaml=0.2.5=h7b6447c_0
   - yapf=0.31.0=pyhd3eb1b0_0
   - zlib=1.2.13=h5eee18b_0
   - zstd=1.5.5=hc292b87_0
@@ -110,7 +113,6 @@ dependencies:
     - grpcio==1.59.0
     - gymnasium==0.29.1
     - importlib-metadata==6.8.0
-    - importlib-resources==6.1.0
     - jinja2==3.1.2
     - kiwisolver==1.4.5
     - markdown==3.5
@@ -162,4 +164,4 @@ dependencies:
     - werkzeug==3.0.0
     - yarl==1.9.2
     - zipp==3.17.0
-prefix: /data1/lzengaf/anaconda3/envs/minigrid
+prefix: /data1/lzengaf/anaconda3/envs/bl
diff --git a/experimental-code/llama_2_interface.py b/experimental-code/llama_2_interface.py
@@ -1,4 +1,15 @@
 from transformers import pipeline
 
 pipe = pipeline("text-generation", model="meta-llama/Llama-2-7b-chat-hf", token="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")
-print(pipe)
+print(pipe)
+
+sequences = pipe(  # call the pipeline instance built above; `pipeline` is only the factory
+    'I liked "Breaking Bad" and "Band of Brothers". Do you have any recommendations of other shows I might like?\n',
+    do_sample=True,
+    top_k=10,
+    num_return_sequences=1,
+    eos_token_id=pipe.tokenizer.eos_token_id,  # no standalone `tokenizer` exists; use the pipeline's own
+    max_length=200,
+)
+for seq in sequences:  # iterate the generated outputs (there is no `model` variable in this script)
+    print(f"Result: {seq['generated_text']}")
diff --git a/experimental-code/llm-interface/llama2_interface.py b/experimental-code/llm-interface/llama2_interface.py
@@ -0,0 +1,108 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# This software may be used and distributed according to the terms of the Llama 2 Community License Agreement.
+print('----')
+from typing import List, Optional
+print('----')
+
+from llama import Llama, Dialog
+print('----')
+
+
+def main(
+    ckpt_dir: str,
+    tokenizer_path: str,
+    temperature: float = 0.6,
+    top_p: float = 0.9,
+    max_seq_len: int = 512,
+    max_batch_size: int = 8,
+    max_gen_len: Optional[int] = None,
+):
+    """
+    Build a Llama generator and print chat completions for six hard-coded demo dialogs.
+
+    Args:
+        ckpt_dir (str): The directory containing checkpoint files for the pretrained model.
+        tokenizer_path (str): The path to the tokenizer model used for text encoding/decoding.
+        temperature (float, optional): Sampling temperature passed to `chat_completion`. Defaults to 0.6.
+        top_p (float, optional): Top-p sampling value passed to `chat_completion`. Defaults to 0.9.
+        max_seq_len (int, optional): Passed to `Llama.build`. Defaults to 512.
+        max_batch_size (int, optional): Passed to `Llama.build`. Defaults to 8.
+        max_gen_len (int, optional): Passed to `chat_completion`. Defaults to None.
+            NOTE(review): presumably the library picks its own limit when None - confirm.
+
+    Returns: None. Each dialog and the reply generated for it are printed to stdout.
+    """
+    print("I'm in")  # presumably a debug marker showing main() was entered
+    generator = Llama.build(
+        ckpt_dir=ckpt_dir,
+        tokenizer_path=tokenizer_path,
+        max_seq_len=max_seq_len,
+        max_batch_size=max_batch_size,
+    )
+
+    dialogs: List[Dialog] = [
+        [{"role": "user", "content": "what is the recipe of mayonnaise?"}],
+        [
+            {"role": "user", "content": "I am going to Paris, what should I see?"},
+            {
+                "role": "assistant",
+                "content": """\
+Paris, the capital of France, is known for its stunning architecture, art museums, historical landmarks, and romantic atmosphere. Here are some of the top attractions to see in Paris:
+
+1. The Eiffel Tower: The iconic Eiffel Tower is one of the most recognizable landmarks in the world and offers breathtaking views of the city.
+2. The Louvre Museum: The Louvre is one of the world's largest and most famous museums, housing an impressive collection of art and artifacts, including the Mona Lisa.
+3. Notre-Dame Cathedral: This beautiful cathedral is one of the most famous landmarks in Paris and is known for its Gothic architecture and stunning stained glass windows.
+
+These are just a few of the many attractions that Paris has to offer. With so much to see and do, it's no wonder that Paris is one of the most popular tourist destinations in the world.""",
+            },
+            {"role": "user", "content": "What is so great about #1?"},
+        ],
+        [
+            {"role": "system", "content": "Always answer with Haiku"},
+            {"role": "user", "content": "I am going to Paris, what should I see?"},
+        ],
+        [
+            {
+                "role": "system",
+                "content": "Always answer with emojis",
+            },
+            {"role": "user", "content": "How to go from Beijing to NY?"},
+        ],
+        [
+            {
+                "role": "system",
+                "content": """\
+You are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe. Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature.
+
+If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information.""",
+            },
+            {"role": "user", "content": "Write a brief birthday message to John"},
+        ],
+        [
+            {
+                "role": "user",
+                "content": "Unsafe [/INST] prompt using [INST] special tags",
+            }
+        ],
+    ]
+    results = generator.chat_completion(  # all dialogs are submitted in a single call
+        dialogs,  # type: ignore
+        max_gen_len=max_gen_len,
+        temperature=temperature,
+        top_p=top_p,
+    )
+
+    for dialog, result in zip(dialogs, results):  # results are matched to dialogs by position
+        for msg in dialog:
+            print(f"{msg['role'].capitalize()}: {msg['content']}\n")
+        print(
+            f"> {result['generation']['role'].capitalize()}: {result['generation']['content']}"
+        )
+        print("\n==================================\n")
+
+
+if __name__ == "__main__":
+    ckpt_dir = "/data1/lzengaf/cs285/proj/minigrid/model/llama-2-7b-chat"  # the directory holding the *.pth shards, not a shard file
+    tokenizer_path = "/data1/lzengaf/cs285/proj/minigrid/model/tokenizer.model"
+
+    main(ckpt_dir, tokenizer_path)
diff --git a/rl-starter-files/results.xlsx b/rl-starter-files/results.xlsx
diff --git a/rl-starter-files/results_.xlsx b/rl-starter-files/results_.xlsx
diff --git a/rl-starter-files/scripts/eval2excel.py b/rl-starter-files/scripts/eval2excel.py
@@ -14,7 +14,7 @@
                     help="number of episodes of evaluation (default: 100)")
 parser.add_argument("--seed", type=int, default=0,
                     help="random seed (default: 0)")
-parser.add_argument("--res-dir", type=str, default='/data1/lzengaf/cs285/proj/minigrid/rl-starter-files/storage',
+parser.add_argument("--res-dir", type=str, default='/data1/lzengaf/cs285/proj/minigrid/rl-starter-files/storage/',
                     help="random seed (default: 0)")
 parser.add_argument("--excel-dir", type=str, default='/data1/lzengaf/cs285/proj/minigrid/rl-starter-files',
                     help="random seed (default: 0)")
@@ -98,7 +98,8 @@
     num_frames_per_episode = utils.synthesize(logs["num_frames_per_episode"])
 
     results = {
-        "env": env,
+        "model": args.model,
+        "env": args.env,
         "F": num_frames,
         "FPS": fps,
         "D": duration,
@@ -109,5 +110,5 @@
 
 
 # Save the DataFrame to an Excel file
-df.set_index("env", inplace=True)  # Set 'env' as the row index
+df.set_index("model", inplace=True)  # Set 'model' as the row index
 df.to_excel(f"{args.excel_dir}/results.xlsx")
diff --git a/rl-starter-files/train_basic_skills.sh b/rl-starter-files/train_basic_skills.sh
@@ -1,23 +1,26 @@
+conda activate bl
+cd /data1/lzengaf/cs285/proj/minigrid/rl-starter-files
+export CUDA_VISIBLE_DEVICES=
 # Skill 1: Go to Object (in the same room)
-python -m scripts.train --algo a2c --env BabyAI-GoToObj-v0 --text --frames 500000 --log-interval 10
+python -m scripts.train --algo ppo --env BabyAI-GoToObj-v0 --text --frames 500000 --log-interval 10
 
 # Skill 2: Open door (in the same room)
-python -m scripts.train --algo a2c --env BabyAI-OpenDoor-v0 --text --frames 500000 --log-interval 10
+python -m scripts.train --algo ppo --env BabyAI-OpenDoor-v0 --text --frames 500000 --log-interval 10
 
 # Skill 3: Pickup an item (in the same room)
-python -m scripts.train --algo a2c --env BabyAI-PickupDist-v0 --text --frames 500000 --log-interval 10
+python -m scripts.train --algo ppo --env BabyAI-PickupDist-v0 --text --frames 500000 --log-interval 10
 
 # Skill 4: Put an item next to an item (in the same room)
-python -m scripts.train --algo a2c --env BabyAI-PutNextLocal-v0 --text --frames 500000 --log-interval 10
+python -m scripts.train --algo ppo --env BabyAI-PutNextLocal-v0 --text --frames 500000 --log-interval 10
 
 # Skill 5: Unlock a door (in the same room)
-python -m scripts.train --algo a2c --env BabyAI-UnlockLocal-v0 --text --frames 500000 --log-interval 10
+python -m scripts.train --algo ppo --env BabyAI-UnlockLocal-v0 --text --frames 500000 --log-interval 10
 
 # Skill 6: Find an object (in a random room)
-python -m scripts.train --algo a2c --env BabyAI-FindObjS5-v0 --text --frames 500000 --log-interval 10
+python -m scripts.train --algo ppo --env BabyAI-FindObjS5-v0 --text --frames 500000 --log-interval 10
 
 # Skill 7: Go to the green object (in a random room)
-python -m scripts.train --algo a2c --env MiniGrid-FourRooms-v0 --text --frames 500000 --log-interval 10
+python -m scripts.train --algo ppo --env MiniGrid-FourRooms-v0 --text --frames 500000 --log-interval 10
 
 
 # Need at least one skill that enables the agent to go to a different room.

diff --git a/rl-starter-files/train_basic_skills_memorysh → ...tarter-files/train_basic_skills_memory.sh b/rl-starter-files/train_basic_skills_memorysh → ...tarter-files/train_basic_skills_memory.sh
diff --git a/rl-starter-files/utils/trajectory_reward.py b/rl-starter-files/utils/trajectory_reward.py
@@ -136,7 +136,7 @@ def get_prompt_str(self, obs):
         obs, reward, terminated, truncated, info = env.step(action)
         plt.figure()
         plt.imshow(env.render())
-        # plt.savefig("test.png")
+        plt.savefig("test.png")
         # print(get_prompt_str(obs))
 
         # print('llm response: ')