Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion agentdriver/execution/gen_finetune_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import random
from pathlib import Path

from agentdriver.planning.planning_prmopts import planning_system_message as system_message
from agentdriver.planning.planning_prompts import planning_system_message as system_message
from agentdriver.planning.motion_planning import generate_messages

def generate_traj_finetune_data(data_path, data_file, sample_ratio=1.0, use_gt_cot=False):
Expand Down
Empty file.
16 changes: 16 additions & 0 deletions agentdriver/execution/llama/collect_planner_input.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
## Run tool use, memory retrieval, and reasoning to generate training data for planning and testing input for planner

from pathlib import Path

from agentdriver.main.language_agent import LanguageAgent
from agentdriver.llm_core.api_keys import OPENAI_ORG, OPENAI_API_KEY

import openai
openai.organization = OPENAI_ORG
openai.api_key = OPENAI_API_KEY

if __name__ == "__main__":
    # Drive the GPT-3.5 agent over the training split so its tool-use /
    # memory-retrieval / reasoning outputs can be collected as planner input.
    agent = LanguageAgent(
        Path('data/'),
        'train',
        model_name="gpt-3.5-turbo-0613",
        finetune_cot=False,
        verbose=False,
    )
    agent.collect_planner_input(invalid_tokens=None)
141 changes: 141 additions & 0 deletions agentdriver/execution/llama/fine_tune.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,141 @@
## finetuning motion planner
import os
import torch
from datasets import load_dataset
from transformers import (
AutoModelForCausalLM,
AutoTokenizer,
BitsAndBytesConfig,
TrainingArguments,
pipeline,
logging,
)
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
from trl import SFTTrainer
from accelerate import FullyShardedDataParallelPlugin, Accelerator
from torch.distributed.fsdp.fully_sharded_data_parallel import (
FullOptimStateDictConfig,
FullStateDictConfig,
)

from agentdriver.execution.llama.gen_finetune_data import generate_traj_finetune_data

def setup_fsdp():
    """Build an Accelerator wired with an FSDP plugin.

    Both model and optimizer state dicts are configured as FULL state dicts,
    offloaded to CPU and gathered on every rank (rank0_only=False).
    """
    state_cfg = FullStateDictConfig(offload_to_cpu=True, rank0_only=False)
    optim_cfg = FullOptimStateDictConfig(offload_to_cpu=True, rank0_only=False)
    plugin = FullyShardedDataParallelPlugin(
        state_dict_config=state_cfg,
        optim_state_dict_config=optim_cfg,
    )
    return Accelerator(fsdp_plugin=plugin)

def _load_tokenizer(base_model_id):
    # Left padding + both special tokens, as expected for causal-LM SFT here.
    tokenizer = AutoTokenizer.from_pretrained(
        base_model_id,
        padding_side="left",
        add_eos_token=True,
        add_bos_token=True,
    )
    # Llama has no dedicated pad token; reuse EOS so batching works.
    tokenizer.pad_token = tokenizer.eos_token
    return tokenizer


def _load_quantized_model(base_model_id):
    # 4-bit NF4 double quantization with bf16 compute (QLoRA-style setup).
    bnb_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_use_double_quant=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=torch.bfloat16,
    )
    model = AutoModelForCausalLM.from_pretrained(
        base_model_id,
        quantization_config=bnb_config,
        device_map="auto",
    )
    # Gradient checkpointing + k-bit prep must happen before LoRA wrapping.
    model.gradient_checkpointing_enable()
    return prepare_model_for_kbit_training(model)


def _lora_config():
    # LoRA on all attention/MLP projections plus the LM head.
    return LoraConfig(
        r=8,
        lora_alpha=16,
        target_modules=[
            "q_proj",
            "k_proj",
            "v_proj",
            "o_proj",
            "gate_proj",
            "up_proj",
            "down_proj",
            "lm_head",
        ],
        bias="none",
        lora_dropout=0.05,
        task_type="CAUSAL_LM",
    )


def train_llama(data_path, sample_ratio=0.1):
    """Fine-tune a 4-bit-quantized Llama-3-8B motion planner with LoRA.

    Generates the training CSV from ``data_samples_train.json`` under
    ``data_path``, then runs SFT and saves both checkpoints and the final
    adapter model to ``./llama_planner_<pct>[_final]``.

    Args:
        data_path: directory holding ``data_samples_train.json``; the
            generated ``finetune_planner_<pct>.csv`` is written there too.
        sample_ratio: fraction of training samples to use (also encoded in
            the output directory / file names as a percentage).
    """
    # Generate training data
    print("Generating fine-tuning data ...")
    generate_traj_finetune_data(data_path=data_path, data_file="data_samples_train.json",
                                sample_ratio=sample_ratio, use_gt_cot=False)

    base_model_id = "NousResearch/Meta-Llama-3-8B"
    max_length = 2048  # max sequence length fed to SFTTrainer

    print("Loading tokenizer and model...")
    tokenizer = _load_tokenizer(base_model_id)

    # Load the CSV produced by generate_traj_finetune_data (single "text" column).
    train_file = f"finetune_planner_{int(sample_ratio * 100)}.csv"
    train_dataset = load_dataset("csv", data_files=os.path.join(data_path, train_file), split="train")

    model = _load_quantized_model(base_model_id)
    model = get_peft_model(model, _lora_config())

    # Wrap the model with the FSDP-configured accelerator.
    accelerator = setup_fsdp()
    model = accelerator.prepare_model(model)

    if torch.cuda.device_count() > 1:
        model.is_parallelizable = True
        model.model_parallel = True

    training_args = TrainingArguments(
        output_dir=f"./llama_planner_{int(sample_ratio * 100)}",
        num_train_epochs=3,
        per_device_train_batch_size=4,
        gradient_accumulation_steps=4,
        gradient_checkpointing=True,
        optim="paged_adamw_8bit",  # paged 8-bit AdamW pairs with 4-bit base weights
        learning_rate=2.5e-5,
        logging_steps=10,
        bf16=True,
        save_strategy="steps",
        save_steps=100,
        evaluation_strategy="no",
        do_eval=False,
        report_to="none",
    )

    trainer = SFTTrainer(
        model=model,
        train_dataset=train_dataset,
        args=training_args,
        tokenizer=tokenizer,
        max_seq_length=max_length,
    )

    print("Starting training...")
    # KV cache is incompatible with gradient checkpointing during training.
    model.config.use_cache = False
    trainer.train()

    # Save the final adapter/model alongside the step checkpoints.
    output_dir = f"./llama_planner_{int(sample_ratio * 100)}_final"
    trainer.save_model(output_dir)
    print(f"Model saved to {output_dir}")

if __name__ == "__main__":
    # Fine-tune on 10% of the planner training samples found under data/finetune.
    train_llama(data_path="data/finetune", sample_ratio=0.1)
41 changes: 41 additions & 0 deletions agentdriver/execution/llama/gen_finetune_data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
import json
import random
from pathlib import Path

from agentdriver.planning.planning_prompts import planning_system_message as system_message
from agentdriver.planning.generate_messages import generate_messages

def generate_traj_finetune_data(data_path, data_file, sample_ratio=1.0, use_gt_cot=False):
    """Render planner data samples into a single-column CSV for Llama SFT.

    Randomly keeps ``sample_ratio`` of the records in ``data_file``, formats
    each as one System/Human/Assistant prompt via ``generate_messages``, and
    writes ``finetune_planner_<pct>.csv`` (one ``text`` column) to ``data_path``.

    Args:
        data_path: directory containing ``data_file``; output CSV lands here too.
        data_file: JSON file holding a list of planner data samples.
        sample_ratio: fraction of samples to keep (unseeded random sample).
        use_gt_cot: forwarded to ``generate_messages`` (ground-truth CoT).
    """
    # Context manager so the input file handle is closed deterministically
    # (the original json.load(open(...)) leaked it).
    with open(Path(data_path) / Path(data_file), 'r') as f:
        data_samples = json.load(f)

    sample_size = int(len(data_samples) * sample_ratio)
    data_samples = random.sample(data_samples, sample_size)

    train_data = []
    for data_sample in data_samples:
        token, user_message, assistant_message = generate_messages(data_sample, use_gt_cot=use_gt_cot)
        assert assistant_message is not None

        # Format for Llama fine-tuning. BUGFIX: a stray "# " previously
        # prefixed the "### Assistant:" tag, corrupting every example.
        full_prompt = f"""### System: {system_message}
### Human: {user_message}
### Assistant: {assistant_message}"""

        train_data.append({
            "text": full_prompt
        })

    print("#### Data Summarization ####")
    print(f"Number of total samples: {len(train_data)}")

    # Save as CSV for Llama training.
    saved_file_name = f"finetune_planner_{int(sample_ratio * 100)}.csv"
    with open(Path(data_path) / Path(saved_file_name), "w") as f:
        f.write("text\n")  # CSV header
        for item in train_data:
            # Escape quotes CSV-style; newlines are stored as the literal
            # two-character sequence "\n" so each record stays on one line.
            text = item["text"].replace('"', '""').replace('\n', '\\n')
            f.write(f'"{text}"\n')

if __name__ == "__main__":
    # Build the full (sample_ratio defaults to 1.0) fine-tuning CSV from the
    # training samples under data/finetune.
    generate_traj_finetune_data(data_path="data/finetune", data_file="data_samples_train.json", use_gt_cot=False)
39 changes: 39 additions & 0 deletions agentdriver/execution/llama/inference.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
## Run tool use, memory retrieval, and reasoning to generate training data for planning and testing input for planner

from pathlib import Path
import time
import json

from agentdriver.main.language_agent import LanguageAgent
from agentdriver.llm_core.api_keys import OPENAI_ORG, OPENAI_API_KEY, FINETUNE_PLANNER_NAME

import openai
openai.organization = OPENAI_ORG
openai.api_key = OPENAI_API_KEY

if __name__ == "__main__":
    # Run the fine-tuned planner over the validation split and collect
    # planned trajectories under a timestamped experiments/ directory.
    data_path = Path('data/')
    split = 'val'
    agent = LanguageAgent(
        data_path,
        split,
        model_name="gpt-3.5-turbo-0613",
        planner_model_name=FINETUNE_PLANNER_NAME,
        finetune_cot=False,
        verbose=False,
    )

    # "%m_%d_%y_%H_%M" produces the same stamp the original built from
    # "%D:%H:%M" followed by replacing "/" and ":" with "_".
    run_stamp = time.strftime("%m_%d_%y_%H_%M")
    save_path = Path("experiments") / run_stamp
    save_path.mkdir(exist_ok=True, parents=True)

    with open("data/finetune/data_samples_val.json", "r") as f:
        data_samples = json.load(f)

    planning_traj_dict = agent.inference_all(
        data_samples=data_samples,
        data_path=data_path / split,
        save_path=save_path,
    )

Loading