
Commit

add sh
fpzh2011 committed Nov 4, 2024
1 parent 03d5b64 commit 555ade1
Showing 23 changed files with 68 additions and 40 deletions.
1 change: 1 addition & 0 deletions infer/aquila.sh
@@ -0,0 +1 @@
+python projects/Aquila/pipeline.py --model_path=/root/models/Aquila-7B --mode=huggingface --device=xpu
1 change: 1 addition & 0 deletions infer/baichuan.sh
@@ -0,0 +1 @@
+python projects/Baichuan/pipeline.py --model_path=/root/models/Baichuan2-7B-Chat --mode=huggingface --device=xpu
1 change: 1 addition & 0 deletions infer/chatglm.sh
@@ -0,0 +1 @@
+python projects/ChatGLM/pipeline.py --model_path=/root/models/chatglm2-6b --mode=huggingface --device=xpu
1 change: 1 addition & 0 deletions infer/llama.sh
@@ -0,0 +1 @@
+python projects/Llama/pipeline.py --model_path=/root/models/Llama-2-7b-chat-hf --mode=huggingface --device=xpu
1 change: 1 addition & 0 deletions infer/qwen.sh
@@ -0,0 +1 @@
+python projects/Qwen/pipeline.py --model_path=/root/models/Qwen1.5-7B-Chat --mode=huggingface --device=xpu
10 changes: 5 additions & 5 deletions projects/Aquila/configs/aquila_sft.py
@@ -64,18 +64,18 @@
 train.update(
     dict(
         output_dir="./sft_result/aquila",
-        train_micro_batch_size=4,
+        train_micro_batch_size=1,
         test_micro_batch_size=1,
-        train_epoch=5,
+        train_epoch=1,
         train_iter=1,
         log_period=1,
         warmup_ratio=1 / 3,
-        num_accumulation_steps=1,
+        num_accumulation_steps=8,
         rdma_enabled=False,
         train_with_fp16=True,
         amp=dict(enabled=True),
         activation_checkpoint=dict(enabled=True),
-        input_placement_device="cuda",
+        input_placement_device="xpu",
         checkpointer=dict(
             period=100,
             max_to_keep=20,
@@ -85,7 +85,7 @@
             tensor_parallel_size=1,
             pipeline_parallel_size=1,
             pipeline_num_layers=cfg.hidden_layers,
-            device_type="cuda",
+            device_type="xpu",
         ),
         evaluation=dict(
             enabled=False,
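A note on the pattern above, which repeats in the other SFT configs in this commit: the per-device micro-batch shrinks (4 to 1), gradient accumulation grows (1 to 8), and placement moves from cuda to xpu. A minimal sketch of the batch-size arithmetic, assuming LiBai's usual convention that the effective batch per optimizer step is train_micro_batch_size * num_accumulation_steps * data_parallel_size (plain Python; the variable names only mirror the config keys and are not imported from the repository):

# Illustrative arithmetic only, under the assumption stated above.
data_parallel_size = 1
old_effective = 4 * 1 * data_parallel_size  # train_micro_batch_size=4, num_accumulation_steps=1
new_effective = 1 * 8 * data_parallel_size  # train_micro_batch_size=1, num_accumulation_steps=8
print(old_effective, new_effective)         # 4 -> 8 samples per accumulated step

Only one micro-batch is resident on the device at a time, so peak activation memory drops even though the accumulated step gets larger.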
4 changes: 2 additions & 2 deletions projects/Aquila/utils/data_prepare.py
@@ -18,9 +18,9 @@


 def prepare(
-    destination_path: Path = Path("./alpaca_data"),
+    destination_path: Path = Path("./data/aquila"),
     checkpoint_dir: Path = Path("/root/models/Aquila-7B"),
-    test_split_fraction: float = 0.03865, # to get exactly 2000 test samples,
+    test_split_fraction: float = 0.60, # to get exactly 2000 test samples,
     seed: int = 42,
     mask_inputs: bool = False, # as in alpaca-lora
     data_file_name: str = "alpaca_data_cleaned_archive.json",
2 changes: 1 addition & 1 deletion projects/Baichuan/configs/baichuan_config.py
@@ -58,5 +58,5 @@
 tokenization = OmegaConf.create()
 tokenization.make_vocab_size_divisible_by = 1
 tokenization.tokenizer = LazyCall(BaichuanTokenizer)(
-    # pretrained_model_path=cfg.pretrained_model_path + "/tokenizer.model"
+    pretrained_model_path=cfg.pretrained_model_path + "/tokenizer.model"
 )
12 changes: 6 additions & 6 deletions projects/Baichuan/configs/baichuan_sft.py
@@ -64,16 +64,16 @@
         output_dir="./sft_result/baichuan",
         train_micro_batch_size=1,
         test_micro_batch_size=1,
-        train_epoch=3,
+        train_epoch=1,
         train_iter=1,
         log_period=1,
         warmup_ratio=1 / 3,
-        num_accumulation_steps=1,
-        rdma_enabled=True,
-        amp=dict(enabled=False),
+        num_accumulation_steps=8,
+        rdma_enabled=False,
+        amp=dict(enabled=True),
         train_with_fp16=True,
         activation_checkpoint=dict(enabled=True),
-        input_placement_device="cuda",
+        input_placement_device="xpu",
         checkpointer=dict(
             period=5000,
             max_to_keep=20,
@@ -83,7 +83,7 @@
             tensor_parallel_size=1,
             pipeline_parallel_size=1,
             pipeline_num_layers=cfg.hidden_layers,
-            device_type="cuda",
+            device_type="xpu",
         ),
         evaluation=dict(
             enabled=True,
14 changes: 10 additions & 4 deletions projects/Baichuan/utils/data_prepare.py
@@ -18,9 +18,9 @@


 def prepare(
-    destination_path: Path = Path("./data/libai_xpu_alpaca"),
+    destination_path: Path = Path("./data/baichuan"),
     checkpoint_dir: Path = Path("/root/models/Baichuan2-7B-Chat"),
-    test_split_fraction: float = 0.03865, # to get exactly 2000 test samples,
+    test_split_fraction: float = 0.60, # to get exactly 2000 test samples,
     seed: int = 42,
     mask_inputs: bool = False, # as in alpaca-lora
     data_file_name: str = "alpaca_data_cleaned_archive.json",
@@ -38,7 +38,7 @@ def prepare(
     max_seq_length = config["max_position_embeddings"]

     destination_path.mkdir(parents=True, exist_ok=True)
-    data_file_path = destination_path / data_file_name
+    data_file_path = Path(data_file_name)
     logger.info("Loading data file...")
     download_if_missing(data_file_path, data_file_url)
     with open(data_file_path, "r", encoding="utf-8") as file:
@@ -118,7 +118,7 @@ def prepare_sample(example: dict, tokenizer, max_length: int) -> dict:

     padding = max_length - example.shape[0]
     if padding > 0:
-        example = flow.cat((example, flow.zeros(padding, dtype=flow.long) - 1))
+        example = flow.cat((example.to_local(), flow.zeros(padding, dtype=flow.long) - 1))
     elif padding < 0:
         example = example[:max_length]
     labels = copy.deepcopy(example)
@@ -129,10 +129,16 @@ def prepare_sample(example: dict, tokenizer, max_length: int) -> dict:
     labels[~label_mask] = -1
     example = example[:-1]
     labels = labels[1:]
+    if example_mask.is_global:
+        example_mask = example_mask.to_local()
     example_mask = flow.where(
         example_mask, flow.tensor(0, dtype=flow.float), flow.tensor(-float("inf"))
     )
     example_mask = example_mask[:-1]
+    if example.is_global:
+        example = example.to_local()
+    if labels.is_global:
+        labels = labels.to_local()
     return {
         "input_ids": example,
         "labels": labels,
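For readers unfamiliar with OneFlow, the is_global/to_local() additions above follow a common pattern: a global (placement/SBP-annotated) tensor cannot be mixed directly with a freshly created local tensor, so it is converted to its local shard first. A minimal standalone sketch of that pattern (illustration only, not code from the repository):

import oneflow as flow

# A plain local tensor of token ids; in data_prepare.py `example` may be global,
# which is why the diff guards with `is_global` before calling `to_local()`.
example = flow.tensor([5, 7, 9], dtype=flow.long)
if example.is_global:      # False for this local tensor, but mirrors the added guard
    example = example.to_local()
padding = 2
example = flow.cat((example, flow.zeros(padding, dtype=flow.long) - 1))  # pad with -1
print(example)             # [5, 7, 9, -1, -1]

The same guard is applied to example, labels, and example_mask so that the tensors prepare_sample returns are local rather than global.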
8 changes: 5 additions & 3 deletions projects/ChatGLM/configs/chatglm_sft.py
@@ -74,14 +74,15 @@
         output_dir="./sft_result/chatglm",
         train_micro_batch_size=1,
         test_micro_batch_size=1,
-        train_epoch=3,
+        train_epoch=1,
         train_iter=1,
         log_period=1,
         warmup_ratio=2 / 5,
-        num_accumulation_steps=1,
-        rdma_enabled=True,
+        num_accumulation_steps=8,
+        rdma_enabled=False,
         amp=dict(enabled=True),
         activation_checkpoint=dict(enabled=True),
+        input_placement_device="xpu",
         checkpointer=dict(
             period=5000,
             max_to_keep=1,
@@ -91,6 +92,7 @@
             tensor_parallel_size=1,
             pipeline_parallel_size=1,
             pipeline_num_layers=cfg.num_layers,
+            device_type="xpu",
         ),
         evaluation=dict(
             enabled=False,
2 changes: 1 addition & 1 deletion projects/ChatGLM/utils/prepare_data_alpaca.py
@@ -16,7 +16,7 @@

 def prepare(
     destination_path: Path = Path(os.environ["DATA_DIR"]),
-    test_split_fraction: float = 0.03865, # to get exactly 2000 test samples,
+    test_split_fraction: float = 0.60, # to get exactly 2000 test samples,
     seed: int = 42,
     data_file_name: str = "alpaca_data_cleaned_archive.json",
     data_file_url: str = "https://raw.githubusercontent.com/tloen/alpaca-lora/main/alpaca_data_cleaned_archive.json", # noqa
4 changes: 2 additions & 2 deletions projects/Llama/configs/llama_config.py
@@ -48,7 +48,7 @@
     eos_token_id=2,
     pad_token_id=0,
     # train
-    pretrained_model_path="meta-llama/Llama-2-7b-hf",
+    pretrained_model_path="/root/models/Llama-2-7b-chat-hf",
 )

 cfg = DictConfig(cfg)
@@ -57,5 +57,5 @@
 tokenization = OmegaConf.create()
 tokenization.make_vocab_size_divisible_by = 1
 tokenization.tokenizer = LazyCall(LlamaTokenizer)(
-    # pretrained_model_path="meta-llama/Llama-2-7b-hf/tokenizer.model"
+    pretrained_model_path="/root/models/Llama-2-7b-chat-hf/tokenizer.model"
 )
10 changes: 6 additions & 4 deletions projects/Llama/configs/llama_sft.py
@@ -62,25 +62,27 @@
 train.update(
     dict(
         output_dir="./sft_result/llama",
-        train_micro_batch_size=4,
+        train_micro_batch_size=1,
         test_micro_batch_size=1,
-        train_epoch=3,
+        train_epoch=1,
         train_iter=1,
         log_period=1,
         warmup_ratio=1 / 3,
-        num_accumulation_steps=1,
+        num_accumulation_steps=8,
         rdma_enabled=False,
         amp=dict(enabled=True),
         activation_checkpoint=dict(enabled=True),
+        input_placement_device="xpu",
         checkpointer=dict(
             period=5000,
             max_to_keep=20,
         ),
         dist=dict(
             data_parallel_size=1,
             tensor_parallel_size=1,
-            pipeline_parallel_size=8,
+            pipeline_parallel_size=1,
             pipeline_num_layers=cfg.hidden_layers,
+            device_type="xpu",
         ),
         evaluation=dict(
             enabled=True,
14 changes: 10 additions & 4 deletions projects/Llama/utils/prepare_alpaca.py
@@ -19,9 +19,9 @@


 def prepare(
-    destination_path: Path = Path("alpaca_data"),
-    checkpoint_dir: Path = Path("meta-llama/Llama-2-7b-hf"),
-    test_split_fraction: float = 0.03865, # to get exactly 2000 test samples,
+    destination_path: Path = Path(os.environ["DATA_DIR"]),
+    checkpoint_dir: Path = Path("/root/models/Llama-2-7b-chat-hf"),
+    test_split_fraction: float = 0.60, # to get exactly 2000 test samples,
     seed: int = 42,
     mask_inputs: bool = False, # as in alpaca-lora
     data_file_name: str = "alpaca_data_cleaned_archive.json",
@@ -119,7 +119,7 @@ def prepare_sample(example: dict, tokenizer, max_length: int) -> dict:

     padding = max_length - example.shape[0]
     if padding > 0:
-        example = flow.cat((example, flow.zeros(padding, dtype=flow.long) - 1))
+        example = flow.cat((example.to_local(), flow.zeros(padding, dtype=flow.long) - 1))
     elif padding < 0:
         example = example[:max_length]
     labels = copy.deepcopy(example)
@@ -130,10 +130,16 @@ def prepare_sample(example: dict, tokenizer, max_length: int) -> dict:
     labels[~label_mask] = -1
     example = example[:-1]
     labels = labels[1:]
+    if example_mask.is_global:
+        example_mask = example_mask.to_local()
     example_mask = flow.where(
         example_mask, flow.tensor(0, dtype=flow.float), flow.tensor(-float("inf"))
     )
     example_mask = example_mask[:-1]
+    if example.is_global:
+        example = example.to_local()
+    if labels.is_global:
+        labels = labels.to_local()
     return {
         "input_ids": example,
         "labels": labels,
4 changes: 2 additions & 2 deletions projects/Qwen/configs/qwen_config.py
@@ -57,6 +57,6 @@
 tokenization = OmegaConf.create()
 tokenization.make_vocab_size_divisible_by = 1
 tokenization.tokenizer = LazyCall(Qwen2Tokenizer)(
-    # vocab_file="/root/models/Qwen1.5-7B/vocab.json",
-    # merges_file="/root/models/Qwen/Qwen1.5-7B/merges.txt",
+    vocab_file="/root/models/Qwen1.5-7B-Chat/vocab.json",
+    merges_file="/root/models/Qwen1.5-7B-Chat/merges.txt",
 )
6 changes: 4 additions & 2 deletions projects/Qwen/configs/qwen_sft.py
@@ -16,7 +16,7 @@

 # Hyperparameters
 weight_decay = 0.1
-learning_rate = 5e-5
+learning_rate = 1e-5
 dataset_path = os.environ["DATA_DIR"]
 pretrained_model_path = os.environ["MODEL_DIR"]

@@ -68,10 +68,11 @@
         train_iter=1,
         log_period=1,
         warmup_ratio=1 / 3,
-        num_accumulation_steps=1,
+        num_accumulation_steps=8,
         rdma_enabled=False,
         amp=dict(enabled=True),
         activation_checkpoint=dict(enabled=True),
+        input_placement_device="xpu",
         checkpointer=dict(
             period=5000,
             max_to_keep=20,
@@ -81,6 +82,7 @@
             tensor_parallel_size=1,
             pipeline_parallel_size=1,
             pipeline_num_layers=cfg.hidden_layers,
+            device_type="xpu",
         ),
         evaluation=dict(
             enabled=False,
8 changes: 4 additions & 4 deletions projects/Qwen/utils/prepare_alpaca.py
@@ -19,9 +19,9 @@


 def prepare(
-    destination_path: Path = Path("./data/libai_xpu_alpaca"),
+    destination_path: Path = Path("./data/qwen"),
     checkpoint_dir: Path = Path("/root/models/Qwen1.5-7B-Chat"),
-    test_split_fraction: float = 0.03865, # to get exactly 2000 test samples,
+    test_split_fraction: float = 0.60, # to get exactly 2000 test samples,
     seed: int = 42,
     mask_inputs: bool = False, # as in alpaca-lora
     data_file_name: str = "alpaca_data_cleaned_archive.json",
@@ -113,9 +113,9 @@ def prepare_sample(example: dict, tokenizer, max_length: int) -> dict:
     full_prompt_and_response = full_prompt + example["output"]

     prompt = tokenizer.encode(full_prompt, device="cpu")
-    prompt = flow.tensor(prompt, dtype=flow.int, device="cpu")
+    prompt = flow.tensor(prompt, dtype=flow.int64, device="cpu")
     example = tokenizer.encode(full_prompt_and_response, device="cpu")
-    example = flow.tensor(example, dtype=flow.int, device="cpu")
+    example = flow.tensor(example, dtype=flow.int64, device="cpu")

     padding = max_length - example.shape[0]
     if padding > 0:
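The flow.int to flow.int64 change above is worth a note: token ids are typically consumed as 64-bit indices downstream (embedding and gather ops commonly expect int64), and the padding tensors in the sibling prepare scripts are already created with dtype=flow.long, which is the same 64-bit type. A tiny sketch of the dtype difference (illustration only; the ids are hypothetical, not from the dataset):

import oneflow as flow

ids = [101, 2023, 102]                                       # hypothetical token ids
as_int32 = flow.tensor(ids, dtype=flow.int, device="cpu")    # what the old line produced
as_int64 = flow.tensor(ids, dtype=flow.int64, device="cpu")  # what the commit switches to
print(as_int32.dtype, as_int64.dtype)                        # int32 vs int64; int64 matches flow.long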
1 change: 1 addition & 0 deletions train/aquila.sh
@@ -0,0 +1 @@
+python tools/train_net.py --config-file=projects/Aquila/configs/aquila_sft.py
1 change: 1 addition & 0 deletions train/baichuan.sh
@@ -0,0 +1 @@
+DATA_DIR=./data/baichuan MODEL_DIR=/root/models/Baichuan2-7B-Chat python tools/train_net.py --config-file=projects/Baichuan/configs/baichuan_sft.py
1 change: 1 addition & 0 deletions train/chatglm.sh
@@ -0,0 +1 @@
+DATA_DIR=./data/chatglm CHATGLM_HF_DIR=/root/models/chatglm2-6b python tools/train_net.py --config-file=projects/ChatGLM/configs/chatglm_sft.py
1 change: 1 addition & 0 deletions train/llama.sh
@@ -0,0 +1 @@
+python tools/train_net.py --config-file=projects/Llama/configs/llama_sft.py
1 change: 1 addition & 0 deletions train/qwen.sh
@@ -0,0 +1 @@
+DATA_DIR=./data/qwen MODEL_DIR=/root/models/Qwen1.5-7B-Chat python projects/Qwen/train_net.py --config-file=projects/Qwen/configs/qwen_sft.py
