Xpu #558

Closed · wants to merge 13 commits into from

Changes from all commits
1 change: 1 addition & 0 deletions infer/aquila.sh
@@ -0,0 +1 @@
+python projects/Aquila/pipeline.py --model_path=/root/models/Aquila-7B --mode=huggingface --device=xpu
1 change: 1 addition & 0 deletions infer/baichuan.sh
@@ -0,0 +1 @@
+python projects/Baichuan/pipeline.py --model_path=/root/models/Baichuan2-7B-Chat --mode=huggingface --device=xpu
1 change: 1 addition & 0 deletions infer/chatglm.sh
@@ -0,0 +1 @@
+python projects/ChatGLM/pipeline.py --model_path=/root/models/chatglm2-6b --mode=huggingface --device=xpu
1 change: 1 addition & 0 deletions infer/llama.sh
@@ -0,0 +1 @@
+python projects/Llama/pipeline.py --model_path=/root/models/Llama-2-7b-chat-hf --mode=huggingface --device=xpu
1 change: 1 addition & 0 deletions infer/qwen.sh
@@ -0,0 +1 @@
+python projects/Qwen/pipeline.py --model_path=/root/models/Qwen1.5-7B-Chat --mode=huggingface --device=xpu
2 changes: 1 addition & 1 deletion libai/engine/default.py
@@ -306,7 +306,7 @@ def __init__(self, cfg):
dist.synchronize()
start_time = time.time()
logger.info("> Start building model...")
-self.model = self.build_model(cfg)
+self.model = self.build_model(cfg).half()

dist.synchronize()
logger.info(
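
The engine-level change above casts the freshly built model to half precision before training starts. A minimal sketch of the effect, assuming oneflow's nn.Module.half() mirrors the PyTorch API (parameters and buffers cast to float16 in place):

import oneflow as flow
import oneflow.nn as nn

# Sketch only, not part of this PR: the same cast pattern as build_model(cfg).half().
model = nn.Linear(4, 4)
print(model.weight.dtype)  # oneflow.float32
model = model.half()
print(model.weight.dtype)  # oneflow.float16
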
3 changes: 2 additions & 1 deletion libai/layers/embedding.py
@@ -161,7 +161,8 @@ def forward(self, input_ids):
# [B, S(0)] x [S(0), B] --> [S(0), P]
# ↑ ↑ ↑
# embed input_ids input_embeds
-input_embeds = flow._C.gather(weight, input_ids, axis=0)
+with flow.no_grad():
+    input_embeds = flow._C.gather(weight, input_ids, axis=0)
# Set the embeds sbp from [S(0), P] --> [S(0), B] to get complete embedding results.
input_embeds = input_embeds.to_global(sbp=dist.get_hidden_sbp())

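
For reference, a minimal local-tensor sketch of the pattern introduced above: ops executed under flow.no_grad() are excluded from the autograd graph, so the gathered embeddings carry no grad_fn. The real code operates on global tensors with SBP signatures; the sketch uses plain local tensors.

import oneflow as flow

weight = flow.randn(10, 4, requires_grad=True)
input_ids = flow.tensor([1, 3, 5])
with flow.no_grad():
    input_embeds = flow._C.gather(weight, input_ids, axis=0)
print(input_embeds.requires_grad)  # False
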
12 changes: 6 additions & 6 deletions projects/Aquila/configs/aquila_sft.py
@@ -19,7 +19,7 @@
# Hyperparameters
weight_decay = 0.1
learning_rate = 5e-5
-dataset_path = "./alpaca_data"
+dataset_path = "./data/aquila"
pretrained_model_path = "/root/models/Aquila-7B"

# graph & optim
@@ -63,10 +63,10 @@

train.update(
dict(
-output_dir="./sft_result",
-train_micro_batch_size=4,
+output_dir="./sft_result/aquila",
+train_micro_batch_size=1,
test_micro_batch_size=1,
-train_epoch=5,
+train_epoch=1,
train_iter=1,
log_period=1,
warmup_ratio=1 / 3,
@@ -75,7 +75,7 @@
train_with_fp16=True,
amp=dict(enabled=True),
activation_checkpoint=dict(enabled=True),
-input_placement_device="cuda",
+input_placement_device="xpu",
checkpointer=dict(
period=100,
max_to_keep=20,
@@ -85,7 +85,7 @@
tensor_parallel_size=1,
pipeline_parallel_size=1,
pipeline_num_layers=cfg.hidden_layers,
-device_type="cuda",
+device_type="xpu",
),
evaluation=dict(
enabled=False,
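
The Aquila SFT config now points every device-related field at xpu. A sketch of how those fields could be inspected or overridden at load time, assuming LiBai's LazyConfig API (LazyConfig.load and the attribute names below follow the config above and are not verified against this branch):

from libai.config import LazyConfig

cfg = LazyConfig.load("projects/Aquila/configs/aquila_sft.py")
print(cfg.train.dist.device_type)        # "xpu"
print(cfg.train.input_placement_device)  # "xpu"
cfg.train.train_micro_batch_size = 2     # override without editing the file
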
4 changes: 2 additions & 2 deletions projects/Aquila/utils/data_prepare.py
@@ -18,9 +18,9 @@


def prepare(
-destination_path: Path = Path("./alpaca_data"),
+destination_path: Path = Path("./data/aquila"),
checkpoint_dir: Path = Path("/root/models/Aquila-7B"),
-test_split_fraction: float = 0.03865, # to get exactly 2000 test samples,
+test_split_fraction: float = 0.60, # to get exactly 2000 test samples,
seed: int = 42,
mask_inputs: bool = False, # as in alpaca-lora
data_file_name: str = "alpaca_data_cleaned_archive.json",
2 changes: 1 addition & 1 deletion projects/Baichuan/configs/baichuan_config.py
@@ -58,5 +58,5 @@
tokenization = OmegaConf.create()
tokenization.make_vocab_size_divisible_by = 1
tokenization.tokenizer = LazyCall(BaichuanTokenizer)(
-# pretrained_model_path=cfg.pretrained_model_path + "/tokenizer.model"
+pretrained_model_path=cfg.pretrained_model_path + "/tokenizer.model"
)
18 changes: 9 additions & 9 deletions projects/Baichuan/configs/baichuan_sft.py
@@ -18,9 +18,9 @@

# Hyperparameters
weight_decay = 0.1
-learning_rate = 5e-5
-dataset_path = "./alpaca_data"
-pretrained_model_path = "/root/models/Llama-2-7b-chat-hf"
+learning_rate = 1e-5
+dataset_path = os.environ["DATA_DIR"]
+pretrained_model_path = os.environ["MODEL_DIR"]

# graph & optim
graph["enabled"] = False
@@ -61,19 +61,19 @@

train.update(
dict(
-output_dir="./sft_result",
+output_dir="./sft_result/baichuan",
train_micro_batch_size=1,
test_micro_batch_size=1,
-train_epoch=3,
+train_epoch=1,
train_iter=1,
log_period=1,
warmup_ratio=1 / 3,
num_accumulation_steps=8,
-rdma_enabled=True,
-amp=dict(enabled=False),
+rdma_enabled=False,
+amp=dict(enabled=True),
train_with_fp16=True,
activation_checkpoint=dict(enabled=True),
-input_placement_device="cuda",
+input_placement_device="xpu",
checkpointer=dict(
period=5000,
max_to_keep=20,
@@ -83,7 +83,7 @@
tensor_parallel_size=1,
pipeline_parallel_size=1,
pipeline_num_layers=cfg.hidden_layers,
-device_type="cuda",
+device_type="xpu",
),
evaluation=dict(
enabled=True,
2 changes: 1 addition & 1 deletion projects/Baichuan/pipeline.py
@@ -117,7 +117,7 @@ def main(config_file, model_path, mode, device):
)

text = [
"Give three tips for staying healthy.",
"Wikipedia is a free online",
]
output = pipeline(inputs=text)
if dist.is_main_process():
14 changes: 10 additions & 4 deletions projects/Baichuan/utils/data_prepare.py
@@ -18,9 +18,9 @@


def prepare(
-destination_path: Path = Path("./data/libai_xpu_alpaca"),
+destination_path: Path = Path("./data/baichuan"),
checkpoint_dir: Path = Path("/root/models/Baichuan2-7B-Chat"),
-test_split_fraction: float = 0.03865, # to get exactly 2000 test samples,
+test_split_fraction: float = 0.60, # to get exactly 2000 test samples,
seed: int = 42,
mask_inputs: bool = False, # as in alpaca-lora
data_file_name: str = "alpaca_data_cleaned_archive.json",
@@ -38,7 +38,7 @@ def prepare(
max_seq_length = config["max_position_embeddings"]

destination_path.mkdir(parents=True, exist_ok=True)
-data_file_path = destination_path / data_file_name
+data_file_path = Path(data_file_name)
logger.info("Loading data file...")
download_if_missing(data_file_path, data_file_url)
with open(data_file_path, "r", encoding="utf-8") as file:
@@ -118,7 +118,7 @@ def prepare_sample(example: dict, tokenizer, max_length: int) -> dict:

padding = max_length - example.shape[0]
if padding > 0:
-example = flow.cat((example, flow.zeros(padding, dtype=flow.long) - 1))
+example = flow.cat((example.to_local(), flow.zeros(padding, dtype=flow.long) - 1))
elif padding < 0:
example = example[:max_length]
labels = copy.deepcopy(example)
@@ -129,10 +129,16 @@ def prepare_sample(example: dict, tokenizer, max_length: int) -> dict:
labels[~label_mask] = -1
example = example[:-1]
labels = labels[1:]
+if example_mask.is_global:
+    example_mask = example_mask.to_local()
example_mask = flow.where(
example_mask, flow.tensor(0, dtype=flow.float), flow.tensor(-float("inf"))
)
example_mask = example_mask[:-1]
+if example.is_global:
+    example = example.to_local()
+if labels.is_global:
+    labels = labels.to_local()
return {
"input_ids": example,
"labels": labels,
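
The padding and masking path above now moves global (SBP) tensors to their local shards before flow.cat and flow.where. A small hypothetical helper capturing the repeated guard (the PR inlines the checks instead of defining a helper):

import oneflow as flow

def ensure_local(t: flow.Tensor) -> flow.Tensor:
    # Global tensors expose .is_global and .to_local(); local tensors pass through unchanged.
    return t.to_local() if t.is_global else t
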
6 changes: 3 additions & 3 deletions projects/ChatGLM/chatglm.py
@@ -180,7 +180,7 @@ def scaled_dot_product_attention(
def forward(self, query_layer, key_layer, value_layer, attention_mask=None):
# query_layer: [sq, b, np, hn] -[premute]-> [batch_size, head_num, seq_len, hidden_size]
query_layer, key_layer, value_layer = [
-k.permute(1, 2, 0, 3) for k in [query_layer, key_layer, value_layer]
+k.transpose(1, 2).transpose(0, 2) for k in [query_layer, key_layer, value_layer]
]
if attention_mask is None and query_layer.shape[2] == key_layer.shape[2]:
context_layer = self.scaled_dot_product_attention(
@@ -194,7 +194,7 @@ def forward(self, query_layer, key_layer, value_layer, attention_mask=None):
query_layer, key_layer, value_layer, attention_mask
)

-context_layer = context_layer.permute(2, 0, 1, 3)
+context_layer = context_layer.transpose(0, 1).transpose(0, 2)
context_layer = context_layer.flatten(2)
return context_layer

@@ -709,7 +709,7 @@ def get_prompt(self, batch_size):
)
# seq_len, b, nh, hidden_size
past_key_values = self.dropout(past_key_values)
-past_key_values = past_key_values.permute([2, 1, 0, 3, 4]).split(2)
+past_key_values = past_key_values.transpose(0, 2).split(2)
return past_key_values

def forward(
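
The attention and prefix-cache changes above replace permute with chained transpose calls that yield the same dimension order. A quick equivalence check, as a standalone sketch:

import oneflow as flow

x = flow.randn(2, 3, 4, 5)
# [sq, b, np, hn] -> [b, np, sq, hn]
assert (x.permute(1, 2, 0, 3) == x.transpose(1, 2).transpose(0, 2)).all()
# [b, np, sq, hn] -> [sq, b, np, hn]
assert (x.permute(2, 0, 1, 3) == x.transpose(0, 1).transpose(0, 2)).all()

y = flow.randn(2, 3, 4, 5, 6)
assert (y.permute(2, 1, 0, 3, 4) == y.transpose(0, 2)).all()
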
16 changes: 9 additions & 7 deletions projects/ChatGLM/configs/chatglm_sft.py
@@ -17,15 +17,15 @@

# Hyperparameters
weight_decay = 0.1
-learning_rate = 2e-5
+learning_rate = 1e-7
max_source_len = 128
max_target_len = 128
max_length = 256
dataset_path = os.environ["DATA_DIR"]
pretrained_model_path = os.environ["CHATGLM_HF_DIR"]

# graph & optim
graph["enabled"] = True
graph["enabled"] = False

optim.update(
dict(
@@ -71,26 +71,28 @@

train.update(
dict(
-output_dir="./sft_result",
+output_dir="./sft_result/chatglm",
train_micro_batch_size=1,
test_micro_batch_size=1,
-train_epoch=3,
+train_epoch=1,
train_iter=1,
-log_period=10,
+log_period=1,
warmup_ratio=2 / 5,
num_accumulation_steps=8,
-rdma_enabled=True,
+rdma_enabled=False,
amp=dict(enabled=True),
activation_checkpoint=dict(enabled=True),
+input_placement_device="xpu",
checkpointer=dict(
period=5000,
max_to_keep=1,
),
dist=dict(
data_parallel_size=1,
tensor_parallel_size=1,
-pipeline_parallel_size=4,
+pipeline_parallel_size=1,
pipeline_num_layers=cfg.num_layers,
+device_type="xpu",
),
evaluation=dict(
enabled=False,
20 changes: 0 additions & 20 deletions projects/ChatGLM/dataset.py
@@ -21,10 +21,8 @@

from libai.data.structures import DistTensorData, Instance
from libai.utils import distributed as dist
-from libai.utils.logger import setup_logger

IGNORE_INDEX = -100
-logger = setup_logger()


class ChatGLMTrainDataset(Dataset):
@@ -40,7 +38,6 @@ def __init__(self, path, tokenizer, max_source_len=128, max_target_len=128, max_
self.max_len = max_length

example = self._preprocess(0)
-self.log_dataset_example(example)

def _preprocess(self, idx):
# inputs with format `<bos> X Y <eos>` labels with format `<ignore> ... <ignore> Y <eos>`
@@ -71,23 +68,6 @@ def _preprocess(self, idx):

return {"input_ids": input_ids, "labels": labels}

-def log_dataset_example(self, example: Dict[str, List[int]]) -> None:
-    if dist.is_main_process():
-        logger.info("input_ids:\n{}".format(example["input_ids"]))
-        logger.info(
-            "inputs:\n{}".format(
-                self.tokenizer.decode(example["input_ids"], skip_special_tokens=False)
-            )
-        )
-        logger.info("label_ids:\n{}".format(example["labels"]))
-        logger.info(
-            "labels:\n{}".format(
-                self.tokenizer.decode(
-                    list(filter(lambda x: x != IGNORE_INDEX, example["labels"])),
-                    skip_special_tokens=False,
-                )
-            )
-        )

def __len__(self):
return len(self.data["prompt"])
1 change: 0 additions & 1 deletion projects/ChatGLM/pipeline.py
@@ -188,7 +188,6 @@ def main(config_file, model_path, mode, device):
"a dog is flying on the sky",
"Wikipedia is a free online",
"what is beam search?",
"what is beam search?",
]
pipeline = TextGenerationPipeline(
config_file,
2 changes: 1 addition & 1 deletion projects/ChatGLM/utils/prepare_data_alpaca.py
@@ -16,7 +16,7 @@

def prepare(
destination_path: Path = Path(os.environ["DATA_DIR"]),
-test_split_fraction: float = 0.03865, # to get exactly 2000 test samples,
+test_split_fraction: float = 0.60, # to get exactly 2000 test samples,
seed: int = 42,
data_file_name: str = "alpaca_data_cleaned_archive.json",
data_file_url: str = "https://raw.githubusercontent.com/tloen/alpaca-lora/main/alpaca_data_cleaned_archive.json", # noqa
4 changes: 2 additions & 2 deletions projects/Llama/configs/llama_config.py
@@ -48,7 +48,7 @@
eos_token_id=2,
pad_token_id=0,
# train
pretrained_model_path="meta-llama/Llama-2-7b-hf",
pretrained_model_path="/root/models/Llama-2-7b-chat-hf",
)

cfg = DictConfig(cfg)
@@ -57,5 +57,5 @@
tokenization = OmegaConf.create()
tokenization.make_vocab_size_divisible_by = 1
tokenization.tokenizer = LazyCall(LlamaTokenizer)(
# pretrained_model_path="meta-llama/Llama-2-7b-hf/tokenizer.model"
pretrained_model_path="/root/models/Llama-2-7b-chat-hf/tokenizer.model"
)
16 changes: 9 additions & 7 deletions projects/Llama/configs/llama_sft.py
@@ -19,8 +19,8 @@
# Hyperparameters
weight_decay = 0.1
learning_rate = 5e-5
dataset_path = "alpaca_data"
pretrained_model_path = "meta-llama/Llama-2-7b-hf"
dataset_path = "./data/llama"
pretrained_model_path = "/root/models/Llama-2-7b-chat-hf"

# graph & optim
graph["enabled"] = False
@@ -61,26 +61,28 @@

train.update(
dict(
-output_dir="./sft_result",
-train_micro_batch_size=4,
+output_dir="./sft_result/llama",
+train_micro_batch_size=1,
test_micro_batch_size=1,
-train_epoch=3,
+train_epoch=1,
train_iter=1,
-log_period=10,
+log_period=1,
warmup_ratio=1 / 3,
num_accumulation_steps=8,
rdma_enabled=False,
amp=dict(enabled=True),
activation_checkpoint=dict(enabled=True),
+input_placement_device="xpu",
checkpointer=dict(
period=5000,
max_to_keep=20,
),
dist=dict(
data_parallel_size=1,
tensor_parallel_size=1,
-pipeline_parallel_size=8,
+pipeline_parallel_size=1,
pipeline_num_layers=cfg.hidden_layers,
+device_type="xpu",
),
evaluation=dict(
enabled=True,