+import os
+
+import torch
+
+from llamafactory.hparams import get_train_args
+from llamafactory.model import load_model, load_tokenizer
+
+
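+# Tiny model used to keep the tests fast; the default id names a random-weight
+# Llama checkpoint on the Hugging Face Hub and can be overridden via TINY_LLAMA.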
+TINY_LLAMA = os.environ.get("TINY_LLAMA", "llamafactory/tiny-random-LlamaForCausalLM")
+
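+# Minimal SFT training config shared by both tests; finetuning_type "freeze"
+# selects partial-parameter (freeze) fine-tuning.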
+TRAINING_ARGS = {
+    "model_name_or_path": TINY_LLAMA,
+    "stage": "sft",
+    "do_train": True,
+    "finetuning_type": "freeze",
+    "dataset": "llamafactory/tiny_dataset",
+    "dataset_dir": "ONLINE",
+    "template": "llama3",
+    "cutoff_len": 1024,
+    "overwrite_cache": True,
+    "output_dir": "dummy_dir",
+    "overwrite_output_dir": True,
+    "fp16": True,
+}
+
+
+def test_freeze_all_modules():
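+    # A positive freeze_trainable_layers keeps only the last n hidden layers
+    # trainable (here n=1, i.e. model.layers.1); everything else stays frozen.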
+    model_args, _, _, finetuning_args, _ = get_train_args(
+        {
+            "freeze_trainable_layers": 1,
+            **TRAINING_ARGS,
+        }
+    )
+    tokenizer_module = load_tokenizer(model_args)
+    model = load_model(tokenizer_module["tokenizer"], model_args, finetuning_args, is_trainable=True)
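+    # Trainable parameters are cast to fp32, while frozen ones keep the fp16
+    # dtype requested by "fp16": True.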
+    for name, param in model.named_parameters():
+        if name.startswith("model.layers.1."):
+            assert param.requires_grad is True
+            assert param.dtype == torch.float32
+        else:
+            assert param.requires_grad is False
+            assert param.dtype == torch.float16
+
+
+def test_freeze_extra_modules():
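+    # freeze_extra_modules additionally marks the named modules (here the token
+    # embeddings and the LM head) as trainable alongside the last hidden layer.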
+    model_args, _, _, finetuning_args, _ = get_train_args(
+        {
+            "freeze_trainable_layers": 1,
+            "freeze_extra_modules": "embed_tokens,lm_head",
+            **TRAINING_ARGS,
+        }
+    )
+    tokenizer_module = load_tokenizer(model_args)
+    model = load_model(tokenizer_module["tokenizer"], model_args, finetuning_args, is_trainable=True)
+    for name, param in model.named_parameters():
+        if name.startswith("model.layers.1.") or any(module in name for module in ["embed_tokens", "lm_head"]):
+            assert param.requires_grad is True
+            assert param.dtype == torch.float32
+        else:
+            assert param.requires_grad is False
+            assert param.dtype == torch.float16