From dc9a52669e2532f5f3eff6dd0da46177cd7ff7fd Mon Sep 17 00:00:00 2001
From: Benjamin Bossan
Date: Thu, 16 Oct 2025 11:46:19 +0200
Subject: [PATCH] TST Add GPU training tests for p/prefix tuning

There are already GPU tests for P-tuning and prefix tuning, but none that
involve actual training. This PR adds two training tests (analogous to what
we have for LoRA) using the transformers Trainer. This is important because
changes in Trainer, for instance in how the cache is handled, can affect
whether training with prompt learning methods still works.

A recent example of such a change is
https://github.com/huggingface/transformers/pull/41585. Although it doesn't
break anything, it shows the potential for changes in the Trainer class to
impact these PEFT methods.
---
 tests/test_gpu_examples.py | 86 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 86 insertions(+)

diff --git a/tests/test_gpu_examples.py b/tests/test_gpu_examples.py
index e37d78d49e..d2a2a08539 100644
--- a/tests/test_gpu_examples.py
+++ b/tests/test_gpu_examples.py
@@ -4638,6 +4638,7 @@ def test_boft_half_conv(self):
 
 class TestPTuningReproducibility:
     device = infer_device()
+    causal_lm_model_id = "facebook/opt-125m"
 
     @require_non_cpu
     @require_deterministic_for_xpu
@@ -4674,6 +4675,48 @@ def test_p_tuning_exactly_reproducible_after_loading(self, tmp_path):
         torch.testing.assert_close(output_loaded, output_peft)
         torch.testing.assert_close(gen_loaded, gen_peft)
 
+    @require_bitsandbytes
+    @pytest.mark.single_gpu_tests
+    def test_p_tuning_causal_lm_training_8bit_bnb(self):
+        # test is analogous to PeftBnbGPUExampleTests.test_causal_lm_training
+        with tempfile.TemporaryDirectory() as tmp_dir:
+            model = AutoModelForCausalLM.from_pretrained(
+                self.causal_lm_model_id,
+                quantization_config=BitsAndBytesConfig(load_in_8bit=True),
+                device_map="auto",
+            )
+
+            tokenizer = AutoTokenizer.from_pretrained(self.causal_lm_model_id)
+            config = PromptEncoderConfig(task_type="CAUSAL_LM", num_virtual_tokens=20, encoder_hidden_size=128)
+            model = get_peft_model(model, config)
+
+            data = load_dataset_english_quotes()
+            data = data.map(lambda samples: tokenizer(samples["quote"]), batched=True)
+
+            trainer = Trainer(
+                model=model,
+                train_dataset=data["train"],
+                args=TrainingArguments(
+                    per_device_train_batch_size=4,
+                    warmup_steps=2,
+                    max_steps=3,
+                    learning_rate=2e-4,
+                    fp16=True,
+                    logging_steps=1,
+                    output_dir=tmp_dir,
+                ),
+                data_collator=DataCollatorForLanguageModeling(tokenizer, mlm=False),
+            )
+            trainer.train()
+
+            model.cpu().save_pretrained(tmp_dir)
+
+            assert "adapter_config.json" in os.listdir(tmp_dir)
+            assert SAFETENSORS_WEIGHTS_NAME in os.listdir(tmp_dir)
+
+            # assert loss is not None
+            assert trainer.state.log_history[-1]["train_loss"] is not None
+
 
 @pytest.mark.single_gpu_tests
 class TestLowCpuMemUsageDifferentDevices:
@@ -4947,6 +4990,7 @@ def test_alora_forward_consistency(self, model, model_bnb, peft_config):
 @pytest.mark.multi_gpu_tests
 class TestPrefixTuning:
     device = infer_device()
+    causal_lm_model_id = "facebook/opt-125m"
 
     @require_torch_multi_accelerator
     def test_prefix_tuning_multiple_devices_decoder_model(self):
@@ -5007,6 +5051,48 @@ def test_prefix_tuning_multiple_devices_encoder_decoder_model(self):
         model = get_peft_model(model, peft_config)
         model.generate(**inputs)  # does not raise
 
+    @require_bitsandbytes
+    @pytest.mark.single_gpu_tests
+    def test_prefix_tuning_causal_lm_training_8bit_bnb(self):
+        # test is analogous to PeftBnbGPUExampleTests.test_causal_lm_training
+        with tempfile.TemporaryDirectory() as tmp_dir:
+            model = AutoModelForCausalLM.from_pretrained(
+                self.causal_lm_model_id,
+                quantization_config=BitsAndBytesConfig(load_in_8bit=True),
+                device_map="auto",
+            )
+
+            tokenizer = AutoTokenizer.from_pretrained(self.causal_lm_model_id)
+            config = PrefixTuningConfig(num_virtual_tokens=10, task_type=TaskType.CAUSAL_LM)
+            model = get_peft_model(model, config)
+
+            data = load_dataset_english_quotes()
+            data = data.map(lambda samples: tokenizer(samples["quote"]), batched=True)
+
+            trainer = Trainer(
+                model=model,
+                train_dataset=data["train"],
+                args=TrainingArguments(
+                    per_device_train_batch_size=4,
+                    warmup_steps=2,
+                    max_steps=3,
+                    learning_rate=2e-4,
+                    fp16=True,
+                    logging_steps=1,
+                    output_dir=tmp_dir,
+                ),
+                data_collator=DataCollatorForLanguageModeling(tokenizer, mlm=False),
+            )
+            trainer.train()
+
+            model.cpu().save_pretrained(tmp_dir)
+
+            assert "adapter_config.json" in os.listdir(tmp_dir)
+            assert SAFETENSORS_WEIGHTS_NAME in os.listdir(tmp_dir)
+
+            # assert loss is not None
+            assert trainer.state.log_history[-1]["train_loss"] is not None
+
 
 @pytest.mark.skipif(not (torch.cuda.is_available() or is_xpu_available()), reason="test requires a GPU or XPU")
 @pytest.mark.single_gpu_tests
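
Note, not part of the patch: a minimal, hedged sketch of a quick local sanity
check one could run before the full Trainer-based tests above. The model id is
reused from the diff; the single hand-made batch (standing in for the
load_dataset_english_quotes() test helper), the prefix tuning choice, and the
standalone script form are assumptions, as is the availability of a CUDA GPU
with bitsandbytes installed.

# Sketch only. Assumptions: model id reused from the tests above; one
# hand-made batch instead of the english_quotes dataset helper; CUDA GPU
# and bitsandbytes available.
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
from peft import PrefixTuningConfig, TaskType, get_peft_model

model_id = "facebook/opt-125m"
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    quantization_config=BitsAndBytesConfig(load_in_8bit=True),
    device_map="auto",
)
model = get_peft_model(model, PrefixTuningConfig(num_virtual_tokens=10, task_type=TaskType.CAUSAL_LM))
model.print_trainable_parameters()  # only the prompt learning parameters should be trainable

tokenizer = AutoTokenizer.from_pretrained(model_id)
batch = tokenizer(["A short quote to run one forward/backward pass."], return_tensors="pt").to(model.device)
loss = model(**batch, labels=batch["input_ids"]).loss
loss.backward()  # gradients should reach the prompt encoder without errors
print(loss.item())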