Skip to content
8 changes: 4 additions & 4 deletions src/peft/tuners/boft/layer.py
Original file line number Diff line number Diff line change
Expand Up @@ -457,10 +457,10 @@ def cayley_batch(self, data):
skew_mat = 0.5 * (data - data.transpose(1, 2))
id_mat = torch.eye(r, device=data.device).unsqueeze(0).expand(b, r, c)

# Perform the Cayley parametrization
# Perform the Cayley parametrization, must be in float32
Q = torch.linalg.solve(id_mat + skew_mat, id_mat - skew_mat, left=False)

return Q
return Q.to(data.dtype)


class Linear(nn.Module, BOFTLayer):
Expand Down Expand Up @@ -586,7 +586,7 @@ def get_delta_weight(self, adapter) -> tuple[torch.Tensor, torch.Tensor]:
block_diagonal_butterfly = torch.block_diag(*torch.unbind(orth_rotate_butterfly))
block_diagonal_butterfly = block_diagonal_butterfly.unsqueeze(0)

boft_P = self.boft_P.to(block_diagonal_butterfly.device)
boft_P = self.boft_P.to(block_diagonal_butterfly.device, block_diagonal_butterfly.dtype)
butterfly_oft_mat_batch = torch.bmm(block_diagonal_butterfly, boft_P.permute(0, 2, 1))
butterfly_oft_mat_batch = torch.bmm(boft_P, butterfly_oft_mat_batch)
butterfly_oft_mat = butterfly_oft_mat_batch[0]
Expand Down Expand Up @@ -919,7 +919,7 @@ def get_delta_weight(self, adapter) -> tuple[torch.Tensor, torch.Tensor]:
block_diagonal_butterfly = torch.block_diag(*torch.unbind(orth_rotate_butterfly))
block_diagonal_butterfly = block_diagonal_butterfly.unsqueeze(0)

boft_P = self.boft_P.to(block_diagonal_butterfly.device)
boft_P = self.boft_P.to(block_diagonal_butterfly.device, block_diagonal_butterfly.dtype)
butterfly_oft_mat_batch = torch.bmm(block_diagonal_butterfly, boft_P.permute(0, 2, 1))
butterfly_oft_mat_batch = torch.bmm(boft_P, butterfly_oft_mat_batch)
butterfly_oft_mat = butterfly_oft_mat_batch[0]
Expand Down
4 changes: 4 additions & 0 deletions src/peft/tuners/c3a/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,5 +93,9 @@ def _create_new_module(c3a_config, adapter_name, target, **kwargs):

if isinstance(target_base_layer, torch.nn.Linear):
new_module = C3ALinear(target, adapter_name, **kwargs)
else:
raise ValueError(
f"Target module {target} is not supported. Currently, only `torch.nn.Linear` is supported."
)

return new_module
12 changes: 6 additions & 6 deletions src/peft/tuners/fourierft/layer.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
from transformers.pytorch_utils import Conv1D

from peft.tuners.tuners_utils import BaseTunerLayer, check_adapters_to_merge
from peft.utils.other import transpose


class FourierFTLayer(BaseTunerLayer):
Expand Down Expand Up @@ -139,7 +140,7 @@ def merge(self, safe_merge: bool = False, adapter_names: Optional[list[str]] = N
# Note that safe_merge will be slower than the normal merge
# because of the copy operation.
orig_weights = base_layer.weight.data.clone()
orig_weights += self.get_delta_weight(active_adapter)
orig_weights += transpose(self.get_delta_weight(active_adapter), self.fan_in_fan_out)

if not torch.isfinite(orig_weights).all():
raise ValueError(
Expand All @@ -148,7 +149,7 @@ def merge(self, safe_merge: bool = False, adapter_names: Optional[list[str]] = N

base_layer.weight.data = orig_weights
else:
base_layer.weight.data += self.get_delta_weight(active_adapter)
base_layer.weight.data += transpose(self.get_delta_weight(active_adapter), self.fan_in_fan_out)
self.merged_adapters.append(active_adapter)

def unmerge(self) -> None:
Expand All @@ -161,10 +162,9 @@ def unmerge(self) -> None:
while len(self.merged_adapters) > 0:
active_adapter = self.merged_adapters.pop()
if active_adapter in self.fourierft_spectrum.keys():
self.get_base_layer().weight.data -= self.get_delta_weight(active_adapter)

def get_delta_weight(self, adapter) -> torch.Tensor:
return super().get_delta_weight(adapter)
self.get_base_layer().weight.data -= transpose(
self.get_delta_weight(active_adapter), self.fan_in_fan_out
)

def forward(self, x: torch.Tensor, *args: Any, **kwargs: Any) -> torch.Tensor:
previous_dtype = x.dtype
Expand Down
2 changes: 1 addition & 1 deletion src/peft/tuners/ln_tuning/layer.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ def forward(self, x: torch.Tensor, *args, **kwargs) -> torch.Tensor:
if self.merged:
self.unmerge()
result = self.base_layer(x, *args, **kwargs)
elif self.merged:
elif self.merged or (len(self.active_adapters) == 0):
result = self.base_layer(x, *args, **kwargs)
else:
if len(self.active_adapters) != 1:
Expand Down
12 changes: 6 additions & 6 deletions src/peft/tuners/waveft/layer.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
from transformers.pytorch_utils import Conv1D

from peft.tuners.tuners_utils import BaseTunerLayer, check_adapters_to_merge
from peft.utils.other import transpose

from .constants import WAVELET_REDUCTIONS
from .waverec2d import waverec2d
Expand Down Expand Up @@ -237,7 +238,7 @@ def merge(self, safe_merge: bool = False, adapter_names: Optional[list[str]] = N
# Note that safe_merge will be slower than the normal merge
# because of the copy operation.
orig_weights = base_layer.weight.data.clone()
orig_weights += self.get_delta_weight(active_adapter)
orig_weights += transpose(self.get_delta_weight(active_adapter), self.fan_in_fan_out)

if not torch.isfinite(orig_weights).all():
raise ValueError(
Expand All @@ -246,7 +247,7 @@ def merge(self, safe_merge: bool = False, adapter_names: Optional[list[str]] = N

base_layer.weight.data = orig_weights
else:
base_layer.weight.data += self.get_delta_weight(active_adapter)
base_layer.weight.data += transpose(self.get_delta_weight(active_adapter), self.fan_in_fan_out)
self.merged_adapters.append(active_adapter)

def unmerge(self) -> None:
Expand All @@ -259,10 +260,9 @@ def unmerge(self) -> None:
while len(self.merged_adapters) > 0:
active_adapter = self.merged_adapters.pop()
if active_adapter in self.waveft_spectrum.keys():
self.get_base_layer().weight.data -= self.get_delta_weight(active_adapter)

def get_delta_weight(self, adapter) -> torch.Tensor:
return super().get_delta_weight(adapter)
self.get_base_layer().weight.data -= transpose(
self.get_delta_weight(active_adapter), self.fan_in_fan_out
)

def forward(self, x: torch.Tensor, *args: Any, **kwargs: Any) -> torch.Tensor:
previous_dtype = x.dtype
Expand Down
4 changes: 0 additions & 4 deletions tests/test_decoder_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -327,10 +327,6 @@ def _skip_alora_no_activation(config_cls, config_kwargs):
class TestDecoderModels(PeftCommonTester):
transformers_class = AutoModelForCausalLM

def skipTest(self, reason=""):
# for backwards compatibility with unittest style test classes
pytest.skip(reason)

def prepare_inputs_for_testing(self):
input_ids = torch.tensor([[1, 1, 1], [1, 2, 1]]).to(self.torch_device)
attention_mask = torch.tensor([[1, 1, 1], [1, 0, 1]]).to(self.torch_device)
Expand Down
4 changes: 0 additions & 4 deletions tests/test_encoder_decoder_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -228,10 +228,6 @@
class TestEncoderDecoderModels(PeftCommonTester):
transformers_class = AutoModelForSeq2SeqLM

def skipTest(self, reason=""):
# for backwards compatibility with unittest style test classes
pytest.skip(reason)

def prepare_inputs_for_testing(self):
input_ids = torch.tensor([[1, 1, 1], [1, 2, 1]]).to(self.torch_device)
decoder_input_ids = torch.tensor([[1, 1, 1], [1, 2, 1]]).to(self.torch_device)
Expand Down
4 changes: 0 additions & 4 deletions tests/test_feature_extraction_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -258,10 +258,6 @@ class TestPeftFeatureExtractionModel(PeftCommonTester):

transformers_class = AutoModel

def skipTest(self, reason=""):
# for backwards compatibility with unittest style test classes
pytest.skip(reason)

def prepare_inputs_for_testing(self):
input_ids = torch.tensor([[1, 1, 1], [1, 2, 1]]).to(self.torch_device)
attention_mask = torch.tensor([[1, 1, 1], [1, 0, 1]]).to(self.torch_device)
Expand Down
2 changes: 1 addition & 1 deletion tests/test_gpu_examples.py
Original file line number Diff line number Diff line change
Expand Up @@ -4558,7 +4558,7 @@ def _test_model(self, model, precision):
input_ids = torch.randint(0, 1000, (2, 10)).to(self.device)
if precision == torch.bfloat16:
if not is_bf16_available():
self.skipTest("Bfloat16 not supported on this device")
pytest.skip("Bfloat16 not supported on this device")

# Forward pass with test precision
with torch.autocast(enabled=True, dtype=precision, device_type=self.device):
Expand Down
2 changes: 1 addition & 1 deletion tests/test_mixed.py
Original file line number Diff line number Diff line change
Expand Up @@ -526,7 +526,7 @@ def test_target_first_layer_same_type(self, config0, config1):
def test_deeply_nested(self):
# a somewhat absurdly nested model using different adapter types
if platform.system() == "Linux":
self.skipTest("This test fails but only on GitHub CI with Linux systems.")
pytest.skip("This test fails but only on GitHub CI with Linux systems.")

atol = 1e-5
rtol = 1e-5
Expand Down
4 changes: 0 additions & 4 deletions tests/test_seq_classifier.py
Original file line number Diff line number Diff line change
Expand Up @@ -234,10 +234,6 @@ class TestSequenceClassificationModels(PeftCommonTester):

transformers_class = AutoModelForSequenceClassification

def skipTest(self, reason=""):
# for backwards compatibility with unittest style test classes
pytest.skip(reason)

def prepare_inputs_for_testing(self):
input_ids = torch.tensor([[1, 1, 1], [1, 2, 1]]).to(self.torch_device)
attention_mask = torch.tensor([[1, 1, 1], [1, 0, 1]]).to(self.torch_device)
Expand Down
4 changes: 0 additions & 4 deletions tests/test_target_parameters.py
Original file line number Diff line number Diff line change
Expand Up @@ -169,10 +169,6 @@ class TestDecoderModelsTargetParameters(PeftCommonTester):
# generally, nothing is broken.
transformers_class = MyAutoModelForCausalLM

def skipTest(self, reason=""):
# for backwards compatibility with unittest style test classes
pytest.skip(reason)

def prepare_inputs_for_testing(self):
input_ids = torch.tensor([[1, 1, 1], [1, 2, 1]]).to(self.torch_device)
attention_mask = torch.tensor([[1, 1, 1], [1, 0, 1]]).to(self.torch_device)
Expand Down
Loading
Loading