diff --git a/src/adapters/configuration/adapter_config.py b/src/adapters/configuration/adapter_config.py
index 63039a8459..c3b45ca313 100644
--- a/src/adapters/configuration/adapter_config.py
+++ b/src/adapters/configuration/adapter_config.py
@@ -438,6 +438,8 @@ class LoRAConfig(AdapterConfig):
             Defaults to False.
         output_lora (bool, optional): If True, add LoRA to the output MLP weights of a model.
             Defaults to False.
+        leave_out (:obj:`List[int]`, optional):
+            The IDs of the layers (starting at 0) where NO adapter modules should be added.
         r (int, optional): The rank of the LoRA layer. Defaults to 8.
         alpha (int, optional): The hyperparameter used for scaling the LoRA reparametrization. Defaults to 8.
         dropout (float, optional): The dropout rate used in the LoRA layer. Defaults to 0.0.
@@ -460,6 +462,7 @@ class LoRAConfig(AdapterConfig):
     selfattn_lora: bool = True
     intermediate_lora: bool = False
     output_lora: bool = False
+    leave_out: List[int] = field(default_factory=list)
 
     r: int = 8
     alpha: int = 8
@@ -481,6 +484,7 @@ class IA3Config(LoRAConfig):
     selfattn_lora: bool = True
     intermediate_lora: bool = True
     output_lora: bool = False
+    leave_out: List[int] = field(default_factory=list)
 
     r: int = 1
     alpha: int = 1
diff --git a/src/adapters/model_mixin.py b/src/adapters/model_mixin.py
index a4a1b8d17c..a023cf6156 100644
--- a/src/adapters/model_mixin.py
+++ b/src/adapters/model_mixin.py
@@ -573,10 +573,6 @@ def _add_adapter_weights(self, adapter_name: str):
 
         # PHM Layer
         if self.adapters_config.match(adapter_name, BnConfig, location_key="phm_layer"):
-            adapter_module = list(self.get_adapter(adapter_name)[0].values())[0]
-            # if multiple adapters with same location key exist they are returned as a modulelist
-            if isinstance(adapter_module, nn.ModuleList):
-                adapter_module = adapter_module[0]
             adapter_config = self.adapters_config.match(adapter_name, BnConfig, location_key="phm_layer")
             if adapter_config["shared_phm_rule"] or adapter_config["shared_W_phm"]:
                 if self.config.model_type in SUBMODEL_NAMES:
diff --git a/tests/methods/base.py b/tests/methods/base.py
index 1ec6d276dc..67d8e26909 100644
--- a/tests/methods/base.py
+++ b/tests/methods/base.py
@@ -58,6 +58,26 @@ def run_add_test(self, model, adapter_config, filter_keys):
             self.assertTrue(v.requires_grad, k)
         self.assertTrue(has_weights)
 
+    def run_leave_out_test(self, model, adapter_config, leave_out):
+        model.eval()
+
+        adapter_config = adapter_config.replace(leave_out=leave_out)
+        name = "test_adapter_" + adapter_config.__class__.__name__
+        model.add_adapter(name, config=adapter_config)
+        model.set_active_adapters([name])
+
+        # adapter is correctly added to config
+        self.assert_adapter_available(model, name)
+
+        adapter = model.get_adapter(name)
+
+        self.assertNotEqual(len(adapter), 0)
+        found_layers = list(adapter.keys())
+        for layer in leave_out:
+            self.assertNotIn(layer, found_layers)
+
+        model.delete_adapter(name)
+
     def run_average_test(self, model, adapter_config, filter_keys):
         model.eval()
 
diff --git a/tests/methods/test_adapter_common.py b/tests/methods/test_adapter_common.py
index 5c543dadca..8d78c88e4a 100644
--- a/tests/methods/test_adapter_common.py
+++ b/tests/methods/test_adapter_common.py
@@ -45,6 +45,14 @@ def test_add_adapter(self):
             with self.subTest(model_class=model.__class__.__name__, config=adapter_config.__class__.__name__):
                 self.run_add_test(model, adapter_config, filter_keys)
 
+    def test_leave_out_adapter(self):
+        model = self.get_model()
+        model.eval()
+
+        for adapter_config, _ in self.adapter_configs_to_test:
+            with self.subTest(model_class=model.__class__.__name__, config=adapter_config.__class__.__name__):
+                self.run_leave_out_test(model, adapter_config, self.leave_out_layers)
+
     def test_average_adapter(self):
         model = self.get_model()
         model.eval()
diff --git a/tests/methods/test_compacter.py b/tests/methods/test_compacter.py
index 253b0fbf4f..1b976da2de 100644
--- a/tests/methods/test_compacter.py
+++ b/tests/methods/test_compacter.py
@@ -10,6 +10,10 @@ def test_add_compacter(self):
         model = self.get_model()
         self.run_add_test(model, CompacterPlusPlusConfig(phm_dim=2, reduction_factor=8), ["adapters.{name}."])
 
+    def test_leave_out_compacter(self):
+        model = self.get_model()
+        self.run_leave_out_test(model, CompacterPlusPlusConfig(phm_dim=2, reduction_factor=8), self.leave_out_layers)
+
     def test_average_compacter(self):
         model = self.get_model()
         self.run_average_test(model, CompacterPlusPlusConfig(phm_dim=2, reduction_factor=8), ["adapters.{name}."])
diff --git a/tests/methods/test_ia3.py b/tests/methods/test_ia3.py
index 4d94b95ea2..0dc81d02be 100644
--- a/tests/methods/test_ia3.py
+++ b/tests/methods/test_ia3.py
@@ -10,6 +10,10 @@ def test_add_ia3(self):
         model = self.get_model()
         self.run_add_test(model, IA3Config(), ["loras.{name}."])
 
+    def test_leave_out_ia3(self):
+        model = self.get_model()
+        self.run_leave_out_test(model, IA3Config(), self.leave_out_layers)
+
     def test_average_ia3(self):
         model = self.get_model()
         self.run_average_test(model, IA3Config(), ["loras.{name}."])
diff --git a/tests/methods/test_lora.py b/tests/methods/test_lora.py
index e3df77e40a..90f6d26ae8 100644
--- a/tests/methods/test_lora.py
+++ b/tests/methods/test_lora.py
@@ -10,6 +10,10 @@ def test_add_lora(self):
         model = self.get_model()
         self.run_add_test(model, LoRAConfig(), ["loras.{name}."])
 
+    def test_leave_out_lora(self):
+        model = self.get_model()
+        self.run_leave_out_test(model, LoRAConfig(), self.leave_out_layers)
+
     def test_average_lora(self):
         model = self.get_model()
         self.run_average_test(model, LoRAConfig(), ["loras.{name}."])
diff --git a/tests/methods/test_prefix_tuning.py b/tests/methods/test_prefix_tuning.py
index 798f4b19d4..64f591d70a 100644
--- a/tests/methods/test_prefix_tuning.py
+++ b/tests/methods/test_prefix_tuning.py
@@ -13,6 +13,12 @@ def test_add_prefix_tuning(self):
         model = self.get_model()
         self.run_add_test(model, PrefixTuningConfig(flat=True), ["prefix_tunings.{name}."])
 
+    def test_leave_out_prefix_tuning(self):
+        # Note: for prefix tuning, this test is a little weird as the prefix tuning weights are only returned for the first layer with a prefix and not all.
+        # It still kind of tests the right thing as we prune layers from the end, which will move the returned layer to the next layer with a prefix.
+        model = self.get_model()
+        self.run_leave_out_test(model, PrefixTuningConfig(flat=True), self.leave_out_layers)
+
     def test_average_prefix_tuning(self):
         model = self.get_model()
         self.run_average_test(model, PrefixTuningConfig(flat=True), ["prefix_tunings.{name}."])
diff --git a/tests/test_adapter.py b/tests/test_adapter.py
index 0d7ea0b1fa..c01f0295a2 100644
--- a/tests/test_adapter.py
+++ b/tests/test_adapter.py
@@ -37,6 +37,7 @@ class AdapterTestBase:
     model_class = AutoAdapterModel
     # Default shape of inputs to use
    default_input_samples_shape = (3, 64)
+    leave_out_layers = [0, 1]
     do_run_train_tests = True
 
     def get_model(self):
diff --git a/tests/test_albert.py b/tests/test_albert.py
index 054dd31278..f7b1e98e9f 100644
--- a/tests/test_albert.py
+++ b/tests/test_albert.py
@@ -34,6 +34,7 @@ class AlbertAdapterTestBase(AdapterTestBase):
         num_hidden_groups=2,
     )
     tokenizer_name = "albert-base-v2"
+    leave_out_layers = [0]
 
 
 @require_torch
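
For context, a minimal usage sketch of the new `leave_out` option (not part of the diff; the checkpoint and adapter names are illustrative assumptions, while the config field and model methods follow the API exercised in the tests above):

    from adapters import AutoAdapterModel, LoRAConfig

    # Hypothetical example: add LoRA everywhere except the first two transformer layers.
    model = AutoAdapterModel.from_pretrained("roberta-base")  # assumed checkpoint
    config = LoRAConfig(r=8, alpha=8, leave_out=[0, 1])
    model.add_adapter("skip_lora", config=config)
    model.set_active_adapters(["skip_lora"])

    # As in run_leave_out_test above, layers listed in leave_out should not appear
    # among the per-layer modules returned by get_adapter().
    assert all(layer not in model.get_adapter("skip_lora") for layer in [0, 1])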