Add leave_out to LoRA / (IA)^3.
Fix `leave_out` for Compacter.
Add tests for `leave_out`.
calpt committed Nov 25, 2023
1 parent 4c00622 commit 2f45b78
Showing 8 changed files with 50 additions and 4 deletions.
4 changes: 4 additions & 0 deletions src/adapters/configuration/adapter_config.py
@@ -438,6 +438,8 @@ class LoRAConfig(AdapterConfig):
Defaults to False.
output_lora (bool, optional): If True, add LoRA to the output MLP weights of a model.
Defaults to False.
leave_out (:obj:`List[int]`, optional):
The IDs of the layers (starting at 0) where NO adapter modules should be added.
r (int, optional): The rank of the LoRA layer. Defaults to 8.
alpha (int, optional): The hyperparameter used for scaling the LoRA reparametrization. Defaults to 8.
dropout (float, optional): The dropout rate used in the LoRA layer. Defaults to 0.0.
@@ -460,6 +462,7 @@ class LoRAConfig(AdapterConfig):
selfattn_lora: bool = True
intermediate_lora: bool = False
output_lora: bool = False
leave_out: List[int] = field(default_factory=list)

r: int = 8
alpha: int = 8
@@ -481,6 +484,7 @@ class IA3Config(LoRAConfig):
selfattn_lora: bool = True
intermediate_lora: bool = True
output_lora: bool = False
leave_out: List[int] = field(default_factory=list)

r: int = 1
alpha: int = 1
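
For reference, here is a minimal usage sketch of the new `leave_out` option (not part of this diff). The checkpoint name and the `adapters.init()` call are assumptions based on the library's standard entry points; the adapter methods mirror those used in the tests below.

# Hypothetical sketch: LoRA with `leave_out`; the checkpoint name is illustrative.
import adapters
from adapters import LoRAConfig
from transformers import AutoModel

model = AutoModel.from_pretrained("bert-base-uncased")  # assumed checkpoint
adapters.init(model)  # attach adapter support to the plain transformers model

# Skip LoRA modules in the first two transformer layers (IDs 0 and 1).
config = LoRAConfig(r=8, alpha=8, leave_out=[0, 1])
model.add_adapter("lora_without_first_layers", config=config)
model.set_active_adapters(["lora_without_first_layers"])

# get_adapter() returns the adapter modules keyed by layer ID; 0 and 1 should be absent.
print(sorted(model.get_adapter("lora_without_first_layers").keys()))

The same keyword works for `IA3Config`, which inherits from `LoRAConfig`, e.g. `IA3Config(leave_out=[0, 1])`.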
4 changes: 0 additions & 4 deletions src/adapters/model_mixin.py
@@ -573,10 +573,6 @@ def _add_adapter_weights(self, adapter_name: str):

# PHM Layer
if self.adapters_config.match(adapter_name, BnConfig, location_key="phm_layer"):
adapter_module = list(self.get_adapter(adapter_name)[0].values())[0]
# if multiple adapters with same location key exist they are returned as a modulelist
if isinstance(adapter_module, nn.ModuleList):
adapter_module = adapter_module[0]
adapter_config = self.adapters_config.match(adapter_name, BnConfig, location_key="phm_layer")
if adapter_config["shared_phm_rule"] or adapter_config["shared_W_phm"]:
if self.config.model_type in SUBMODEL_NAMES:
20 changes: 20 additions & 0 deletions tests/methods/base.py
@@ -58,6 +58,26 @@ def run_add_test(self, model, adapter_config, filter_keys):
self.assertTrue(v.requires_grad, k)
self.assertTrue(has_weights)

def run_leave_out_test(self, model, adapter_config, leave_out):
model.eval()

adapter_config = adapter_config.replace(leave_out=leave_out)
name = "test_adapter_" + adapter_config.__class__.__name__
model.add_adapter(name, config=adapter_config)
model.set_active_adapters([name])

# adapter is correctly added to config
self.assert_adapter_available(model, name)

adapter = model.get_adapter(name)

self.assertNotEqual(len(adapter), 0)
found_layers = list(adapter.keys())
for layer in leave_out:
self.assertNotIn(layer, found_layers)

model.delete_adapter(name)

def run_average_test(self, model, adapter_config, filter_keys):
model.eval()

8 changes: 8 additions & 0 deletions tests/methods/test_adapter_common.py
@@ -45,6 +45,14 @@ def test_add_adapter(self):
with self.subTest(model_class=model.__class__.__name__, config=adapter_config.__class__.__name__):
self.run_add_test(model, adapter_config, filter_keys)

def test_leave_out_adapter(self):
model = self.get_model()
model.eval()

for adapter_config, _ in self.adapter_configs_to_test:
with self.subTest(model_class=model.__class__.__name__, config=adapter_config.__class__.__name__):
self.run_leave_out_test(model, adapter_config, [0, 1])

def test_average_adapter(self):
model = self.get_model()
model.eval()
4 changes: 4 additions & 0 deletions tests/methods/test_compacter.py
@@ -10,6 +10,10 @@ def test_add_compacter(self):
model = self.get_model()
self.run_add_test(model, CompacterPlusPlusConfig(phm_dim=2, reduction_factor=8), ["adapters.{name}."])

def test_leave_out_compacter(self):
model = self.get_model()
self.run_leave_out_test(model, CompacterPlusPlusConfig(phm_dim=2, reduction_factor=8), [0, 1])

def test_average_compacter(self):
model = self.get_model()
self.run_average_test(model, CompacterPlusPlusConfig(phm_dim=2, reduction_factor=8), ["adapters.{name}."])
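
Since the commit also fixes `leave_out` for Compacter, a bottleneck-style config can be restricted in the same way. A short sketch mirroring the test above; the model setup assumptions are the same as in the earlier LoRA sketch, and the hyperparameters are copied from the test:

# Hypothetical sketch: Compacter++ with `leave_out`; the checkpoint name is illustrative.
import adapters
from adapters import CompacterPlusPlusConfig
from transformers import AutoModel

model = AutoModel.from_pretrained("bert-base-uncased")  # assumed checkpoint
adapters.init(model)

config = CompacterPlusPlusConfig(phm_dim=2, reduction_factor=8, leave_out=[0, 1])
model.add_adapter("compacter_without_first_layers", config=config)

# No modules for this adapter should be registered in layers 0 and 1.
layers_with_modules = model.get_adapter("compacter_without_first_layers").keys()
assert 0 not in layers_with_modules and 1 not in layers_with_modules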
4 changes: 4 additions & 0 deletions tests/methods/test_ia3.py
@@ -10,6 +10,10 @@ def test_add_ia3(self):
model = self.get_model()
self.run_add_test(model, IA3Config(), ["loras.{name}."])

def test_leave_out_ia3(self):
model = self.get_model()
self.run_leave_out_test(model, IA3Config(), [0, 1])

def test_average_ia3(self):
model = self.get_model()
self.run_average_test(model, IA3Config(), ["loras.{name}."])
4 changes: 4 additions & 0 deletions tests/methods/test_lora.py
@@ -10,6 +10,10 @@ def test_add_lora(self):
model = self.get_model()
self.run_add_test(model, LoRAConfig(), ["loras.{name}."])

def test_leave_out_lora(self):
model = self.get_model()
self.run_leave_out_test(model, LoRAConfig(), [0, 1])

def test_average_lora(self):
model = self.get_model()
self.run_average_test(model, LoRAConfig(), ["loras.{name}."])
6 changes: 6 additions & 0 deletions tests/methods/test_prefix_tuning.py
@@ -13,6 +13,12 @@ def test_add_prefix_tuning(self):
model = self.get_model()
self.run_add_test(model, PrefixTuningConfig(flat=True), ["prefix_tunings.{name}."])

def test_leave_out_prefix_tuning(self):
# Note: For prefix tuning this test is a special case, because the prefix tuning weights are only returned for the first layer that has a prefix, not for all layers.
# It still tests the right thing: since we leave out layers at the start, the returned entry moves to the next layer that has a prefix.
model = self.get_model()
self.run_leave_out_test(model, PrefixTuningConfig(flat=True), [0, 1])

def test_average_prefix_tuning(self):
model = self.get_model()
self.run_average_test(model, PrefixTuningConfig(flat=True), ["prefix_tunings.{name}."])
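
A hedged illustration of the behaviour described in the note above (same model setup assumptions as in the earlier sketches): with `leave_out=[0, 1]`, the prefix tuning weights reported by `get_adapter()` should appear under the first layer that still has a prefix rather than under layer 0.

# Hypothetical sketch: prefix tuning with `leave_out`; the checkpoint name is illustrative.
import adapters
from adapters import PrefixTuningConfig
from transformers import AutoModel

model = AutoModel.from_pretrained("bert-base-uncased")  # assumed checkpoint
adapters.init(model)

config = PrefixTuningConfig(flat=True, leave_out=[0, 1])
model.add_adapter("prefix_without_first_layers", config=config)

# Per the note above, the returned layer IDs should not include the left-out layers 0 and 1.
print(sorted(model.get_adapter("prefix_without_first_layers").keys()))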
