
Add leave_out to LoRA / (IA)^3 #608

Merged 2 commits on Dec 1, 2023
4 changes: 4 additions & 0 deletions src/adapters/configuration/adapter_config.py
@@ -438,6 +438,8 @@ class LoRAConfig(AdapterConfig):
Defaults to False.
output_lora (bool, optional): If True, add LoRA to the output MLP weights of a model.
Defaults to False.
leave_out (:obj:`List[int]`, optional):
The IDs of the layers (starting at 0) where NO adapter modules should be added.
r (int, optional): The rank of the LoRA layer. Defaults to 8.
alpha (int, optional): The hyperparameter used for scaling the LoRA reparametrization. Defaults to 8.
dropout (float, optional): The dropout rate used in the LoRA layer. Defaults to 0.0.
@@ -460,6 +462,7 @@ class LoRAConfig(AdapterConfig):
selfattn_lora: bool = True
intermediate_lora: bool = False
output_lora: bool = False
leave_out: List[int] = field(default_factory=list)

r: int = 8
alpha: int = 8
@@ -481,6 +484,7 @@ class IA3Config(LoRAConfig):
selfattn_lora: bool = True
intermediate_lora: bool = True
output_lora: bool = False
leave_out: List[int] = field(default_factory=list)

r: int = 1
alpha: int = 1
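Not part of this diff: a minimal usage sketch of the new leave_out option for LoRA and (IA)^3, based only on the API exercised by the tests in this PR (add_adapter, set_active_adapters, get_adapter). The checkpoint and adapter names are illustrative placeholders.

# Sketch only, assuming the adapters library with this PR applied.
from adapters import AutoAdapterModel, IA3Config, LoRAConfig

model = AutoAdapterModel.from_pretrained("bert-base-uncased")  # hypothetical base model

# Add a LoRA adapter to every layer except layers 0 and 1.
lora_config = LoRAConfig(r=8, alpha=8, leave_out=[0, 1])
model.add_adapter("my_lora", config=lora_config)

# (IA)^3 inherits from LoRAConfig, so the same option applies there.
ia3_config = IA3Config(leave_out=[0, 1])
model.add_adapter("my_ia3", config=ia3_config)

model.set_active_adapters("my_lora")

# get_adapter returns the added modules keyed by layer index; the left-out
# layers should be missing, which is exactly what run_leave_out_test asserts.
print(sorted(model.get_adapter("my_lora").keys()))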
4 changes: 0 additions & 4 deletions src/adapters/model_mixin.py
@@ -573,10 +573,6 @@ def _add_adapter_weights(self, adapter_name: str):

# PHM Layer
if self.adapters_config.match(adapter_name, BnConfig, location_key="phm_layer"):
adapter_module = list(self.get_adapter(adapter_name)[0].values())[0]
# if multiple adapters with same location key exist they are returned as a modulelist
if isinstance(adapter_module, nn.ModuleList):
adapter_module = adapter_module[0]
adapter_config = self.adapters_config.match(adapter_name, BnConfig, location_key="phm_layer")
if adapter_config["shared_phm_rule"] or adapter_config["shared_W_phm"]:
if self.config.model_type in SUBMODEL_NAMES:
20 changes: 20 additions & 0 deletions tests/methods/base.py
@@ -58,6 +58,26 @@ def run_add_test(self, model, adapter_config, filter_keys):
self.assertTrue(v.requires_grad, k)
self.assertTrue(has_weights)

def run_leave_out_test(self, model, adapter_config, leave_out):
model.eval()

adapter_config = adapter_config.replace(leave_out=leave_out)
name = "test_adapter_" + adapter_config.__class__.__name__
model.add_adapter(name, config=adapter_config)
model.set_active_adapters([name])

# adapter is correctly added to config
self.assert_adapter_available(model, name)

adapter = model.get_adapter(name)

self.assertNotEqual(len(adapter), 0)
found_layers = list(adapter.keys())
for layer in leave_out:
self.assertNotIn(layer, found_layers)

model.delete_adapter(name)

def run_average_test(self, model, adapter_config, filter_keys):
model.eval()

8 changes: 8 additions & 0 deletions tests/methods/test_adapter_common.py
@@ -45,6 +45,14 @@ def test_add_adapter(self):
with self.subTest(model_class=model.__class__.__name__, config=adapter_config.__class__.__name__):
self.run_add_test(model, adapter_config, filter_keys)

def test_leave_out_adapter(self):
model = self.get_model()
model.eval()

for adapter_config, _ in self.adapter_configs_to_test:
with self.subTest(model_class=model.__class__.__name__, config=adapter_config.__class__.__name__):
self.run_leave_out_test(model, adapter_config, self.leave_out_layers)

def test_average_adapter(self):
model = self.get_model()
model.eval()
4 changes: 4 additions & 0 deletions tests/methods/test_compacter.py
@@ -10,6 +10,10 @@ def test_add_compacter(self):
model = self.get_model()
self.run_add_test(model, CompacterPlusPlusConfig(phm_dim=2, reduction_factor=8), ["adapters.{name}."])

def test_leave_out_compacter(self):
model = self.get_model()
self.run_leave_out_test(model, CompacterPlusPlusConfig(phm_dim=2, reduction_factor=8), self.leave_out_layers)

def test_average_compacter(self):
model = self.get_model()
self.run_average_test(model, CompacterPlusPlusConfig(phm_dim=2, reduction_factor=8), ["adapters.{name}."])
4 changes: 4 additions & 0 deletions tests/methods/test_ia3.py
@@ -10,6 +10,10 @@ def test_add_ia3(self):
model = self.get_model()
self.run_add_test(model, IA3Config(), ["loras.{name}."])

def test_leave_out_ia3(self):
model = self.get_model()
self.run_leave_out_test(model, IA3Config(), self.leave_out_layers)

def test_average_ia3(self):
model = self.get_model()
self.run_average_test(model, IA3Config(), ["loras.{name}."])
4 changes: 4 additions & 0 deletions tests/methods/test_lora.py
@@ -10,6 +10,10 @@ def test_add_lora(self):
model = self.get_model()
self.run_add_test(model, LoRAConfig(), ["loras.{name}."])

def test_leave_out_lora(self):
model = self.get_model()
self.run_leave_out_test(model, LoRAConfig(), self.leave_out_layers)

def test_average_lora(self):
model = self.get_model()
self.run_average_test(model, LoRAConfig(), ["loras.{name}."])
6 changes: 6 additions & 0 deletions tests/methods/test_prefix_tuning.py
@@ -13,6 +13,12 @@ def test_add_prefix_tuning(self):
model = self.get_model()
self.run_add_test(model, PrefixTuningConfig(flat=True), ["prefix_tunings.{name}."])

def test_leave_out_prefix_tuning(self):
# Note: for prefix tuning this test is slightly indirect, since get_adapter only returns the prefix tuning weights of the first layer that carries a prefix, not of every layer.
# It still checks the right behavior: with the left-out layers skipped, the first returned layer shifts to the next layer that actually has a prefix, so it can never be one of the left-out layers.
model = self.get_model()
self.run_leave_out_test(model, PrefixTuningConfig(flat=True), self.leave_out_layers)

def test_average_prefix_tuning(self):
model = self.get_model()
self.run_average_test(model, PrefixTuningConfig(flat=True), ["prefix_tunings.{name}."])
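For context on the note in test_leave_out_prefix_tuning above, here is a rough sketch of what the assertion ends up checking, under the assumption stated in that note (only the first layer carrying a prefix is returned by get_adapter). The checkpoint and adapter names are placeholders, not part of the PR.

# Sketch only, not part of the PR.
from adapters import AutoAdapterModel, PrefixTuningConfig

model = AutoAdapterModel.from_pretrained("bert-base-uncased")  # hypothetical base model
model.add_adapter("pt_adapter", config=PrefixTuningConfig(flat=True, leave_out=[0, 1]))

# Per the note above, only the first layer that carries a prefix is returned.
# With layers 0 and 1 left out, that first layer shifts to a later index,
# so the left-out indices never appear among the returned keys.
returned_layers = sorted(model.get_adapter("pt_adapter").keys())
assert 0 not in returned_layers and 1 not in returned_layers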
1 change: 1 addition & 0 deletions tests/test_adapter.py
@@ -37,6 +37,7 @@ class AdapterTestBase:
model_class = AutoAdapterModel
# Default shape of inputs to use
default_input_samples_shape = (3, 64)
leave_out_layers = [0, 1]
do_run_train_tests = True

def get_model(self):
1 change: 1 addition & 0 deletions tests/test_albert.py
@@ -34,6 +34,7 @@ class AlbertAdapterTestBase(AdapterTestBase):
num_hidden_groups=2,
)
tokenizer_name = "albert-base-v2"
leave_out_layers = [0]


@require_torch