diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 31f454cbae..1266792839 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -137,7 +137,7 @@ Follow these steps to start contributing:
 2. Clone your fork to your local disk, and add the base repository as a remote:
 
    ```bash
-   $ git clone git@github.com:<your Github handle>/transformers.git
+   $ git clone git@github.com:<your Github handle>/adapter-transformers.git
    $ cd adapter-transformers
    $ git remote add upstream https://github.com/Adapter-Hub/adapter-transformers.git
    ```
@@ -230,13 +230,13 @@ Follow these steps to start contributing:
    they can still be built. This check also runs in CI. To run a local check
    make sure you have installed the documentation builder requirements. First
    you will need to clone the repository containing our tools to build the documentation:
-   
+
    ```bash
    $ pip install git+https://github.com/huggingface/doc-builder
    ```
 
    Then, make sure you have all the dependencies to be able to build the doc with:
-   
+
    ```bash
    $ pip install ".[docs]"
    ```
@@ -307,7 +307,7 @@ Follow these steps to start contributing:
 6. All public methods must have informative docstrings that work nicely with sphinx. See `modeling_bert.py` for an
    example.
 7. Due to the rapidly growing repository, it is important to make sure that no files that would significantly weigh down the repository are added. This includes images, videos and other non-text files. We prefer to leverage a hf.co hosted `dataset` like
-   the ones hosted on [`hf-internal-testing`](https://huggingface.co/hf-internal-testing) in which to place these files and reference 
+   the ones hosted on [`hf-internal-testing`](https://huggingface.co/hf-internal-testing) in which to place these files and reference
    them by URL. We recommend putting them in the following dataset: [huggingface/documentation-images](https://huggingface.co/datasets/huggingface/documentation-images).
    If an external contribution, feel free to add the images to your PR and ask a Hugging Face member to migrate your images
    to this dataset.
diff --git a/src/transformers/adapters/composition.py b/src/transformers/adapters/composition.py
index e9d032cd93..08da0558ca 100644
--- a/src/transformers/adapters/composition.py
+++ b/src/transformers/adapters/composition.py
@@ -99,7 +99,19 @@ def __init__(self, *split_adapters: List[Union[AdapterCompositionBlock, str]], b
 
 # Some composition blocks might not be supported by all models.
 # Add a whitelist of models for those here.
 SUPPORTED_MODELS = {
-    Parallel: ["bert", "roberta", "distilbert", "deberta-v2", "deberta", "bart", "mbart", "gpt2", "t5", "xlm-roberta"],
+    Parallel: [
+        "bert",
+        "roberta",
+        "distilbert",
+        "deberta-v2",
+        "deberta",
+        "bart",
+        "mbart",
+        "gpt2",
+        "t5",
+        "xlm-roberta",
+        "transformer",
+    ],
 }
 
diff --git a/src/transformers/adapters/mixins/transformer.py b/src/transformers/adapters/mixins/transformer.py
new file mode 100644
index 0000000000..3bbb3cddf6
--- /dev/null
+++ b/src/transformers/adapters/mixins/transformer.py
@@ -0,0 +1,34 @@
+import logging
+from typing import Iterable, Tuple
+
+import torch.nn as nn
+
+from ..layer import AdapterLayer
+from ..model_mixin import InvertibleAdaptersMixin, ModelAdaptersMixin
+
+
+logger = logging.getLogger(__name__)
+
+
+# For backwards compatibility, TransformerSelfOutput inherits directly from AdapterLayer
+class TransformerSelfOutputAdaptersMixin(AdapterLayer):
+    """Adds adapters to the TransformerSelfOutput module."""
+
+    def __init__(self):
+        super().__init__("mh_adapter", None)
+
+
+# For backwards compatibility, TransformerOutput inherits directly from AdapterLayer
+class TransformerOutputAdaptersMixin(AdapterLayer):
+    """Adds adapters to the TransformerOutput module."""
+
+    def __init__(self):
+        super().__init__("output_adapter", None)
+
+
+class TransformerModelAdaptersMixin(InvertibleAdaptersMixin, ModelAdaptersMixin):
+    """Adds adapters to the TransformerModel module."""
+
+    def iter_layers(self) -> Iterable[Tuple[int, nn.Module]]:
+        for i, layer in enumerate(self.encoder.layer):
+            yield i, layer
diff --git a/src/transformers/adapters/models/auto.py b/src/transformers/adapters/models/auto.py
index 91b097258c..196bb80a25 100644
--- a/src/transformers/adapters/models/auto.py
+++ b/src/transformers/adapters/models/auto.py
@@ -18,6 +18,7 @@
         ("mbart", "MBartAdapterModel"),
         ("gpt2", "GPT2AdapterModel"),
         ("t5", "T5AdapterModel"),
+        ("transformer", "TransformerAdapterModel"),
     ]
 )
 MODEL_WITH_HEADS_MAPPING_NAMES = OrderedDict(
diff --git a/src/transformers/adapters/wrappers/configuration.py b/src/transformers/adapters/wrappers/configuration.py
index 088b709faa..15b1d10147 100644
--- a/src/transformers/adapters/wrappers/configuration.py
+++ b/src/transformers/adapters/wrappers/configuration.py
@@ -35,6 +35,7 @@
         "hidden_dropout_prob": "dropout_rate",
         "attention_probs_dropout_prob": "dropout_rate",
     },
+    "transformer": {},
     "xlm_roberta": {},
 }
 
diff --git a/utils/check_adapters.py b/utils/check_adapters.py
index 8a66782d75..dca9a0f296 100644
--- a/utils/check_adapters.py
+++ b/utils/check_adapters.py
@@ -15,6 +15,7 @@
     "t5",
     "deberta",
     "deberta-v2",
+    "transformer",
 ]
 
 IGNORE_NOT_IMPLEMENTING_MIXIN = [
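
Taken together, these changes register the new `transformer` model type across the adapter framework: the mixins attach `AdapterLayer` instances to the model's self-output and output modules, `models/auto.py` maps the type to a `TransformerAdapterModel`, `wrappers/configuration.py` adds it to the config attribute mapping, and the `Parallel` whitelist plus the `check_adapters.py` entry cover composition support and the consistency check. Below is a minimal usage sketch, assuming a checkpoint whose config reports this model type exists; the checkpoint name is hypothetical, and the calls mirror the `adapter-transformers` API already used for the other whitelisted model types:

```python
from transformers import AutoAdapterModel
from transformers.adapters.composition import Parallel

# Hypothetical checkpoint name: any checkpoint whose config reports
# model_type == "transformer" resolves to TransformerAdapterModel
# through the mapping entry added in models/auto.py above.
model = AutoAdapterModel.from_pretrained("some-org/some-transformer-checkpoint")

# Bottleneck adapters are inserted by the AdapterLayer-based mixins
# ("mh_adapter" after self-attention, "output_adapter" after the feed-forward block).
model.add_adapter("task_a")
model.add_adapter("task_b")

# "transformer" is now whitelisted for Parallel composition, so both
# adapters can be activated side by side in a single forward pass.
model.set_active_adapters(Parallel("task_a", "task_b"))
```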