Commit 5253fb9

Author: Jesujoba Alabi
Commit message: updated code for quality test
1 parent: 6603fe8 · commit: 5253fb9

File tree

756 files changed (+2636 / -2873 lines)

Large commit: some files are hidden by default, so only part of the diff is reproduced below.


examples/pytorch/audio-classification/run_audio_classification.py (+5 -3)

@@ -172,9 +172,11 @@ class ModelArguments:
     def __post_init__(self):
         if not self.freeze_feature_extractor and self.freeze_feature_encoder:
             warnings.warn(
-                "The argument `--freeze_feature_extractor` is deprecated and "
-                "will be removed in a future version. Use `--freeze_feature_encoder`"
-                "instead. Setting `freeze_feature_encoder==True`.",
+                (
+                    "The argument `--freeze_feature_extractor` is deprecated and "
+                    "will be removed in a future version. Use `--freeze_feature_encoder`"
+                    "instead. Setting `freeze_feature_encoder==True`."
+                ),
                 FutureWarning,
             )
         if self.freeze_feature_extractor and not self.freeze_feature_encoder:

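The hunk above shows the first pattern this commit applies across the example scripts and library code: a multi-line argument built from implicitly concatenated string literals is wrapped in an explicit pair of parentheses, so the extent of the argument is unambiguous. A minimal before/after sketch of the same idea (the function name and message below are made up for illustration; only the parenthesization mirrors the commit):

import warnings


def warn_old_flag():
    # Before: the string pieces and the trailing `FutureWarning` argument sit at
    # the same indentation level, so the boundary of the message is easy to misread.
    warnings.warn(
        "`--old_flag` is deprecated and "
        "will be removed in a future version. Use `--new_flag` instead.",
        FutureWarning,
    )


def warn_old_flag_reformatted():
    # After: the concatenated pieces are grouped in explicit parentheses,
    # matching the pattern in the hunk above; behaviour is unchanged.
    warnings.warn(
        (
            "`--old_flag` is deprecated and "
            "will be removed in a future version. Use `--new_flag` instead."
        ),
        FutureWarning,
    )
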
examples/pytorch/dependency-parsing/run_udp.py (+15 -9)

@@ -156,9 +156,11 @@ def main():
         use_fast=model_args.use_fast,
         do_lower_case=model_args.do_lower_case,
         add_prefix_space=True,  # Used e.g. for RoBERTa
-        mecab_kwargs={"mecab_option": f"-r {model_args.mecab_dir} -d {model_args.mecab_dic_dir}"}
-        if model_args.is_japanese
-        else None,
+        mecab_kwargs=(
+            {"mecab_option": f"-r {model_args.mecab_dir} -d {model_args.mecab_dic_dir}"}
+            if model_args.is_japanese
+            else None
+        ),
     )
 
     # The task name (with prefix)
@@ -250,9 +252,11 @@ def main():
         if adapter_args.train_adapter:
             adapter_config = AdapterConfigBase.load(adapter_args.adapter_config, **adapter_config_kwargs)
             model.load_adapter(
-                os.path.join(training_args.output_dir, "best_model", task_name)
-                if training_args.do_train
-                else adapter_args.load_adapter,
+                (
+                    os.path.join(training_args.output_dir, "best_model", task_name)
+                    if training_args.do_train
+                    else adapter_args.load_adapter
+                ),
                 config=adapter_config,
                 load_as=task_name,
                 **adapter_load_kwargs,
@@ -262,9 +266,11 @@ def main():
                     adapter_args.lang_adapter_config, **adapter_config_kwargs
                 )
                 lang_adapter_name = model.load_adapter(
-                    os.path.join(training_args.output_dir, "best_model", lang_adapter_name)
-                    if training_args.do_train
-                    else adapter_args.load_lang_adapter,
+                    (
+                        os.path.join(training_args.output_dir, "best_model", lang_adapter_name)
+                        if training_args.do_train
+                        else adapter_args.load_lang_adapter
+                    ),
                     config=lang_adapter_config,
                     load_as=lang_adapter_name,
                     **adapter_load_kwargs,

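The second recurring pattern, visible in all three hunks above, wraps a multi-line conditional expression passed as an argument in its own parentheses. A small self-contained sketch of the before/after (the stub function and paths are illustrative, not taken from the repository):

import os


def load_adapter_stub(path, config=None):
    # Stand-in for a real loader; it only reports what it would load.
    return f"would load {path!r} with config={config!r}"


output_dir = "runs"        # illustrative values
do_train = True
fallback_path = "hub/some-adapter"

# Before: the `if`/`else` continuation lines sit at the same level as the other
# keyword arguments, so the first positional argument is easy to misread.
before = load_adapter_stub(
    os.path.join(output_dir, "best_model", "task")
    if do_train
    else fallback_path,
    config="pfeiffer",
)

# After: the conditional expression is grouped in parentheses, matching the hunks above.
after = load_adapter_stub(
    (
        os.path.join(output_dir, "best_model", "task")
        if do_train
        else fallback_path
    ),
    config="pfeiffer",
)

assert before == after  # the formatting change does not alter behaviour
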
examples/pytorch/dependency-parsing/utils_udp.py (-2)

@@ -287,7 +287,6 @@ def predict(self, test_dataset: Dataset) -> PredictionOutput:
         return PredictionOutput(predictions=output.predictions, label_ids=output.label_ids, metrics=output.metrics)
 
     def store_best_model(self, output):
-
         if self.args.metric_score not in output.metrics:
             raise Exception(
                 "Metric %s not in output.\nThe following output was generated: %s",
@@ -340,7 +339,6 @@ def _prediction_loop(
         metric = ParsingMetric()
 
         for inputs in tqdm(dataloader, desc=description):
-
             for k, v in inputs.items():
                 inputs[k] = v.to(self.args.device)

examples/pytorch/language-modeling/run_clm.py (+3 -3)

@@ -537,9 +537,9 @@ def compute_metrics(eval_preds):
         # Data collator will default to DataCollatorWithPadding, so we change it.
         data_collator=default_data_collator,
         compute_metrics=compute_metrics if training_args.do_eval and not is_torch_tpu_available() else None,
-        preprocess_logits_for_metrics=preprocess_logits_for_metrics
-        if training_args.do_eval and not is_torch_tpu_available()
-        else None,
+        preprocess_logits_for_metrics=(
+            preprocess_logits_for_metrics if training_args.do_eval and not is_torch_tpu_available() else None
+        ),
     )
 
     # Training

examples/pytorch/language-modeling/run_mlm.py (+3 -3)

@@ -553,9 +553,9 @@ def compute_metrics(eval_preds):
         tokenizer=tokenizer,
         data_collator=data_collator,
         compute_metrics=compute_metrics if training_args.do_eval and not is_torch_tpu_available() else None,
-        preprocess_logits_for_metrics=preprocess_logits_for_metrics
-        if training_args.do_eval and not is_torch_tpu_available()
-        else None,
+        preprocess_logits_for_metrics=(
+            preprocess_logits_for_metrics if training_args.do_eval and not is_torch_tpu_available() else None
+        ),
     )
 
     # Training

examples/pytorch/question-answering/utils_qa.py (+2 -2)

@@ -213,7 +213,7 @@ def postprocess_qa_predictions(
 
         # Make `predictions` JSON-serializable by casting np.float back to float.
         all_nbest_json[example["id"]] = [
-            {k: (float(v) if isinstance(v, (np.float16, np.float32, np.float64)) else v) for k, v in pred.items()}
+            {k: float(v) if isinstance(v, (np.float16, np.float32, np.float64)) else v for k, v in pred.items()}
             for pred in predictions
         ]
 
@@ -406,7 +406,7 @@ def postprocess_qa_predictions_with_beam_search(
 
         # Make `predictions` JSON-serializable by casting np.float back to float.
         all_nbest_json[example["id"]] = [
-            {k: (float(v) if isinstance(v, (np.float16, np.float32, np.float64)) else v) for k, v in pred.items()}
+            {k: float(v) if isinstance(v, (np.float16, np.float32, np.float64)) else v for k, v in pred.items()}
             for pred in predictions
         ]

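The change in this file only drops a redundant pair of parentheses inside the dict comprehension; the behaviour is identical. The cast itself is the important part, since NumPy scalar floats are not JSON-serializable. A quick standalone check of that point (the values are made up):

import json

import numpy as np

score = np.float32(0.87)

try:
    json.dumps({"score": score})
except TypeError as err:
    print(f"NumPy scalar rejected by json.dumps: {err}")

print(json.dumps({"score": float(score)}))  # fine once cast to a plain Python float
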
examples/pytorch/speech-pretraining/run_wav2vec2_pretraining_no_trainer.py (-1)

@@ -641,7 +641,6 @@ def prepare_dataset(batch):
 
             # update step
             if (step + 1) % args.gradient_accumulation_steps == 0 or step == len(train_dataloader) - 1:
-
                 # compute grad norm for monitoring
                 scale = (
                     accelerator.scaler._scale.item()

examples/pytorch/speech-recognition/run_speech_recognition_ctc.py (+5 -5)

@@ -682,10 +682,11 @@ def compute_metrics(pred):
         processor = AutoProcessor.from_pretrained(training_args.output_dir)
     except (OSError, KeyError):
         warnings.warn(
-            "Loading a processor from a feature extractor config that does not"
-            " include a `processor_class` attribute is deprecated and will be removed in v5. Please add the following "
-            " attribute to your `preprocessor_config.json` file to suppress this warning: "
-            " `'processor_class': 'Wav2Vec2Processor'`",
+            (
+                "Loading a processor from a feature extractor config that does not include a `processor_class`"
+                " attribute is deprecated and will be removed in v5. Please add the following attribute to your"
+                " `preprocessor_config.json` file to suppress this warning: `'processor_class': 'Wav2Vec2Processor'`"
+            ),
             FutureWarning,
         )
         processor = Wav2Vec2Processor.from_pretrained(training_args.output_dir)
@@ -708,7 +709,6 @@ def compute_metrics(pred):
 
     # Training
     if training_args.do_train:
-
         # use last checkpoint if exist
         if last_checkpoint is not None:
             checkpoint = last_checkpoint

examples/pytorch/text-classification/run_glue.py (+4 -2)

@@ -416,8 +416,10 @@ def main():
         else:
             logger.warning(
                 "Your model seems to have been trained with labels, but they don't match the dataset: ",
-                f"model labels: {list(sorted(label_name_to_id.keys()))}, dataset labels: {list(sorted(label_list))}."
-                "\nIgnoring the model labels as a result.",
+                (
+                    f"model labels: {list(sorted(label_name_to_id.keys()))}, dataset labels:"
+                    f" {list(sorted(label_list))}.\nIgnoring the model labels as a result."
+                ),
             )
     elif data_args.task_name is None and not is_regression:
         label_to_id = {v: i for i, v in enumerate(label_list)}

examples/pytorch/text-classification/run_glue_no_trainer.py (+4 -2)

@@ -348,8 +348,10 @@ def main():
         else:
             logger.warning(
                 "Your model seems to have been trained with labels, but they don't match the dataset: ",
-                f"model labels: {list(sorted(label_name_to_id.keys()))}, dataset labels: {list(sorted(label_list))}."
-                "\nIgnoring the model labels as a result.",
+                (
+                    f"model labels: {list(sorted(label_name_to_id.keys()))}, dataset labels:"
+                    f" {list(sorted(label_list))}.\nIgnoring the model labels as a result."
+                ),
             )
     elif args.task_name is None and not is_regression:
         label_to_id = {v: i for i, v in enumerate(label_list)}

examples/pytorch/token-classification/run_ner.py (+4 -2)

@@ -396,8 +396,10 @@ def get_label_list(labels):
         else:
             logger.warning(
                 "Your model seems to have been trained with labels, but they don't match the dataset: ",
-                f"model labels: {list(sorted(model.config.label2id.keys()))}, dataset labels:"
-                f" {list(sorted(label_list))}.\nIgnoring the model labels as a result.",
+                (
+                    f"model labels: {list(sorted(model.config.label2id.keys()))}, dataset labels:"
+                    f" {list(sorted(label_list))}.\nIgnoring the model labels as a result."
+                ),
             )
 
     # Set the correspondences label/ID inside the model config

examples/pytorch/token-classification/run_ner_no_trainer.py (+4 -2)

@@ -436,8 +436,10 @@ def get_label_list(labels):
         else:
             logger.warning(
                 "Your model seems to have been trained with labels, but they don't match the dataset: ",
-                f"model labels: {list(sorted(model.config.label2id.keys()))}, dataset labels:"
-                f" {list(sorted(label_list))}.\nIgnoring the model labels as a result.",
+                (
+                    f"model labels: {list(sorted(model.config.label2id.keys()))}, dataset labels:"
+                    f" {list(sorted(label_list))}.\nIgnoring the model labels as a result."
+                ),
             )
 
     # Set the correspondences label/ID inside the model config

examples/pytorch/translation/run_translation_no_trainer.py (-2)

@@ -69,7 +69,6 @@
 
 # Parsing input arguments
 def parse_args():
-
     parser = argparse.ArgumentParser(description="Finetune a transformers model on a text classification task")
     parser.add_argument(
         "--dataset_name",
@@ -751,5 +750,4 @@ def postprocess_text(preds, labels):
 
 
 if __name__ == "__main__":
-
     main()

src/transformers/__init__.py (+9 -9)

@@ -3200,15 +3200,6 @@
             "TFGPT2PreTrainedModel",
         ]
     )
-    _import_structure["models.gptj"].extend(
-        [
-            "TFGPTJForCausalLM",
-            "TFGPTJForQuestionAnswering",
-            "TFGPTJForSequenceClassification",
-            "TFGPTJModel",
-            "TFGPTJPreTrainedModel",
-        ]
-    )
     _import_structure["models.gpt_neox"].extend(
         [
             "TFGPTNeoXForCausalLM",
@@ -3218,6 +3209,15 @@
             "TFGPTNeoXPreTrainedModel",
         ]
     )
+    _import_structure["models.gptj"].extend(
+        [
+            "TFGPTJForCausalLM",
+            "TFGPTJForQuestionAnswering",
+            "TFGPTJForSequenceClassification",
+            "TFGPTJModel",
+            "TFGPTJPreTrainedModel",
+        ]
+    )
     _import_structure["models.groupvit"].extend(
         [
             "TF_GROUPVIT_PRETRAINED_MODEL_ARCHIVE_LIST",

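The `__init__.py` hunks add and remove nothing: the `models.gptj` block is simply moved after `models.gpt_neox` so the TF entries in `_import_structure` stay alphabetically ordered. For context, a toy sketch of how such a symbol-to-submodule table can back a lazily importing package (this is a simplified stand-in, not transformers' actual lazy-module implementation):

import importlib
from types import ModuleType


class LazySubpackage(ModuleType):
    """Toy lazy module: resolves a public symbol to its submodule on first access."""

    def __init__(self, name, import_structure):
        super().__init__(name)
        # Flatten {"submodule": ["Symbol", ...]} into {"Symbol": "submodule"}.
        self._symbol_to_module = {
            symbol: submodule for submodule, symbols in import_structure.items() for symbol in symbols
        }

    def __getattr__(self, symbol):
        submodule = self._symbol_to_module[symbol]
        module = importlib.import_module(f"{self.__name__}.{submodule}")
        return getattr(module, symbol)


# The ordering of the blocks is purely cosmetic; lookups go through the flattened table.
_import_structure = {
    "models.gpt_neox": ["TFGPTNeoXForCausalLM", "TFGPTNeoXModel"],
    "models.gptj": ["TFGPTJForCausalLM", "TFGPTJModel"],
}
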
src/transformers/adapters/layer.py (-1)

@@ -488,7 +488,6 @@ def adapter_batchsplit(self, adapter_setup: BatchSplit, hidden_states, input_ten
                 children_hidden.append(child)
             # Case 4: We have a single adapter which is part of this module -> forward pass
             elif adapter_block in self.adapters:
-
                 adapter_layer = self.adapters[adapter_block]
                 context = ForwardContext.get_context()
                 layer_output = adapter_layer(

src/transformers/adapters/modeling.py (-1)

@@ -357,7 +357,6 @@ def __init__(
         self.reduction = self.T / 1000.0
 
     def forward(self, query, key, value, residual, output_attentions: bool = False):
-
         if self.config["residual_before"]:
             value += residual[:, :, None, :].repeat(1, 1, value.size(2), 1)

src/transformers/adapters/prefix_tuning.py (+5 -3)

@@ -513,9 +513,11 @@ def adapter_parallel(
                 "key_states": key_states[i * orig_batch_size : (i + 1) * orig_batch_size],
                 "value_states": value_states[i * orig_batch_size : (i + 1) * orig_batch_size],
                 "residual_input": residual_input[i * orig_batch_size : (i + 1) * orig_batch_size],
-                "attention_mask": attention_mask[i * orig_batch_size : (i + 1) * orig_batch_size]
-                if attention_mask is not None
-                else None,
+                "attention_mask": (
+                    attention_mask[i * orig_batch_size : (i + 1) * orig_batch_size]
+                    if attention_mask is not None
+                    else None
+                ),
                 "invert_mask": invert_mask,
                 "idx_range": idx_range,
             }

src/transformers/adapters/utils.py (+6 -7)

@@ -284,7 +284,6 @@ def get_from_cache(
     # Prevent parallel downloads of the same file with a lock.
     lock_path = cache_path + ".lock"
    with FileLock(lock_path):
-
         # If the download just completed while the lock was activated.
         if os.path.exists(cache_path) and not force_download:
             # Even if returning early like here, the lock will be released.
@@ -768,9 +767,9 @@ def list_adapters(source: str = None, model_name: str = None) -> List[AdapterInf
             adapter_info = AdapterInfo(
                 source="hf",
                 adapter_id=model_info.modelId,
-                model_name=model_info.config.get("adapter_transformers", {}).get("model_name")
-                if model_info.config
-                else None,
+                model_name=(
+                    model_info.config.get("adapter_transformers", {}).get("model_name") if model_info.config else None
+                ),
                 username=model_info.modelId.split("/")[0],
                 sha1_checksum=model_info.sha,
             )
@@ -809,9 +808,9 @@ def get_adapter_info(adapter_id: str, source: str = "ah") -> Optional[AdapterInf
         return AdapterInfo(
             source="hf",
             adapter_id=model_info.modelId,
-            model_name=model_info.config.get("adapter_transformers", {}).get("model_name")
-            if model_info.config
-            else None,
+            model_name=(
+                model_info.config.get("adapter_transformers", {}).get("model_name") if model_info.config else None
+            ),
             username=model_info.modelId.split("/")[0],
             sha1_checksum=model_info.sha,
         )

src/transformers/benchmark/benchmark.py (-1)

@@ -48,7 +48,6 @@
 
 
 class PyTorchBenchmark(Benchmark):
-
     args: PyTorchBenchmarkArguments
     configs: PretrainedConfig
     framework: str = "PyTorch"

src/transformers/benchmark/benchmark_args.py (-1)

@@ -33,7 +33,6 @@
 
 @dataclass
 class PyTorchBenchmarkArguments(BenchmarkArguments):
-
     deprecated_args = [
         "no_inference",
         "no_cuda",

src/transformers/benchmark/benchmark_args_tf.py (-1)

@@ -30,7 +30,6 @@
 
 @dataclass
 class TensorFlowBenchmarkArguments(BenchmarkArguments):
-
     deprecated_args = [
         "no_inference",
         "no_cuda",

src/transformers/benchmark/benchmark_args_utils.py (+5 -3)

@@ -134,9 +134,11 @@ class BenchmarkArguments:
 
     def __post_init__(self):
         warnings.warn(
-            f"The class {self.__class__} is deprecated. Hugging Face Benchmarking utils"
-            " are deprecated in general and it is advised to use external Benchmarking libraries "
-            " to benchmark Transformer models.",
+            (
+                f"The class {self.__class__} is deprecated. Hugging Face Benchmarking utils"
+                " are deprecated in general and it is advised to use external Benchmarking libraries "
+                " to benchmark Transformer models."
+            ),
             FutureWarning,
         )

src/transformers/benchmark/benchmark_tf.py (-1)

@@ -77,7 +77,6 @@ def random_input_ids(batch_size: int, sequence_length: int, vocab_size: int) ->
 
 
 class TensorFlowBenchmark(Benchmark):
-
     args: TensorFlowBenchmarkArguments
     configs: PretrainedConfig
     framework: str = "TensorFlow"

src/transformers/benchmark/benchmark_utils.py (+5 -4)

@@ -613,9 +613,11 @@ def __init__(self, args: BenchmarkArguments = None, configs: PretrainedConfig =
         self.config_dict = {model_name: config for model_name, config in zip(self.args.model_names, configs)}
 
         warnings.warn(
-            f"The class {self.__class__} is deprecated. Hugging Face Benchmarking utils"
-            " are deprecated in general and it is advised to use external Benchmarking libraries "
-            " to benchmark Transformer models.",
+            (
+                f"The class {self.__class__} is deprecated. Hugging Face Benchmarking utils"
+                " are deprecated in general and it is advised to use external Benchmarking libraries "
+                " to benchmark Transformer models."
+            ),
             FutureWarning,
         )
 
@@ -890,7 +892,6 @@ def save_to_csv(self, result_dict, filename):
             return
         self.print_fn("Saving results to csv.")
         with open(filename, mode="w") as csv_file:
-
             assert len(self.args.model_names) > 0, f"At least 1 model should be defined, but got {self.model_names}"
 
             fieldnames = ["model", "batch_size", "sequence_length"]
