Fix GenerationMixin warning for AdapterModel classes #787

Merged: 1 commit, Jan 27, 2025
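For context: recent transformers releases warn when a model class is used with generate() without inheriting directly from GenerationMixin, because PreTrainedModel is slated to stop providing that mixin implicitly. This PR resolves the warning by adding GenerationMixin as an explicit base of every AdapterModel class. The sketch below is not part of the diff; the checkpoint, adapter name, and head call are illustrative assumptions, but it shows the generate() path that previously triggered the warning:

# Minimal usage sketch (assumed names; not part of this PR's diff).
from adapters import AutoAdapterModel
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("gpt2")
model = AutoAdapterModel.from_pretrained("gpt2")  # resolves to GPT2AdapterModel

model.add_adapter("demo")             # hypothetical adapter name
model.add_causal_lm_head("demo")      # a language-modeling head so generate() has logits to sample from
model.set_active_adapters("demo")

inputs = tokenizer("Adapters are", return_tensors="pt")
# Before this change, calling generate() here emitted the
# "doesn't directly inherit from `GenerationMixin`" warning on recent transformers.
output_ids = model.generate(**inputs, max_new_tokens=20)
print(tokenizer.decode(output_ids[0], skip_special_tokens=True))
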
5 changes: 4 additions & 1 deletion src/adapters/models/bart/adapter_model.py
@@ -1,5 +1,6 @@
 import torch
 
+from transformers.generation import GenerationMixin
 from transformers.models.bart.modeling_bart import (
     BART_INPUTS_DOCSTRING,
     BART_START_DOCSTRING,
@@ -18,7 +19,9 @@
 @add_start_docstrings(
     "BART Model with the option to add multiple flexible prediction heads on top.", BART_START_DOCSTRING
 )
-class BartAdapterModel(EmbeddingAdaptersWrapperMixin, ModelWithFlexibleHeadsAdaptersMixin, BartPreTrainedModel):
+class BartAdapterModel(
+    EmbeddingAdaptersWrapperMixin, ModelWithFlexibleHeadsAdaptersMixin, BartPreTrainedModel, GenerationMixin
+):
     _tied_weights_keys = [
         "encoder.embed_tokens.weight",
         "decoder.embed_tokens.weight",

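The remaining files repeat the same change to each class's bases. A quick sanity check of the effect (a sketch, not part of the PR; it assumes adapters and a recent transformers are installed):

# Sanity-check sketch: the adapter model class should now advertise generation support.
from transformers.generation import GenerationMixin
from adapters import BartAdapterModel

assert issubclass(BartAdapterModel, GenerationMixin)
# can_generate() is the classmethod transformers consults before allowing generate().
print(BartAdapterModel.can_generate())  # expected: True once GenerationMixin is a base
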
5 changes: 4 additions & 1 deletion src/adapters/models/bert/adapter_model.py
@@ -1,3 +1,4 @@
+from transformers.generation import GenerationMixin
 from transformers.models.bert.modeling_bert import (
     BERT_INPUTS_DOCSTRING,
     BERT_START_DOCSTRING,
@@ -16,7 +17,9 @@
     """Bert Model transformer with the option to add multiple flexible heads on top.""",
     BERT_START_DOCSTRING,
 )
-class BertAdapterModel(EmbeddingAdaptersWrapperMixin, ModelWithFlexibleHeadsAdaptersMixin, BertPreTrainedModel):
+class BertAdapterModel(
+    EmbeddingAdaptersWrapperMixin, ModelWithFlexibleHeadsAdaptersMixin, BertPreTrainedModel, GenerationMixin
+):
 
     head_types = [
         "classification",

3 changes: 2 additions & 1 deletion src/adapters/models/bert_generation/adapter_model.py
@@ -1,3 +1,4 @@
+from transformers.generation import GenerationMixin
 from transformers.models.bert_generation.modeling_bert_generation import (
     BERT_GENERATION_INPUTS_DOCSTRING,
     BERT_GENERATION_START_DOCSTRING,
@@ -17,7 +18,7 @@
     BERT_GENERATION_START_DOCSTRING,
 )
 class BertGenerationAdapterModel(
-    EmbeddingAdaptersWrapperMixin, ModelWithFlexibleHeadsAdaptersMixin, BertGenerationPreTrainedModel
+    EmbeddingAdaptersWrapperMixin, ModelWithFlexibleHeadsAdaptersMixin, BertGenerationPreTrainedModel, GenerationMixin
 ):
     _keys_to_ignore_on_load_unexpected = [r"lm_head.bias"]
 

3 changes: 2 additions & 1 deletion src/adapters/models/distilbert/adapter_model.py
@@ -1,5 +1,6 @@
 import torch.nn as nn
 
+from transformers.generation import GenerationMixin
 from transformers.models.distilbert.modeling_distilbert import (
     DISTILBERT_INPUTS_DOCSTRING,
     DISTILBERT_START_DOCSTRING,
@@ -18,7 +19,7 @@
     DISTILBERT_START_DOCSTRING,
 )
 class DistilBertAdapterModel(
-    EmbeddingAdaptersWrapperMixin, ModelWithFlexibleHeadsAdaptersMixin, DistilBertPreTrainedModel
+    EmbeddingAdaptersWrapperMixin, ModelWithFlexibleHeadsAdaptersMixin, DistilBertPreTrainedModel, GenerationMixin
 ):
     head_types = [
         "classification",

5 changes: 4 additions & 1 deletion src/adapters/models/electra/adapter_model.py
@@ -1,3 +1,4 @@
+from transformers.generation import GenerationMixin
 from transformers.models.electra.modeling_electra import (
     ELECTRA_INPUTS_DOCSTRING,
     ELECTRA_START_DOCSTRING,
@@ -16,7 +17,9 @@
     """Electra Model transformer with the option to add multiple flexible heads on top.""",
     ELECTRA_START_DOCSTRING,
 )
-class ElectraAdapterModel(EmbeddingAdaptersWrapperMixin, ModelWithFlexibleHeadsAdaptersMixin, ElectraPreTrainedModel):
+class ElectraAdapterModel(
+    EmbeddingAdaptersWrapperMixin, ModelWithFlexibleHeadsAdaptersMixin, ElectraPreTrainedModel, GenerationMixin
+):
 
     head_types = [
         "classification",

5 changes: 4 additions & 1 deletion src/adapters/models/gpt2/adapter_model.py
@@ -2,6 +2,7 @@
 
 import torch
 
+from transformers.generation import GenerationMixin
 from transformers.models.gpt2.modeling_gpt2 import GPT2_START_DOCSTRING, GPT2Model, GPT2PreTrainedModel
 from transformers.utils import add_start_docstrings
 
@@ -25,7 +26,9 @@
 """,
     GPT2_START_DOCSTRING,
 )
-class GPT2AdapterModel(EmbeddingAdaptersWrapperMixin, ModelWithFlexibleHeadsAdaptersMixin, GPT2PreTrainedModel):
+class GPT2AdapterModel(
+    EmbeddingAdaptersWrapperMixin, ModelWithFlexibleHeadsAdaptersMixin, GPT2PreTrainedModel, GenerationMixin
+):
     head_types = [
         "classification",
         "multilabel_classification",

5 changes: 4 additions & 1 deletion src/adapters/models/gptj/adapter_model.py
@@ -2,6 +2,7 @@
 
 import torch
 
+from transformers.generation import GenerationMixin
 from transformers.models.gptj.modeling_gptj import GPTJ_START_DOCSTRING, GPTJModel, GPTJPreTrainedModel
 from transformers.utils import add_start_docstrings
 
@@ -25,7 +26,9 @@
 """,
     GPTJ_START_DOCSTRING,
 )
-class GPTJAdapterModel(EmbeddingAdaptersWrapperMixin, ModelWithFlexibleHeadsAdaptersMixin, GPTJPreTrainedModel):
+class GPTJAdapterModel(
+    EmbeddingAdaptersWrapperMixin, ModelWithFlexibleHeadsAdaptersMixin, GPTJPreTrainedModel, GenerationMixin
+):
     head_types = [
         "classification",
         "multilabel_classification",

5 changes: 4 additions & 1 deletion src/adapters/models/llama/adapter_model.py
@@ -3,6 +3,7 @@
 
 import torch
 
+from transformers.generation import GenerationMixin
 from transformers.models.llama.modeling_llama import LLAMA_START_DOCSTRING, LlamaModel, LlamaPreTrainedModel
 from transformers.utils import add_start_docstrings
 
@@ -26,7 +27,9 @@
 """,
     LLAMA_START_DOCSTRING,
 )
-class LlamaAdapterModel(EmbeddingAdaptersWrapperMixin, ModelWithFlexibleHeadsAdaptersMixin, LlamaPreTrainedModel):
+class LlamaAdapterModel(
+    EmbeddingAdaptersWrapperMixin, ModelWithFlexibleHeadsAdaptersMixin, LlamaPreTrainedModel, GenerationMixin
+):
     head_types = [
         "classification",
         "multilabel_classification",

5 changes: 4 additions & 1 deletion src/adapters/models/mbart/adapter_model.py
@@ -1,5 +1,6 @@
 import torch
 
+from transformers.generation import GenerationMixin
 from transformers.models.mbart.modeling_mbart import (
     MBART_INPUTS_DOCSTRING,
     MBART_START_DOCSTRING,
@@ -19,7 +20,9 @@
 @add_start_docstrings(
     "MBART Model with the option to add multiple flexible prediction heads on top.", MBART_START_DOCSTRING
 )
-class MBartAdapterModel(EmbeddingAdaptersWrapperMixin, ModelWithFlexibleHeadsAdaptersMixin, MBartPreTrainedModel):
+class MBartAdapterModel(
+    EmbeddingAdaptersWrapperMixin, ModelWithFlexibleHeadsAdaptersMixin, MBartPreTrainedModel, GenerationMixin
+):
     _tied_weights_keys = [
         "encoder.embed_tokens.weight",
         "decoder.embed_tokens.weight",

5 changes: 4 additions & 1 deletion src/adapters/models/mistral/adapter_model.py
@@ -2,6 +2,7 @@
 
 import torch
 
+from transformers.generation import GenerationMixin
 from transformers.models.mistral.modeling_mistral import MISTRAL_START_DOCSTRING, MistralModel, MistralPreTrainedModel
 from transformers.utils import add_start_docstrings
 
@@ -25,7 +26,9 @@
 """,
     MISTRAL_START_DOCSTRING,
 )
-class MistralAdapterModel(EmbeddingAdaptersWrapperMixin, ModelWithFlexibleHeadsAdaptersMixin, MistralPreTrainedModel):
+class MistralAdapterModel(
+    EmbeddingAdaptersWrapperMixin, ModelWithFlexibleHeadsAdaptersMixin, MistralPreTrainedModel, GenerationMixin
+):
     head_types = [
         "classification",
         "multilabel_classification",

5 changes: 4 additions & 1 deletion src/adapters/models/mt5/adapter_model.py
@@ -2,6 +2,7 @@
 
 import torch
 
+from transformers.generation import GenerationMixin
 from transformers.models.mt5.modeling_mt5 import (
     MT5_INPUTS_DOCSTRING,
     MT5_START_DOCSTRING,
@@ -22,7 +23,9 @@
 @add_start_docstrings(
     "MT5 Model with the option to add multiple flexible prediction heads on top.", MT5_START_DOCSTRING
 )
-class MT5AdapterModel(EmbeddingAdaptersWrapperMixin, ModelWithFlexibleHeadsAdaptersMixin, MT5PreTrainedModel):
+class MT5AdapterModel(
+    EmbeddingAdaptersWrapperMixin, ModelWithFlexibleHeadsAdaptersMixin, MT5PreTrainedModel, GenerationMixin
+):
     _tied_weights_keys = [
         "encoder.embed_tokens.weight",
         "decoder.embed_tokens.weight",

5 changes: 4 additions & 1 deletion src/adapters/models/plbart/adapter_model.py
@@ -1,5 +1,6 @@
 import torch
 
+from transformers.generation import GenerationMixin
 from transformers.models.plbart.modeling_plbart import (
     PLBART_INPUTS_DOCSTRING,
     PLBART_START_DOCSTRING,
@@ -18,7 +19,9 @@
 @add_start_docstrings(
     "PLBART Model with the option to add multiple flexible prediction heads on top.", PLBART_START_DOCSTRING
 )
-class PLBartAdapterModel(EmbeddingAdaptersWrapperMixin, ModelWithFlexibleHeadsAdaptersMixin, PLBartPreTrainedModel):
+class PLBartAdapterModel(
+    EmbeddingAdaptersWrapperMixin, ModelWithFlexibleHeadsAdaptersMixin, PLBartPreTrainedModel, GenerationMixin
+):
     _tied_weights_keys = [
         "encoder.embed_tokens.weight",
         "decoder.embed_tokens.weight",

5 changes: 4 additions & 1 deletion src/adapters/models/roberta/adapter_model.py
@@ -1,3 +1,4 @@
+from transformers.generation import GenerationMixin
 from transformers.models.roberta.modeling_roberta import (
     ROBERTA_INPUTS_DOCSTRING,
     ROBERTA_START_DOCSTRING,
@@ -16,7 +17,9 @@
     """Roberta Model transformer with the option to add multiple flexible heads on top.""",
     ROBERTA_START_DOCSTRING,
 )
-class RobertaAdapterModel(EmbeddingAdaptersWrapperMixin, ModelWithFlexibleHeadsAdaptersMixin, RobertaPreTrainedModel):
+class RobertaAdapterModel(
+    EmbeddingAdaptersWrapperMixin, ModelWithFlexibleHeadsAdaptersMixin, RobertaPreTrainedModel, GenerationMixin
+):
     head_types = [
         "classification",
         "multilabel_classification",

5 changes: 4 additions & 1 deletion src/adapters/models/t5/adapter_model.py
@@ -2,6 +2,7 @@
 
 import torch
 
+from transformers.generation import GenerationMixin
 from transformers.models.t5.modeling_t5 import T5_INPUTS_DOCSTRING, T5_START_DOCSTRING, T5Model, T5PreTrainedModel
 from transformers.utils import add_start_docstrings, add_start_docstrings_to_model_forward
 
@@ -15,7 +16,9 @@
 
 
 @add_start_docstrings("T5 Model with the option to add multiple flexible prediction heads on top.", T5_START_DOCSTRING)
-class T5AdapterModel(EmbeddingAdaptersWrapperMixin, ModelWithFlexibleHeadsAdaptersMixin, T5PreTrainedModel):
+class T5AdapterModel(
+    EmbeddingAdaptersWrapperMixin, ModelWithFlexibleHeadsAdaptersMixin, T5PreTrainedModel, GenerationMixin
+):
     _tied_weights_keys = [
         "encoder.embed_tokens.weight",
         "decoder.embed_tokens.weight",

5 changes: 4 additions & 1 deletion src/adapters/models/whisper/adapter_model.py
@@ -1,6 +1,7 @@
 import torch
 
 from transformers import EncoderDecoderCache, StaticCache
+from transformers.generation import GenerationMixin
 from transformers.models.whisper.modeling_whisper import (
     WHISPER_INPUTS_DOCSTRING,
     WHISPER_START_DOCSTRING,
@@ -19,7 +20,9 @@
 @add_start_docstrings(
     "WHISPER Model with the option to add multiple flexible prediction heads on top.", WHISPER_START_DOCSTRING
 )
-class WhisperAdapterModel(EmbeddingAdaptersWrapperMixin, ModelWithFlexibleHeadsAdaptersMixin, WhisperPreTrainedModel):
+class WhisperAdapterModel(
+    EmbeddingAdaptersWrapperMixin, ModelWithFlexibleHeadsAdaptersMixin, WhisperPreTrainedModel, GenerationMixin
+):
     _tied_weights_keys = []
     head_types = ["seq2seq_lm"]
 

3 changes: 2 additions & 1 deletion src/adapters/models/xlm_roberta/adapter_model.py
@@ -1,3 +1,4 @@
+from transformers.generation import GenerationMixin
 from transformers.models.xlm_roberta.modeling_xlm_roberta import (
     XLM_ROBERTA_INPUTS_DOCSTRING,
     XLM_ROBERTA_START_DOCSTRING,
@@ -17,7 +18,7 @@
     XLM_ROBERTA_START_DOCSTRING,
 )
 class XLMRobertaAdapterModel(
-    EmbeddingAdaptersWrapperMixin, ModelWithFlexibleHeadsAdaptersMixin, XLMRobertaPreTrainedModel
+    EmbeddingAdaptersWrapperMixin, ModelWithFlexibleHeadsAdaptersMixin, XLMRobertaPreTrainedModel, GenerationMixin
 ):
 
     head_types = [

5 changes: 4 additions & 1 deletion src/adapters/models/xmod/adapter_model.py
@@ -2,6 +2,7 @@
 
 import torch
 
+from transformers.generation import GenerationMixin
 from transformers.models.xmod.modeling_xmod import (
     XMOD_INPUTS_DOCSTRING,
     XMOD_START_DOCSTRING,
@@ -20,7 +21,9 @@
     """X-MOD Model transformer with the option to add multiple flexible heads on top.""",
     XMOD_START_DOCSTRING,
 )
-class XmodAdapterModel(EmbeddingAdaptersWrapperMixin, ModelWithFlexibleHeadsAdaptersMixin, XmodPreTrainedModel):
+class XmodAdapterModel(
+    EmbeddingAdaptersWrapperMixin, ModelWithFlexibleHeadsAdaptersMixin, XmodPreTrainedModel, GenerationMixin
+):
 
     head_types = [
         "classification",