Fix doc examples: modify config before super().__init__ (#14697)
Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
This commit is contained in:
@@ -1662,10 +1662,10 @@ class BartDecoderWrapper(BartPretrainedModel):

 class BartForCausalLM(BartPretrainedModel):
     def __init__(self, config):
-        super().__init__(config)
         config = copy.deepcopy(config)
         config.is_decoder = True
         config.is_encoder_decoder = False
+        super().__init__(config)
         self.model = BartDecoderWrapper(config)

         self.lm_head = nn.Linear(config.hidden_size, config.vocab_size, bias=False)
@@ -2865,10 +2865,10 @@ class BigBirdPegasusDecoderWrapper(BigBirdPegasusPreTrainedModel):
 # Copied from transformers.models.bart.modeling_bart.BartForCausalLM with Bart->BigBirdPegasus, 'facebook/bart-large'->"google/bigbird-pegasus-large-arxiv"
 class BigBirdPegasusForCausalLM(BigBirdPegasusPreTrainedModel):
     def __init__(self, config):
-        super().__init__(config)
         config = copy.deepcopy(config)
         config.is_decoder = True
         config.is_encoder_decoder = False
+        super().__init__(config)
         self.model = BigBirdPegasusDecoderWrapper(config)

         self.lm_head = nn.Linear(config.hidden_size, config.vocab_size, bias=False)
@@ -1400,10 +1400,10 @@ class BlenderbotDecoderWrapper(BlenderbotPreTrainedModel):
 # Copied from transformers.models.bart.modeling_bart.BartForCausalLM with Bart->Blenderbot
 class BlenderbotForCausalLM(BlenderbotPreTrainedModel):
     def __init__(self, config):
-        super().__init__(config)
         config = copy.deepcopy(config)
         config.is_decoder = True
         config.is_encoder_decoder = False
+        super().__init__(config)
         self.model = BlenderbotDecoderWrapper(config)

         self.lm_head = nn.Linear(config.hidden_size, config.vocab_size, bias=False)
@@ -1374,10 +1374,10 @@ class BlenderbotSmallDecoderWrapper(BlenderbotSmallPreTrainedModel):
 # Copied from transformers.models.bart.modeling_bart.BartForCausalLM with Bart->BlenderbotSmall
 class BlenderbotSmallForCausalLM(BlenderbotSmallPreTrainedModel):
     def __init__(self, config):
-        super().__init__(config)
         config = copy.deepcopy(config)
         config.is_decoder = True
         config.is_encoder_decoder = False
+        super().__init__(config)
         self.model = BlenderbotSmallDecoderWrapper(config)

         self.lm_head = nn.Linear(config.hidden_size, config.vocab_size, bias=False)
@@ -1397,10 +1397,10 @@ class MarianDecoderWrapper(MarianPreTrainedModel):
 # Copied from transformers.models.bart.modeling_bart.BartForCausalLM with Bart->Marian
 class MarianForCausalLM(MarianPreTrainedModel):
     def __init__(self, config):
-        super().__init__(config)
         config = copy.deepcopy(config)
         config.is_decoder = True
         config.is_encoder_decoder = False
+        super().__init__(config)
         self.model = MarianDecoderWrapper(config)

         self.lm_head = nn.Linear(config.hidden_size, config.vocab_size, bias=False)
@@ -1665,10 +1665,10 @@ class MBartDecoderWrapper(MBartPreTrainedModel):
 # Copied from transformers.models.bart.modeling_bart.BartForCausalLM with Bart->MBart
 class MBartForCausalLM(MBartPreTrainedModel):
     def __init__(self, config):
-        super().__init__(config)
         config = copy.deepcopy(config)
         config.is_decoder = True
         config.is_encoder_decoder = False
+        super().__init__(config)
         self.model = MBartDecoderWrapper(config)

         self.lm_head = nn.Linear(config.hidden_size, config.vocab_size, bias=False)
@@ -1486,10 +1486,10 @@ class PegasusDecoderWrapper(PegasusPreTrainedModel):

 class PegasusForCausalLM(PegasusPreTrainedModel):
     def __init__(self, config):
-        super().__init__(config)
         config = copy.deepcopy(config)
         config.is_decoder = True
         config.is_encoder_decoder = False
+        super().__init__(config)
         self.model = PegasusDecoderWrapper(config)

         self.lm_head = nn.Linear(config.hidden_size, config.vocab_size, bias=False)
@@ -744,10 +744,10 @@ class Speech2Text2DecoderWrapper(Speech2Text2PreTrainedModel):
 )
 class Speech2Text2ForCausalLM(Speech2Text2PreTrainedModel):
     def __init__(self, config):
-        super().__init__(config)
         config = copy.deepcopy(config)
         config.is_decoder = True
         config.is_encoder_decoder = False
+        super().__init__(config)
         self.model = Speech2Text2DecoderWrapper(config)

         self.lm_head = nn.Linear(config.hidden_size, config.vocab_size, bias=False)
@@ -777,10 +777,10 @@ class TrOCRDecoderWrapper(TrOCRPreTrainedModel):
 )
 class TrOCRForCausalLM(TrOCRPreTrainedModel):
     def __init__(self, config):
-        super().__init__(config)
         config = copy.deepcopy(config)
         config.is_decoder = True
         config.is_encoder_decoder = False
+        super().__init__(config)
         self.model = TrOCRDecoderWrapper(config)

         self.output_projection = nn.Linear(config.hidden_size, config.vocab_size, bias=False)
@@ -3173,10 +3173,10 @@ class {{cookiecutter.camelcase_modelname}}DecoderWrapper({{cookiecutter.camelcas
 # Copied from transformers.models.bart.modeling_bart.BartForCausalLM with Bart->{{cookiecutter.camelcase_modelname}}
 class {{cookiecutter.camelcase_modelname}}ForCausalLM({{cookiecutter.camelcase_modelname}}PreTrainedModel):
     def __init__(self, config):
-        super().__init__(config)
         config = copy.deepcopy(config)
         config.is_decoder = True
         config.is_encoder_decoder = False
+        super().__init__(config)
         self.model = {{cookiecutter.camelcase_modelname}}DecoderWrapper(config)

         self.lm_head = nn.Linear(config.hidden_size, config.vocab_size, bias=False)
Reference in New Issue
Block a user