add doctests for bart like seq2seq models (#15987)

* boom boom

* enable doctest for few seq2seq models

* add seq2seq models in documentation_tests.txt

* fix docstring blenderbot

* Apply suggestions from code review

Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com>

* Apply suggestions from code review

Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com>

* fix seq classif doc sample

* don't check loss for seq classif examples

* +IGNORE_OUTPUT => +IGNORE_RESULT

* fix _SEQ_CLASS_EXPECTED_OUTPUT_SHAPE

* fix some docs

* more fixes

* last fix (hopefully)

* fix big bird gen example

* fix mbart gen example

Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com>
This commit is contained in:
Suraj Patil
2022-03-09 20:30:38 +01:00
committed by GitHub
parent b256f3518d
commit a69e185074
10 changed files with 301 additions and 161 deletions

View File

@@ -1012,6 +1012,8 @@ PT_QUESTION_ANSWERING_SAMPLE = r"""
>>> from transformers import {processor_class}, {model_class} >>> from transformers import {processor_class}, {model_class}
>>> import torch >>> import torch
>>> torch.manual_seed(0) # doctest: +IGNORE_RESULT
>>> tokenizer = {processor_class}.from_pretrained("{checkpoint}") >>> tokenizer = {processor_class}.from_pretrained("{checkpoint}")
>>> model = {model_class}.from_pretrained("{checkpoint}") >>> model = {model_class}.from_pretrained("{checkpoint}")
@@ -1022,8 +1024,16 @@ PT_QUESTION_ANSWERING_SAMPLE = r"""
>>> outputs = model(**inputs, start_positions=start_positions, end_positions=end_positions) >>> outputs = model(**inputs, start_positions=start_positions, end_positions=end_positions)
>>> loss = outputs.loss >>> loss = outputs.loss
>>> round(loss.item(), 2)
{expected_loss}
>>> start_scores = outputs.start_logits >>> start_scores = outputs.start_logits
>>> list(start_scores.shape)
{expected_output}
>>> end_scores = outputs.end_logits >>> end_scores = outputs.end_logits
>>> list(end_scores.shape)
{expected_output}
``` ```
""" """
@@ -1031,33 +1041,40 @@ PT_SEQUENCE_CLASSIFICATION_SAMPLE = r"""
Example of single-label classification: Example of single-label classification:
```python ```python
>>> from transformers import {processor_class}, {model_class}
>>> import torch >>> import torch
>>> from transformers import {processor_class}, {model_class}
>>> torch.manual_seed(0) # doctest: +IGNORE_RESULT
>>> tokenizer = {processor_class}.from_pretrained("{checkpoint}") >>> tokenizer = {processor_class}.from_pretrained("{checkpoint}")
>>> model = {model_class}.from_pretrained("{checkpoint}") >>> model = {model_class}.from_pretrained("{checkpoint}", num_labels=2)
>>> inputs = tokenizer("Hello, my dog is cute", return_tensors="pt") >>> inputs = tokenizer("Hello, my dog is cute", return_tensors="pt")
>>> labels = torch.tensor([1]).unsqueeze(0) # Batch size 1 >>> labels = torch.tensor([1]).unsqueeze(0) # Batch size 1
>>> outputs = model(**inputs, labels=labels) >>> outputs = model(**inputs, labels=labels)
>>> loss = outputs.loss >>> loss = outputs.loss
>>> logits = outputs.logits >>> logits = outputs.logits
>>> list(logits.shape)
{expected_output}
``` ```
Example of multi-label classification: Example of multi-label classification:
```python ```python
>>> from transformers import {processor_class}, {model_class}
>>> import torch >>> import torch
>>> from transformers import {processor_class}, {model_class}
>>> torch.manual_seed(0) # doctest: +IGNORE_RESULT
>>> tokenizer = {processor_class}.from_pretrained("{checkpoint}") >>> tokenizer = {processor_class}.from_pretrained("{checkpoint}")
>>> model = {model_class}.from_pretrained("{checkpoint}", problem_type="multi_label_classification") >>> model = {model_class}.from_pretrained("{checkpoint}", problem_type="multi_label_classification", num_labels=2)
>>> inputs = tokenizer("Hello, my dog is cute", return_tensors="pt") >>> inputs = tokenizer("Hello, my dog is cute", return_tensors="pt")
>>> labels = torch.tensor([[1, 1]], dtype=torch.float) # need dtype=float for BCEWithLogitsLoss >>> labels = torch.tensor([[1, 1]], dtype=torch.float) # need dtype=float for BCEWithLogitsLoss
>>> outputs = model(**inputs, labels=labels) >>> outputs = model(**inputs, labels=labels)
>>> loss = outputs.loss >>> loss = outputs.loss
>>> logits = outputs.logits >>> list(logits.shape)
{expected_output}
``` ```
""" """

View File

@@ -48,14 +48,24 @@ from .configuration_bart import BartConfig
logger = logging.get_logger(__name__) logger = logging.get_logger(__name__)
_CHECKPOINT_FOR_DOC = "facebook/bart-large" _CHECKPOINT_FOR_DOC = "facebook/bart-base"
_CONFIG_FOR_DOC = "BartConfig" _CONFIG_FOR_DOC = "BartConfig"
_TOKENIZER_FOR_DOC = "BartTokenizer" _TOKENIZER_FOR_DOC = "BartTokenizer"
# Base model docstring
_EXPECTED_OUTPUT_SHAPE = [1, 8, 768]
# SequenceClassification docstring
_SEQ_CLASS_EXPECTED_OUTPUT_SHAPE = [1, 2]
# QuestionAnswering docstring
_QA_EXPECTED_LOSS = 2.98
_QA_EXPECTED_OUTPUT_SHAPE = [1, 17]
BART_PRETRAINED_MODEL_ARCHIVE_LIST = [ BART_PRETRAINED_MODEL_ARCHIVE_LIST = [
"facebook/bart-large", "facebook/bart-large",
# See all BART models at https://huggingface.co/models?filter=bart # see all BART models at https://huggingface.co/models?filter=bart
] ]
@@ -542,12 +552,17 @@ BART_GENERATION_EXAMPLE = r"""
>>> model = BartForConditionalGeneration.from_pretrained("facebook/bart-large-cnn") >>> model = BartForConditionalGeneration.from_pretrained("facebook/bart-large-cnn")
>>> tokenizer = BartTokenizer.from_pretrained("facebook/bart-large-cnn") >>> tokenizer = BartTokenizer.from_pretrained("facebook/bart-large-cnn")
>>> ARTICLE_TO_SUMMARIZE = "My friends are cool but they eat too many carbs." >>> ARTICLE_TO_SUMMARIZE = (
... "PG&E stated it scheduled the blackouts in response to forecasts for high winds "
... "amid dry conditions. The aim is to reduce the risk of wildfires. Nearly 800 thousand customers were "
... "scheduled to be affected by the shutoffs which were expected to last through at least midday tomorrow."
... )
>>> inputs = tokenizer([ARTICLE_TO_SUMMARIZE], max_length=1024, return_tensors="pt") >>> inputs = tokenizer([ARTICLE_TO_SUMMARIZE], max_length=1024, return_tensors="pt")
>>> # Generate Summary >>> # Generate Summary
>>> summary_ids = model.generate(inputs["input_ids"], num_beams=4, max_length=5) >>> summary_ids = model.generate(inputs["input_ids"], num_beams=2, max_length=20)
>>> print(tokenizer.batch_decode(summary_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)) >>> tokenizer.batch_decode(summary_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]
'PG&E scheduled the blackouts in response to forecasts for high winds amid dry conditions'
``` ```
Mask filling example: Mask filling example:
@@ -555,10 +570,10 @@ BART_GENERATION_EXAMPLE = r"""
```python ```python
>>> from transformers import BartTokenizer, BartForConditionalGeneration >>> from transformers import BartTokenizer, BartForConditionalGeneration
>>> tokenizer = BartTokenizer.from_pretrained("facebook/bart-large") >>> tokenizer = BartTokenizer.from_pretrained("facebook/bart-base")
>>> TXT = "My friends are <mask> but they eat too many carbs." >>> model = BartForConditionalGeneration.from_pretrained("facebook/bart-base")
>>> model = BartForConditionalGeneration.from_pretrained("facebook/bart-large") >>> TXT = "My friends are <mask> but they eat too many carbs."
>>> input_ids = tokenizer([TXT], return_tensors="pt")["input_ids"] >>> input_ids = tokenizer([TXT], return_tensors="pt")["input_ids"]
>>> logits = model(input_ids).logits >>> logits = model(input_ids).logits
@@ -567,6 +582,7 @@ BART_GENERATION_EXAMPLE = r"""
>>> values, predictions = probs.topk(5) >>> values, predictions = probs.topk(5)
>>> tokenizer.decode(predictions).split() >>> tokenizer.decode(predictions).split()
['not', 'good', 'healthy', 'great', 'very']
``` ```
""" """
@@ -641,11 +657,10 @@ BART_INPUTS_DOCSTRING = r"""
If `past_key_values` are used, the user can optionally input only the last `decoder_input_ids` (those that If `past_key_values` are used, the user can optionally input only the last `decoder_input_ids` (those that
don't have their past key value states given to this model) of shape `(batch_size, 1)` instead of all don't have their past key value states given to this model) of shape `(batch_size, 1)` instead of all
``decoder_input_ids``` of shape `(batch_size, sequence_length)`. inputs_embeds (`torch.FloatTensor` of `decoder_input_ids` of shape `(batch_size, sequence_length)`. inputs_embeds (`torch.FloatTensor` of shape
shape `(batch_size, sequence_length, hidden_size)`, *optional*): Optionally, instead of passing `input_ids` `(batch_size, sequence_length, hidden_size)`, *optional*): Optionally, instead of passing `input_ids` you
you can choose to directly pass an embedded representation. This is useful if you want more control over can choose to directly pass an embedded representation. This is useful if you want more control over how to
how to convert `input_ids` indices into associated vectors than the model's internal embedding lookup convert `input_ids` indices into associated vectors than the model's internal embedding lookup matrix.
matrix.
decoder_inputs_embeds (`torch.FloatTensor` of shape `(batch_size, target_sequence_length, hidden_size)`, *optional*): decoder_inputs_embeds (`torch.FloatTensor` of shape `(batch_size, target_sequence_length, hidden_size)`, *optional*):
Optionally, instead of passing `decoder_input_ids` you can choose to directly pass an embedded Optionally, instead of passing `decoder_input_ids` you can choose to directly pass an embedded
representation. If `past_key_values` is used, optionally only the last `decoder_inputs_embeds` have to be representation. If `past_key_values` is used, optionally only the last `decoder_inputs_embeds` have to be
@@ -966,8 +981,8 @@ class BartDecoder(BartPretrainedModel):
If `past_key_values` are used, the user can optionally input only the last `decoder_input_ids` (those If `past_key_values` are used, the user can optionally input only the last `decoder_input_ids` (those
that don't have their past key value states given to this model) of shape `(batch_size, 1)` instead of that don't have their past key value states given to this model) of shape `(batch_size, 1)` instead of
all ``decoder_input_ids``` of shape `(batch_size, sequence_length)`. inputs_embeds (`torch.FloatTensor` all `decoder_input_ids` of shape `(batch_size, sequence_length)`. inputs_embeds (`torch.FloatTensor` of
of shape `(batch_size, sequence_length, hidden_size)`, *optional*): Optionally, instead of passing shape `(batch_size, sequence_length, hidden_size)`, *optional*): Optionally, instead of passing
`input_ids` you can choose to directly pass an embedded representation. This is useful if you want more `input_ids` you can choose to directly pass an embedded representation. This is useful if you want more
control over how to convert `input_ids` indices into associated vectors than the model's internal control over how to convert `input_ids` indices into associated vectors than the model's internal
embedding lookup matrix. embedding lookup matrix.
@@ -1153,6 +1168,7 @@ class BartModel(BartPretrainedModel):
checkpoint=_CHECKPOINT_FOR_DOC, checkpoint=_CHECKPOINT_FOR_DOC,
output_type=Seq2SeqModelOutput, output_type=Seq2SeqModelOutput,
config_class=_CONFIG_FOR_DOC, config_class=_CONFIG_FOR_DOC,
expected_output=_EXPECTED_OUTPUT_SHAPE,
) )
def forward( def forward(
self, self,
@@ -1434,6 +1450,7 @@ class BartForSequenceClassification(BartPretrainedModel):
checkpoint=_CHECKPOINT_FOR_DOC, checkpoint=_CHECKPOINT_FOR_DOC,
output_type=Seq2SeqSequenceClassifierOutput, output_type=Seq2SeqSequenceClassifierOutput,
config_class=_CONFIG_FOR_DOC, config_class=_CONFIG_FOR_DOC,
expected_output=_SEQ_CLASS_EXPECTED_OUTPUT_SHAPE,
) )
def forward( def forward(
self, self,
@@ -1558,6 +1575,8 @@ class BartForQuestionAnswering(BartPretrainedModel):
checkpoint=_CHECKPOINT_FOR_DOC, checkpoint=_CHECKPOINT_FOR_DOC,
output_type=Seq2SeqQuestionAnsweringModelOutput, output_type=Seq2SeqQuestionAnsweringModelOutput,
config_class=_CONFIG_FOR_DOC, config_class=_CONFIG_FOR_DOC,
expected_loss=_QA_EXPECTED_LOSS,
expected_output=_QA_EXPECTED_OUTPUT_SHAPE,
) )
def forward( def forward(
self, self,
@@ -1789,13 +1808,16 @@ class BartForCausalLM(BartPretrainedModel):
```python ```python
>>> from transformers import BartTokenizer, BartForCausalLM >>> from transformers import BartTokenizer, BartForCausalLM
>>> tokenizer = BartTokenizer.from_pretrained("facebook/bart-large") >>> tokenizer = BartTokenizer.from_pretrained("facebook/bart-base")
>>> model = BartForCausalLM.from_pretrained("facebook/bart-large", add_cross_attention=False) >>> model = BartForCausalLM.from_pretrained("facebook/bart-base", add_cross_attention=False)
>>> assert model.config.is_decoder, f"{model.__class__} has to be configured as a decoder." >>> assert model.config.is_decoder, f"{model.__class__} has to be configured as a decoder."
>>> inputs = tokenizer("Hello, my dog is cute", return_tensors="pt") >>> inputs = tokenizer("Hello, my dog is cute", return_tensors="pt")
>>> outputs = model(**inputs) >>> outputs = model(**inputs)
>>> logits = outputs.logits >>> logits = outputs.logits
>>> expected_shape = [1, inputs.input_ids.shape[-1], model.config.vocab_size]
>>> list(logits.shape) == expected_shape
True
```""" ```"""
output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions

View File

@@ -53,6 +53,16 @@ _CHECKPOINT_FOR_DOC = "google/bigbird-pegasus-large-arxiv"
_CONFIG_FOR_DOC = "BigBirdPegasusConfig" _CONFIG_FOR_DOC = "BigBirdPegasusConfig"
_TOKENIZER_FOR_DOC = "PegasusTokenizer" _TOKENIZER_FOR_DOC = "PegasusTokenizer"
# Base model docstring
_EXPECTED_OUTPUT_SHAPE = [1, 7, 1024]
# SequenceClassification docstring
_SEQ_CLASS_EXPECTED_OUTPUT_SHAPE = [1, 2]
# QuestionAnswering docstring
_QA_EXPECTED_LOSS = 2.56
_QA_EXPECTED_OUTPUT_SHAPE = [1, 12]
BIGBIRD_PEGASUS_PRETRAINED_MODEL_ARCHIVE_LIST = [ BIGBIRD_PEGASUS_PRETRAINED_MODEL_ARCHIVE_LIST = [
"google/bigbird-pegasus-large-arxiv", "google/bigbird-pegasus-large-arxiv",
@@ -1627,12 +1637,20 @@ BIGBIRD_PEGASUS_GENERATION_EXAMPLE = r"""
>>> model = BigBirdPegasusForConditionalGeneration.from_pretrained("google/bigbird-pegasus-large-arxiv") >>> model = BigBirdPegasusForConditionalGeneration.from_pretrained("google/bigbird-pegasus-large-arxiv")
>>> tokenizer = PegasusTokenizer.from_pretrained("google/bigbird-pegasus-large-arxiv") >>> tokenizer = PegasusTokenizer.from_pretrained("google/bigbird-pegasus-large-arxiv")
>>> ARTICLE_TO_SUMMARIZE = "My friends are cool but they eat too many carbs." >>> ARTICLE_TO_SUMMARIZE = (
... "The dominant sequence transduction models are based on complex recurrent or convolutional neural "
... "networks in an encoder-decoder configuration. The best performing models also connect the encoder "
... "and decoder through an attention mechanism. We propose a new simple network architecture, the Transformer, "
... "based solely on attention mechanisms, dispensing with recurrence and convolutions entirely. "
... "Experiments on two machine translation tasks show these models to be superior in quality "
... "while being more parallelizable and requiring significantly less time to train."
... )
>>> inputs = tokenizer([ARTICLE_TO_SUMMARIZE], max_length=4096, return_tensors="pt", truncation=True) >>> inputs = tokenizer([ARTICLE_TO_SUMMARIZE], max_length=4096, return_tensors="pt", truncation=True)
>>> # Generate Summary >>> # Generate Summary
>>> summary_ids = model.generate(inputs["input_ids"], num_beams=4, max_length=5) >>> summary_ids = model.generate(inputs["input_ids"], num_beams=4, max_length=15)
>>> print(tokenizer.batch_decode(summary_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)) >>> tokenizer.batch_decode(summary_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]
'dominant sequence models are based on recurrent or convolutional neural networks .'
``` ```
""" """
@@ -1684,11 +1702,10 @@ BIGBIRD_PEGASUS_INPUTS_DOCSTRING = r"""
If `past_key_values` are used, the user can optionally input only the last `decoder_input_ids` (those that If `past_key_values` are used, the user can optionally input only the last `decoder_input_ids` (those that
don't have their past key value states given to this model) of shape `(batch_size, 1)` instead of all don't have their past key value states given to this model) of shape `(batch_size, 1)` instead of all
``decoder_input_ids``` of shape `(batch_size, sequence_length)`. inputs_embeds (`torch.FloatTensor` of `decoder_input_ids` of shape `(batch_size, sequence_length)`. inputs_embeds (`torch.FloatTensor` of shape
shape `(batch_size, sequence_length, hidden_size)`, *optional*): Optionally, instead of passing `input_ids` `(batch_size, sequence_length, hidden_size)`, *optional*): Optionally, instead of passing `input_ids` you
you can choose to directly pass an embedded representation. This is useful if you want more control over can choose to directly pass an embedded representation. This is useful if you want more control over how to
how to convert `input_ids` indices into associated vectors than the model's internal embedding lookup convert `input_ids` indices into associated vectors than the model's internal embedding lookup matrix.
matrix.
decoder_inputs_embeds (`torch.FloatTensor` of shape `(batch_size, target_sequence_length, hidden_size)`, *optional*): decoder_inputs_embeds (`torch.FloatTensor` of shape `(batch_size, target_sequence_length, hidden_size)`, *optional*):
Optionally, instead of passing `decoder_input_ids` you can choose to directly pass an embedded Optionally, instead of passing `decoder_input_ids` you can choose to directly pass an embedded
representation. If `past_key_values` is used, optionally only the last `decoder_inputs_embeds` have to be representation. If `past_key_values` is used, optionally only the last `decoder_inputs_embeds` have to be
@@ -2159,8 +2176,8 @@ class BigBirdPegasusDecoder(BigBirdPegasusPreTrainedModel):
If `past_key_values` are used, the user can optionally input only the last `decoder_input_ids` (those If `past_key_values` are used, the user can optionally input only the last `decoder_input_ids` (those
that don't have their past key value states given to this model) of shape `(batch_size, 1)` instead of that don't have their past key value states given to this model) of shape `(batch_size, 1)` instead of
all ``decoder_input_ids``` of shape `(batch_size, sequence_length)`. inputs_embeds (`torch.FloatTensor` all `decoder_input_ids` of shape `(batch_size, sequence_length)`. inputs_embeds (`torch.FloatTensor` of
of shape `(batch_size, sequence_length, hidden_size)`, *optional*): Optionally, instead of passing shape `(batch_size, sequence_length, hidden_size)`, *optional*): Optionally, instead of passing
`input_ids` you can choose to directly pass an embedded representation. This is useful if you want more `input_ids` you can choose to directly pass an embedded representation. This is useful if you want more
control over how to convert `input_ids` indices into associated vectors than the model's internal control over how to convert `input_ids` indices into associated vectors than the model's internal
embedding lookup matrix. embedding lookup matrix.
@@ -2346,6 +2363,7 @@ class BigBirdPegasusModel(BigBirdPegasusPreTrainedModel):
checkpoint=_CHECKPOINT_FOR_DOC, checkpoint=_CHECKPOINT_FOR_DOC,
output_type=Seq2SeqModelOutput, output_type=Seq2SeqModelOutput,
config_class=_CONFIG_FOR_DOC, config_class=_CONFIG_FOR_DOC,
expected_output=_EXPECTED_OUTPUT_SHAPE,
) )
def forward( def forward(
self, self,
@@ -2630,6 +2648,7 @@ class BigBirdPegasusForSequenceClassification(BigBirdPegasusPreTrainedModel):
checkpoint=_CHECKPOINT_FOR_DOC, checkpoint=_CHECKPOINT_FOR_DOC,
output_type=Seq2SeqSequenceClassifierOutput, output_type=Seq2SeqSequenceClassifierOutput,
config_class=_CONFIG_FOR_DOC, config_class=_CONFIG_FOR_DOC,
expected_output=_SEQ_CLASS_EXPECTED_OUTPUT_SHAPE,
) )
def forward( def forward(
self, self,
@@ -2755,6 +2774,8 @@ class BigBirdPegasusForQuestionAnswering(BigBirdPegasusPreTrainedModel):
checkpoint=_CHECKPOINT_FOR_DOC, checkpoint=_CHECKPOINT_FOR_DOC,
output_type=Seq2SeqQuestionAnsweringModelOutput, output_type=Seq2SeqQuestionAnsweringModelOutput,
config_class=_CONFIG_FOR_DOC, config_class=_CONFIG_FOR_DOC,
expected_loss=_QA_EXPECTED_LOSS,
expected_output=_QA_EXPECTED_OUTPUT_SHAPE,
) )
def forward( def forward(
self, self,

View File

@@ -506,20 +506,37 @@ BLENDERBOT_START_DOCSTRING = r"""
""" """
BLENDERBOT_GENERATION_EXAMPLE = r""" BLENDERBOT_GENERATION_EXAMPLE = r"""
Conversation example:: Conversation example:
>>> from transformers import BlenderbotTokenizer, BlenderbotForConditionalGeneration >>> mname = ```python
'facebook/blenderbot-400M-distill' >>> model = BlenderbotForConditionalGeneration.from_pretrained(mname) >>> >>> from transformers import BlenderbotTokenizer, BlenderbotForConditionalGeneration
tokenizer = BlenderbotTokenizer.from_pretrained(mname) >>> UTTERANCE = "My friends are cool but they eat too
many carbs." >>> print("Human: ", UTTERANCE) >>> inputs = tokenizer([UTTERANCE], return_tensors='pt') >>>
reply_ids = model.generate(**inputs) >>> print("Bot: ", tokenizer.batch_decode(reply_ids,
skip_special_tokens=True)[0])
>>> REPLY = "I'm not sure" >>> print("Human: ", REPLY) >>> NEXT_UTTERANCE = ( ... "My friends are cool but they >>> mname = "facebook/blenderbot-400M-distill"
eat too many carbs.</s> <s>That's unfortunate. " ... "Are they trying to lose weight or are they just trying to >>> model = BlenderbotForConditionalGeneration.from_pretrained(mname)
be healthier?</s> " ... "<s> I'm not sure." ... ) >>> inputs = tokenizer([NEXT_UTTERANCE], return_tensors='pt') >>> tokenizer = BlenderbotTokenizer.from_pretrained(mname)
>>> next_reply_ids = model.generate(**inputs) >>> print("Bot: ", tokenizer.batch_decode(next_reply_ids, >>> UTTERANCE = "My friends are cool but they eat too many carbs."
skip_special_tokens=True)[0]) >>> print("Human: ", UTTERANCE)
Human: My friends are cool but they eat too many carbs.
>>> inputs = tokenizer([UTTERANCE], return_tensors="pt")
>>> reply_ids = model.generate(**inputs)
>>> print("Bot: ", tokenizer.batch_decode(reply_ids, skip_special_tokens=True)[0])
Bot: That's unfortunate. Are they trying to lose weight or are they just trying to be healthier?
>>> REPLY = "I'm not sure"
>>> print("Human: ", REPLY)
Human: I'm not sure
>>> NEXT_UTTERANCE = (
... "My friends are cool but they eat too many carbs.</s> <s>That's unfortunate. "
... "Are they trying to lose weight or are they just trying to be healthier?</s> "
... "<s> I'm not sure."
... )
>>> inputs = tokenizer([NEXT_UTTERANCE], return_tensors="pt")
>>> next_reply_ids = model.generate(**inputs)
>>> print("Bot: ", tokenizer.batch_decode(next_reply_ids, skip_special_tokens=True)[0])
Bot: That's too bad. Have you tried encouraging them to change their eating habits?
```
""" """
BLENDERBOT_INPUTS_DOCSTRING = r""" BLENDERBOT_INPUTS_DOCSTRING = r"""
@@ -586,11 +603,10 @@ BLENDERBOT_INPUTS_DOCSTRING = r"""
If `past_key_values` are used, the user can optionally input only the last `decoder_input_ids` (those that If `past_key_values` are used, the user can optionally input only the last `decoder_input_ids` (those that
don't have their past key value states given to this model) of shape `(batch_size, 1)` instead of all don't have their past key value states given to this model) of shape `(batch_size, 1)` instead of all
``decoder_input_ids``` of shape `(batch_size, sequence_length)`. inputs_embeds (`torch.FloatTensor` of `decoder_input_ids` of shape `(batch_size, sequence_length)`. inputs_embeds (`torch.FloatTensor` of shape
shape `(batch_size, sequence_length, hidden_size)`, *optional*): Optionally, instead of passing `input_ids` `(batch_size, sequence_length, hidden_size)`, *optional*): Optionally, instead of passing `input_ids` you
you can choose to directly pass an embedded representation. This is useful if you want more control over can choose to directly pass an embedded representation. This is useful if you want more control over how to
how to convert `input_ids` indices into associated vectors than the model's internal embedding lookup convert `input_ids` indices into associated vectors than the model's internal embedding lookup matrix.
matrix.
decoder_inputs_embeds (`torch.FloatTensor` of shape `(batch_size, target_sequence_length, hidden_size)`, *optional*): decoder_inputs_embeds (`torch.FloatTensor` of shape `(batch_size, target_sequence_length, hidden_size)`, *optional*):
Optionally, instead of passing `decoder_input_ids` you can choose to directly pass an embedded Optionally, instead of passing `decoder_input_ids` you can choose to directly pass an embedded
representation. If `past_key_values` is used, optionally only the last `decoder_inputs_embeds` have to be representation. If `past_key_values` is used, optionally only the last `decoder_inputs_embeds` have to be
@@ -907,8 +923,8 @@ class BlenderbotDecoder(BlenderbotPreTrainedModel):
If `past_key_values` are used, the user can optionally input only the last `decoder_input_ids` (those If `past_key_values` are used, the user can optionally input only the last `decoder_input_ids` (those
that don't have their past key value states given to this model) of shape `(batch_size, 1)` instead of that don't have their past key value states given to this model) of shape `(batch_size, 1)` instead of
all ``decoder_input_ids``` of shape `(batch_size, sequence_length)`. inputs_embeds (`torch.FloatTensor` all `decoder_input_ids` of shape `(batch_size, sequence_length)`. inputs_embeds (`torch.FloatTensor` of
of shape `(batch_size, sequence_length, hidden_size)`, *optional*): Optionally, instead of passing shape `(batch_size, sequence_length, hidden_size)`, *optional*): Optionally, instead of passing
`input_ids` you can choose to directly pass an embedded representation. This is useful if you want more `input_ids` you can choose to directly pass an embedded representation. This is useful if you want more
control over how to convert `input_ids` indices into associated vectors than the model's internal control over how to convert `input_ids` indices into associated vectors than the model's internal
embedding lookup matrix. embedding lookup matrix.
@@ -1130,13 +1146,13 @@ class BlenderbotModel(BlenderbotPreTrainedModel):
>>> model = BlenderbotModel.from_pretrained("facebook/blenderbot-400M-distill") >>> model = BlenderbotModel.from_pretrained("facebook/blenderbot-400M-distill")
>>> tokenizer = BlenderbotTokenizer.from_pretrained("facebook/blenderbot-400M-distill") >>> tokenizer = BlenderbotTokenizer.from_pretrained("facebook/blenderbot-400M-distill")
>>> input_ids = tokenizer( >>> inputs = tokenizer("Studies have been shown that owning a dog is good for you", return_tensors="pt")
... "Studies have been shown that owning a dog is good for you", return_tensors="pt"
>>> ).input_ids # Batch size 1
>>> decoder_input_ids = tokenizer("Studies show that", return_tensors="pt").input_ids # Batch size 1 >>> decoder_input_ids = tokenizer("Studies show that", return_tensors="pt").input_ids # Batch size 1
>>> outputs = model(input_ids=input_ids, decoder_input_ids=decoder_input_ids) >>> outputs = model(input_ids=inputs.input_ids, decoder_input_ids=decoder_input_ids)
>>> last_hidden_states = outputs.last_hidden_state >>> last_hidden_states = outputs.last_hidden_state
>>> list(last_hidden_states.shape)
[1, 6, 1280]
```""" ```"""
output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
output_hidden_states = ( output_hidden_states = (
@@ -1389,7 +1405,7 @@ class BlenderbotDecoderWrapper(BlenderbotPreTrainedModel):
return self.decoder(*args, **kwargs) return self.decoder(*args, **kwargs)
# Copied from transformers.models.bart.modeling_bart.BartForCausalLM with Bart->Blenderbot, facebook/bart-large->facebook/blenderbot-400M-distill # Copied from transformers.models.bart.modeling_bart.BartForCausalLM with Bart->Blenderbot, facebook/bart-base->facebook/blenderbot-400M-distill
class BlenderbotForCausalLM(BlenderbotPreTrainedModel): class BlenderbotForCausalLM(BlenderbotPreTrainedModel):
def __init__(self, config): def __init__(self, config):
config = copy.deepcopy(config) config = copy.deepcopy(config)
@@ -1520,6 +1536,9 @@ class BlenderbotForCausalLM(BlenderbotPreTrainedModel):
>>> outputs = model(**inputs) >>> outputs = model(**inputs)
>>> logits = outputs.logits >>> logits = outputs.logits
>>> expected_shape = [1, inputs.input_ids.shape[-1], model.config.vocab_size]
>>> list(logits.shape) == expected_shape
True
```""" ```"""
output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions

View File

@@ -504,20 +504,37 @@ BLENDERBOT_SMALL_START_DOCSTRING = r"""
""" """
BLENDERBOT_SMALL_GENERATION_EXAMPLE = r""" BLENDERBOT_SMALL_GENERATION_EXAMPLE = r"""
Conversation example:: Conversation example:
>>> from transformers import BlenderbotSmallTokenizer, BlenderbotSmallForConditionalGeneration >>> mname = ```python
'facebook/blenderbot_small-90M' >>> model = BlenderbotSmallForConditionalGeneration.from_pretrained(mname) >>> >>> from transformers import BlenderbotSmallTokenizer, BlenderbotSmallForConditionalGeneration
tokenizer = BlenderbotSmallTokenizer.from_pretrained(mname) >>> UTTERANCE = "My friends are cool but they eat
too many carbs." >>> print("Human: ", UTTERANCE) >>> inputs = tokenizer([UTTERANCE], return_tensors='pt') >>>
reply_ids = model.generate(**inputs) >>> print("Bot: ", tokenizer.batch_decode(reply_ids,
skip_special_tokens=True)[0]) what kind of carbs do they eat? i don't know much about carbs.
>>> REPLY = "I'm not sure" >>> print("Human: ", REPLY) >>> NEXT_UTTERANCE = ( ... "My friends are cool but they >>> mname = "facebook/blenderbot_small-90M"
eat too many carbs.</s> " ... "<s>what kind of carbs do they eat? i don't know much about carbs.</s> " ... >>> model = BlenderbotSmallForConditionalGeneration.from_pretrained(mname)
"<s>I'm not sure." ... ) >>> inputs = tokenizer([NEXT_UTTERANCE], return_tensors='pt') >>> >>> tokenizer = BlenderbotSmallTokenizer.from_pretrained(mname)
inputs.pop("token_type_ids") >>> next_reply_ids = model.generate(**inputs) >>> print("Bot: ", >>> UTTERANCE = "My friends are cool but they eat too many carbs."
tokenizer.batch_decode(next_reply_ids, skip_special_tokens=True)[0]) >>> print("Human: ", UTTERANCE)
Human: My friends are cool but they eat too many carbs.
>>> inputs = tokenizer([UTTERANCE], return_tensors="pt")
>>> reply_ids = model.generate(**inputs)
>>> print("Bot: ", tokenizer.batch_decode(reply_ids, skip_special_tokens=True)[0])
Bot: what kind of carbs do they eat? i don't know much about carbs.
>>> REPLY = "I'm not sure"
>>> print("Human: ", REPLY)
Human: I'm not sure
>>> NEXT_UTTERANCE = (
... "My friends are cool but they eat too many carbs.</s> <s>what kind of carbs do they eat? "
... "i don't know much about carbs</s> "
... "<s> I'm not sure."
... )
>>> inputs = tokenizer([NEXT_UTTERANCE], return_tensors="pt")
>>> next_reply_ids = model.generate(**inputs)
>>> print("Bot: ", tokenizer.batch_decode(next_reply_ids, skip_special_tokens=True)[0])
Bot: they eat a lot of carbs. carbs are high in fat, protein, and carbohydrates.
```
""" """
BLENDERBOT_SMALL_INPUTS_DOCSTRING = r""" BLENDERBOT_SMALL_INPUTS_DOCSTRING = r"""
@@ -584,11 +601,10 @@ BLENDERBOT_SMALL_INPUTS_DOCSTRING = r"""
If `past_key_values` are used, the user can optionally input only the last `decoder_input_ids` (those that If `past_key_values` are used, the user can optionally input only the last `decoder_input_ids` (those that
don't have their past key value states given to this model) of shape `(batch_size, 1)` instead of all don't have their past key value states given to this model) of shape `(batch_size, 1)` instead of all
``decoder_input_ids``` of shape `(batch_size, sequence_length)`. inputs_embeds (`torch.FloatTensor` of `decoder_input_ids` of shape `(batch_size, sequence_length)`. inputs_embeds (`torch.FloatTensor` of shape
shape `(batch_size, sequence_length, hidden_size)`, *optional*): Optionally, instead of passing `input_ids` `(batch_size, sequence_length, hidden_size)`, *optional*): Optionally, instead of passing `input_ids` you
you can choose to directly pass an embedded representation. This is useful if you want more control over can choose to directly pass an embedded representation. This is useful if you want more control over how to
how to convert `input_ids` indices into associated vectors than the model's internal embedding lookup convert `input_ids` indices into associated vectors than the model's internal embedding lookup matrix.
matrix.
decoder_inputs_embeds (`torch.FloatTensor` of shape `(batch_size, target_sequence_length, hidden_size)`, *optional*): decoder_inputs_embeds (`torch.FloatTensor` of shape `(batch_size, target_sequence_length, hidden_size)`, *optional*):
Optionally, instead of passing `decoder_input_ids` you can choose to directly pass an embedded Optionally, instead of passing `decoder_input_ids` you can choose to directly pass an embedded
representation. If `past_key_values` is used, optionally only the last `decoder_inputs_embeds` have to be representation. If `past_key_values` is used, optionally only the last `decoder_inputs_embeds` have to be
@@ -902,8 +918,8 @@ class BlenderbotSmallDecoder(BlenderbotSmallPreTrainedModel):
If `past_key_values` are used, the user can optionally input only the last `decoder_input_ids` (those If `past_key_values` are used, the user can optionally input only the last `decoder_input_ids` (those
that don't have their past key value states given to this model) of shape `(batch_size, 1)` instead of that don't have their past key value states given to this model) of shape `(batch_size, 1)` instead of
all ``decoder_input_ids``` of shape `(batch_size, sequence_length)`. inputs_embeds (`torch.FloatTensor` all `decoder_input_ids` of shape `(batch_size, sequence_length)`. inputs_embeds (`torch.FloatTensor` of
of shape `(batch_size, sequence_length, hidden_size)`, *optional*): Optionally, instead of passing shape `(batch_size, sequence_length, hidden_size)`, *optional*): Optionally, instead of passing
`input_ids` you can choose to directly pass an embedded representation. This is useful if you want more `input_ids` you can choose to directly pass an embedded representation. This is useful if you want more
control over how to convert `input_ids` indices into associated vectors than the model's internal control over how to convert `input_ids` indices into associated vectors than the model's internal
embedding lookup matrix. embedding lookup matrix.
@@ -1113,13 +1129,13 @@ class BlenderbotSmallModel(BlenderbotSmallPreTrainedModel):
>>> model = BlenderbotSmallModel.from_pretrained("facebook/blenderbot_small-90M") >>> model = BlenderbotSmallModel.from_pretrained("facebook/blenderbot_small-90M")
>>> tokenizer = BlenderbotSmallTokenizer.from_pretrained("facebook/blenderbot_small-90M") >>> tokenizer = BlenderbotSmallTokenizer.from_pretrained("facebook/blenderbot_small-90M")
>>> input_ids = tokenizer( >>> inputs = tokenizer("Studies have been shown that owning a dog is good for you", return_tensors="pt")
... "Studies have been shown that owning a dog is good for you", return_tensors="pt" >>> decoder_inputs = tokenizer("Studies show that", return_tensors="pt") # Batch size 1
>>> ).input_ids # Batch size 1 >>> outputs = model(input_ids=inputs.input_ids, decoder_input_ids=decoder_inputs.input_ids)
>>> decoder_input_ids = tokenizer("Studies show that", return_tensors="pt").input_ids # Batch size 1
>>> outputs = model(input_ids=input_ids, decoder_input_ids=decoder_input_ids)
>>> last_hidden_states = outputs.last_hidden_state >>> last_hidden_states = outputs.last_hidden_state
>>> list(last_hidden_states.shape)
[1, 3, 512]
```""" ```"""
output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
output_hidden_states = ( output_hidden_states = (
@@ -1360,7 +1376,7 @@ class BlenderbotSmallDecoderWrapper(BlenderbotSmallPreTrainedModel):
return self.decoder(*args, **kwargs) return self.decoder(*args, **kwargs)
# Copied from transformers.models.bart.modeling_bart.BartForCausalLM with Bart->BlenderbotSmall, facebook/bart-large->facebook/blenderbot_small-90M # Copied from transformers.models.bart.modeling_bart.BartForCausalLM with Bart->BlenderbotSmall, facebook/bart-base->facebook/blenderbot_small-90M
class BlenderbotSmallForCausalLM(BlenderbotSmallPreTrainedModel): class BlenderbotSmallForCausalLM(BlenderbotSmallPreTrainedModel):
def __init__(self, config): def __init__(self, config):
config = copy.deepcopy(config) config = copy.deepcopy(config)
@@ -1491,6 +1507,9 @@ class BlenderbotSmallForCausalLM(BlenderbotSmallPreTrainedModel):
>>> outputs = model(**inputs) >>> outputs = model(**inputs)
>>> logits = outputs.logits >>> logits = outputs.logits
>>> expected_shape = [1, inputs.input_ids.shape[-1], model.config.vocab_size]
>>> list(logits.shape) == expected_shape
True
```""" ```"""
output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions

View File

@@ -523,27 +523,28 @@ MARIAN_START_DOCSTRING = r"""
""" """
MARIAN_GENERATION_EXAMPLE = r""" MARIAN_GENERATION_EXAMPLE = r"""
PyTorch version of marian-nmt's transformer.h (C++). Designed for the OPUS-NMT translation checkpoints. PyTorch version of marian-nmt's transformer.h (C++). Designed for the OPUS-NMT translation checkpoints. Available
Available models are listed [here](https://huggingface.co/models?search=Helsinki-NLP). models are listed [here](https://huggingface.co/models?search=Helsinki-NLP).
Examples: Examples:
```python ```python
>>> from transformers import MarianTokenizer, MarianMTModel >>> from transformers import MarianTokenizer, MarianMTModel
>>> from typing import List
>>> src = "fr" # source language >>> src = "fr" # source language
>>> trg = "en" # target language >>> trg = "en" # target language
>>> sample_text = "où est l'arrêt de bus ?"
>>> model_name = f"Helsinki-NLP/opus-mt-{src}-{trg}"
>>> model = MarianMTModel.from_pretrained(model_name) >>> model_name = f"Helsinki-NLP/opus-mt-{src}-{trg}"
>>> tokenizer = MarianTokenizer.from_pretrained(model_name) >>> model = MarianMTModel.from_pretrained(model_name)
>>> batch = tokenizer([sample_text], return_tensors="pt") >>> tokenizer = MarianTokenizer.from_pretrained(model_name)
>>> gen = model.generate(**batch)
>>> tokenizer.batch_decode(gen, skip_special_tokens=True) >>> sample_text = "où est l'arrêt de bus ?"
"Where is the bus stop ?" >>> batch = tokenizer([sample_text], return_tensors="pt")
```
>>> generated_ids = model.generate(**batch)
>>> tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
"Where's the bus stop?"
```
""" """
MARIAN_INPUTS_DOCSTRING = r""" MARIAN_INPUTS_DOCSTRING = r"""
@@ -927,7 +928,7 @@ class MarianDecoder(MarianPreTrainedModel):
If `past_key_values` are used, the user can optionally input only the last `decoder_input_ids` (those If `past_key_values` are used, the user can optionally input only the last `decoder_input_ids` (those
that don't have their past key value states given to this model) of shape `(batch_size, 1)` instead of that don't have their past key value states given to this model) of shape `(batch_size, 1)` instead of
all ``decoder_input_ids``` of shape `(batch_size, sequence_length)`. all `decoder_input_ids` of shape `(batch_size, sequence_length)`.
inputs_embeds (`torch.FloatTensor` of shape `(batch_size, sequence_length, hidden_size)`, *optional*): inputs_embeds (`torch.FloatTensor` of shape `(batch_size, sequence_length, hidden_size)`, *optional*):
Optionally, instead of passing `input_ids` you can choose to directly pass an embedded representation. Optionally, instead of passing `input_ids` you can choose to directly pass an embedded representation.
This is useful if you want more control over how to convert `input_ids` indices into associated vectors This is useful if you want more control over how to convert `input_ids` indices into associated vectors
@@ -1136,17 +1137,17 @@ class MarianModel(MarianPreTrainedModel):
>>> tokenizer = MarianTokenizer.from_pretrained("Helsinki-NLP/opus-mt-en-de") >>> tokenizer = MarianTokenizer.from_pretrained("Helsinki-NLP/opus-mt-en-de")
>>> model = MarianModel.from_pretrained("Helsinki-NLP/opus-mt-en-de") >>> model = MarianModel.from_pretrained("Helsinki-NLP/opus-mt-en-de")
>>> input_ids = tokenizer( >>> inputs = tokenizer("Studies have been shown that owning a dog is good for you", return_tensors="pt")
... "Studies have been shown that owning a dog is good for you", return_tensors="pt" >>> decoder_inputs = tokenizer(
>>> ).input_ids # Batch size 1
>>> decoder_input_ids = tokenizer(
... "<pad> Studien haben gezeigt dass es hilfreich ist einen Hund zu besitzen", ... "<pad> Studien haben gezeigt dass es hilfreich ist einen Hund zu besitzen",
... return_tensors="pt", ... return_tensors="pt",
... add_special_tokens=False, ... add_special_tokens=False,
>>> ).input_ids # Batch size 1 ... )
>>> outputs = model(input_ids=input_ids, decoder_input_ids=decoder_input_ids) >>> outputs = model(input_ids=inputs.input_ids, decoder_input_ids=decoder_inputs.input_ids)
>>> last_hidden_states = outputs.last_hidden_state >>> last_hidden_states = outputs.last_hidden_state
>>> list(last_hidden_states.shape)
[1, 26, 512]
```""" ```"""
output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
output_hidden_states = ( output_hidden_states = (
@@ -1400,7 +1401,7 @@ class MarianDecoderWrapper(MarianPreTrainedModel):
return self.decoder(*args, **kwargs) return self.decoder(*args, **kwargs)
# Copied from transformers.models.bart.modeling_bart.BartForCausalLM with Bart->Marian, facebook/bart-large->Helsinki-NLP/opus-mt-fr-en # Copied from transformers.models.bart.modeling_bart.BartForCausalLM with Bart->Marian, facebook/bart-base->Helsinki-NLP/opus-mt-fr-en
class MarianForCausalLM(MarianPreTrainedModel): class MarianForCausalLM(MarianPreTrainedModel):
def __init__(self, config): def __init__(self, config):
config = copy.deepcopy(config) config = copy.deepcopy(config)
@@ -1529,6 +1530,9 @@ class MarianForCausalLM(MarianPreTrainedModel):
>>> outputs = model(**inputs) >>> outputs = model(**inputs)
>>> logits = outputs.logits >>> logits = outputs.logits
>>> expected_shape = [1, inputs.input_ids.shape[-1], model.config.vocab_size]
>>> list(logits.shape) == expected_shape
True
```""" ```"""
output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions

View File

@@ -51,6 +51,16 @@ _CHECKPOINT_FOR_DOC = "facebook/mbart-large-cc25"
_CONFIG_FOR_DOC = "MBartConfig" _CONFIG_FOR_DOC = "MBartConfig"
_TOKENIZER_FOR_DOC = "MBartTokenizer" _TOKENIZER_FOR_DOC = "MBartTokenizer"
# Base model docstring
_EXPECTED_OUTPUT_SHAPE = [1, 8, 1024]
# SequenceClassification docstring
_SEQ_CLASS_EXPECTED_OUTPUT_SHAPE = [1, 2]
# QuestionAnswering docstring
_QA_EXPECTED_LOSS = 3.04
_QA_EXPECTED_OUTPUT_SHAPE = [1, 16]
MBART_PRETRAINED_MODEL_ARCHIVE_LIST = [ MBART_PRETRAINED_MODEL_ARCHIVE_LIST = [
"facebook/mbart-large-cc25", "facebook/mbart-large-cc25",
@@ -532,20 +542,21 @@ MBART_START_DOCSTRING = r"""
""" """
MBART_GENERATION_EXAMPLE = r""" MBART_GENERATION_EXAMPLE = r"""
Summarization example: Translation example:
```python ```python
>>> from transformers import MBartTokenizer, MBartForConditionalGeneration >>> from transformers import MBartTokenizer, MBartForConditionalGeneration
>>> model = MBartForConditionalGeneration.from_pretrained("facebook/mbart-large-cc25") >>> model = MBartForConditionalGeneration.from_pretrained("facebook/mbart-large-en-ro")
>>> tokenizer = MBartTokenizer.from_pretrained("facebook/mbart-large-cc25") >>> tokenizer = MBartTokenizer.from_pretrained("facebook/mbart-large-en-ro")
>>> ARTICLE_TO_SUMMARIZE = "Meine Freunde sind cool, aber sie essen zu viel Kuchen." >>> example_english_phrase = "42 is the answer"
>>> inputs = tokenizer([ARTICLE_TO_SUMMARIZE], max_length=1024, return_tensors="pt") >>> inputs = tokenizer(example_english_phrase, return_tensors="pt")
>>> # Generate Summary >>> # Translate
>>> summary_ids = model.generate(inputs["input_ids"], num_beams=4, max_length=5) >>> generated_ids = model.generate(inputs["input_ids"], num_beams=4, max_length=5)
>>> print(tokenizer.batch_decode(summary_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)) >>> tokenizer.batch_decode(generated_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]
'42 este răspuns'
``` ```
Mask filling example: Mask filling example:
@@ -567,6 +578,7 @@ MBART_GENERATION_EXAMPLE = r"""
>>> values, predictions = probs.topk(5) >>> values, predictions = probs.topk(5)
>>> tokenizer.decode(predictions).split() >>> tokenizer.decode(predictions).split()
['nett', 'sehr', 'ganz', 'nicht', 'so']
``` ```
""" """
@@ -639,11 +651,10 @@ MBART_INPUTS_DOCSTRING = r"""
If `past_key_values` are used, the user can optionally input only the last `decoder_input_ids` (those that If `past_key_values` are used, the user can optionally input only the last `decoder_input_ids` (those that
don't have their past key value states given to this model) of shape `(batch_size, 1)` instead of all don't have their past key value states given to this model) of shape `(batch_size, 1)` instead of all
``decoder_input_ids``` of shape `(batch_size, sequence_length)`. inputs_embeds (`torch.FloatTensor` of `decoder_input_ids` of shape `(batch_size, sequence_length)`. inputs_embeds (`torch.FloatTensor` of shape
shape `(batch_size, sequence_length, hidden_size)`, *optional*): Optionally, instead of passing `input_ids` `(batch_size, sequence_length, hidden_size)`, *optional*): Optionally, instead of passing `input_ids` you
you can choose to directly pass an embedded representation. This is useful if you want more control over can choose to directly pass an embedded representation. This is useful if you want more control over how to
how to convert `input_ids` indices into associated vectors than the model's internal embedding lookup convert `input_ids` indices into associated vectors than the model's internal embedding lookup matrix.
matrix.
decoder_inputs_embeds (`torch.FloatTensor` of shape `(batch_size, target_sequence_length, hidden_size)`, *optional*): decoder_inputs_embeds (`torch.FloatTensor` of shape `(batch_size, target_sequence_length, hidden_size)`, *optional*):
Optionally, instead of passing `decoder_input_ids` you can choose to directly pass an embedded Optionally, instead of passing `decoder_input_ids` you can choose to directly pass an embedded
representation. If `past_key_values` is used, optionally only the last `decoder_inputs_embeds` have to be representation. If `past_key_values` is used, optionally only the last `decoder_inputs_embeds` have to be
@@ -966,8 +977,8 @@ class MBartDecoder(MBartPreTrainedModel):
If `past_key_values` are used, the user can optionally input only the last `decoder_input_ids` (those If `past_key_values` are used, the user can optionally input only the last `decoder_input_ids` (those
that don't have their past key value states given to this model) of shape `(batch_size, 1)` instead of that don't have their past key value states given to this model) of shape `(batch_size, 1)` instead of
all ``decoder_input_ids``` of shape `(batch_size, sequence_length)`. inputs_embeds (`torch.FloatTensor` all `decoder_input_ids` of shape `(batch_size, sequence_length)`. inputs_embeds (`torch.FloatTensor` of
of shape `(batch_size, sequence_length, hidden_size)`, *optional*): Optionally, instead of passing shape `(batch_size, sequence_length, hidden_size)`, *optional*): Optionally, instead of passing
`input_ids` you can choose to directly pass an embedded representation. This is useful if you want more `input_ids` you can choose to directly pass an embedded representation. This is useful if you want more
control over how to convert `input_ids` indices into associated vectors than the model's internal control over how to convert `input_ids` indices into associated vectors than the model's internal
embedding lookup matrix. embedding lookup matrix.
@@ -1153,6 +1164,7 @@ class MBartModel(MBartPreTrainedModel):
checkpoint=_CHECKPOINT_FOR_DOC, checkpoint=_CHECKPOINT_FOR_DOC,
output_type=Seq2SeqModelOutput, output_type=Seq2SeqModelOutput,
config_class=_CONFIG_FOR_DOC, config_class=_CONFIG_FOR_DOC,
expected_output=_EXPECTED_OUTPUT_SHAPE,
) )
def forward( def forward(
self, self,
@@ -1428,6 +1440,7 @@ class MBartForSequenceClassification(MBartPreTrainedModel):
checkpoint=_CHECKPOINT_FOR_DOC, checkpoint=_CHECKPOINT_FOR_DOC,
output_type=Seq2SeqSequenceClassifierOutput, output_type=Seq2SeqSequenceClassifierOutput,
config_class=_CONFIG_FOR_DOC, config_class=_CONFIG_FOR_DOC,
expected_output=_SEQ_CLASS_EXPECTED_OUTPUT_SHAPE,
) )
# Copied from transformers.models.bart.modeling_bart.BartForSequenceClassification.forward # Copied from transformers.models.bart.modeling_bart.BartForSequenceClassification.forward
def forward( def forward(
@@ -1553,6 +1566,8 @@ class MBartForQuestionAnswering(MBartPreTrainedModel):
checkpoint=_CHECKPOINT_FOR_DOC, checkpoint=_CHECKPOINT_FOR_DOC,
output_type=Seq2SeqQuestionAnsweringModelOutput, output_type=Seq2SeqQuestionAnsweringModelOutput,
config_class=_CONFIG_FOR_DOC, config_class=_CONFIG_FOR_DOC,
expected_loss=_QA_EXPECTED_LOSS,
expected_output=_QA_EXPECTED_OUTPUT_SHAPE,
) )
# Copied from transformers.models.bart.modeling_bart.BartForQuestionAnswering.forward # Copied from transformers.models.bart.modeling_bart.BartForQuestionAnswering.forward
def forward( def forward(
@@ -1665,7 +1680,7 @@ class MBartDecoderWrapper(MBartPreTrainedModel):
return self.decoder(*args, **kwargs) return self.decoder(*args, **kwargs)
# Copied from transformers.models.bart.modeling_bart.BartForCausalLM with Bart->MBart, facebook/bart-large->facebook/mbart-large-cc25 # Copied from transformers.models.bart.modeling_bart.BartForCausalLM with Bart->MBart, facebook/bart-base->facebook/mbart-large-cc25
class MBartForCausalLM(MBartPreTrainedModel): class MBartForCausalLM(MBartPreTrainedModel):
def __init__(self, config): def __init__(self, config):
config = copy.deepcopy(config) config = copy.deepcopy(config)
@@ -1794,6 +1809,9 @@ class MBartForCausalLM(MBartPreTrainedModel):
>>> outputs = model(**inputs) >>> outputs = model(**inputs)
>>> logits = outputs.logits >>> logits = outputs.logits
>>> expected_shape = [1, inputs.input_ids.shape[-1], model.config.vocab_size]
>>> list(logits.shape) == expected_shape
True
```""" ```"""
output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions

View File

@@ -529,7 +529,8 @@ PEGASUS_GENERATION_EXAMPLE = r"""
>>> # Generate Summary >>> # Generate Summary
>>> summary_ids = model.generate(inputs["input_ids"]) >>> summary_ids = model.generate(inputs["input_ids"])
>>> print(tokenizer.batch_decode(summary_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)) >>> tokenizer.batch_decode(summary_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]
"California's largest electricity provider has turned off power to hundreds of thousands of customers."
``` ```
""" """
@@ -597,11 +598,10 @@ PEGASUS_INPUTS_DOCSTRING = r"""
If `past_key_values` are used, the user can optionally input only the last `decoder_input_ids` (those that If `past_key_values` are used, the user can optionally input only the last `decoder_input_ids` (those that
don't have their past key value states given to this model) of shape `(batch_size, 1)` instead of all don't have their past key value states given to this model) of shape `(batch_size, 1)` instead of all
``decoder_input_ids``` of shape `(batch_size, sequence_length)`. inputs_embeds (`torch.FloatTensor` of `decoder_input_ids` of shape `(batch_size, sequence_length)`. inputs_embeds (`torch.FloatTensor` of shape
shape `(batch_size, sequence_length, hidden_size)`, *optional*): Optionally, instead of passing `input_ids` `(batch_size, sequence_length, hidden_size)`, *optional*): Optionally, instead of passing `input_ids` you
you can choose to directly pass an embedded representation. This is useful if you want more control over can choose to directly pass an embedded representation. This is useful if you want more control over how to
how to convert `input_ids` indices into associated vectors than the model's internal embedding lookup convert `input_ids` indices into associated vectors than the model's internal embedding lookup matrix.
matrix.
decoder_inputs_embeds (`torch.FloatTensor` of shape `(batch_size, target_sequence_length, hidden_size)`, *optional*): decoder_inputs_embeds (`torch.FloatTensor` of shape `(batch_size, target_sequence_length, hidden_size)`, *optional*):
Optionally, instead of passing `decoder_input_ids` you can choose to directly pass an embedded Optionally, instead of passing `decoder_input_ids` you can choose to directly pass an embedded
representation. If `past_key_values` is used, optionally only the last `decoder_inputs_embeds` have to be representation. If `past_key_values` is used, optionally only the last `decoder_inputs_embeds` have to be
@@ -977,8 +977,8 @@ class PegasusDecoder(PegasusPreTrainedModel):
If `past_key_values` are used, the user can optionally input only the last `decoder_input_ids` (those If `past_key_values` are used, the user can optionally input only the last `decoder_input_ids` (those
that don't have their past key value states given to this model) of shape `(batch_size, 1)` instead of that don't have their past key value states given to this model) of shape `(batch_size, 1)` instead of
all ``decoder_input_ids``` of shape `(batch_size, sequence_length)`. inputs_embeds (`torch.FloatTensor` all `decoder_input_ids` of shape `(batch_size, sequence_length)`. inputs_embeds (`torch.FloatTensor` of
of shape `(batch_size, sequence_length, hidden_size)`, *optional*): Optionally, instead of passing shape `(batch_size, sequence_length, hidden_size)`, *optional*): Optionally, instead of passing
`input_ids` you can choose to directly pass an embedded representation. This is useful if you want more `input_ids` you can choose to directly pass an embedded representation. This is useful if you want more
control over how to convert `input_ids` indices into associated vectors than the model's internal control over how to convert `input_ids` indices into associated vectors than the model's internal
embedding lookup matrix. embedding lookup matrix.
@@ -1211,13 +1211,13 @@ class PegasusModel(PegasusPreTrainedModel):
>>> tokenizer = PegasusTokenizer.from_pretrained("google/pegasus-large") >>> tokenizer = PegasusTokenizer.from_pretrained("google/pegasus-large")
>>> model = PegasusModel.from_pretrained("google/pegasus-large") >>> model = PegasusModel.from_pretrained("google/pegasus-large")
>>> input_ids = tokenizer( >>> inputs = tokenizer("Studies have been shown that owning a dog is good for you", return_tensors="pt")
... "Studies have been shown that owning a dog is good for you", return_tensors="pt" >>> decoder_inputs = tokenizer("Studies show that", return_tensors="pt")
>>> ).input_ids # Batch size 1 >>> outputs = model(input_ids=inputs.input_ids, decoder_input_ids=decoder_inputs.input_ids)
>>> decoder_input_ids = tokenizer("Studies show that", return_tensors="pt").input_ids # Batch size 1
>>> outputs = model(input_ids=input_ids, decoder_input_ids=decoder_input_ids)
>>> last_hidden_states = outputs.last_hidden_state >>> last_hidden_states = outputs.last_hidden_state
>>> list(last_hidden_states.shape)
[1, 4, 1024]
```""" ```"""
output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
@@ -1540,7 +1540,7 @@ class PegasusForCausalLM(PegasusPreTrainedModel):
self.model.decoder.resize_position_embeddings(new_num_position_embeddings) self.model.decoder.resize_position_embeddings(new_num_position_embeddings)
@replace_return_docstrings(output_type=CausalLMOutputWithCrossAttentions, config_class=_CONFIG_FOR_DOC) @replace_return_docstrings(output_type=CausalLMOutputWithCrossAttentions, config_class=_CONFIG_FOR_DOC)
# Copied from transformers.models.bart.modeling_bart.BartForCausalLM.forward with Bart->Pegasus, facebook/bart-large->google/pegasus-large # Copied from transformers.models.bart.modeling_bart.BartForCausalLM.forward with Bart->Pegasus, facebook/bart-base->google/pegasus-large
def forward( def forward(
self, self,
input_ids=None, input_ids=None,
@@ -1637,6 +1637,9 @@ class PegasusForCausalLM(PegasusPreTrainedModel):
>>> outputs = model(**inputs) >>> outputs = model(**inputs)
>>> logits = outputs.logits >>> logits = outputs.logits
>>> expected_shape = [1, inputs.input_ids.shape[-1], model.config.vocab_size]
>>> list(logits.shape) == expected_shape
True
```""" ```"""
output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions

View File

@@ -50,6 +50,12 @@ _CHECKPOINT_FOR_DOC = "uclanlp/plbart-base"
_CONFIG_FOR_DOC = "PLBartConfig" _CONFIG_FOR_DOC = "PLBartConfig"
_TOKENIZER_FOR_DOC = "PLBartTokenizer" _TOKENIZER_FOR_DOC = "PLBartTokenizer"
# Base model docstring
_EXPECTED_OUTPUT_SHAPE = [1, 8, 768]
# SequenceClassification docstring
_SEQ_CLASS_EXPECTED_OUTPUT_SHAPE = [1, 2]
PLBART_PRETRAINED_MODEL_ARCHIVE_LIST = [ PLBART_PRETRAINED_MODEL_ARCHIVE_LIST = [
"uclanlp/plbart-base", "uclanlp/plbart-base",
@@ -526,27 +532,26 @@ PLBART_START_DOCSTRING = r"""
""" """
PLBART_GENERATION_EXAMPLE = r""" PLBART_GENERATION_EXAMPLE = r"""
Token in-filling example:
>>> from transformers import PLBartTokenizer, PLBartForConditionalGeneration, PLBartConfig
>>> model = PLBartForConditionalGeneration.from_pretrained('uclanlp/plbart-base') >>> tokenizer =
PLBartTokenizer.from_pretrained('uclanlp/plbart-base', src_lang='java', tgt_lang='java') >>> METHOD_TO_FILL =
"public static main (String args[0]) { data=Date(); System.out. String.format("Current Date : % tc", ));}" >>>
inputs = tokenizer([METHOD_TO_FILL], max_length=1024, return_tensors='pt') >>> # Generate Filled Code >>>
generated_ids = model.generate(inputs['input_ids'], num_beams=4, max_length=5, early_stopping=True) >>>
print([tokenizer.decode(g, skip_special_tokens=True, clean_up_tokenization_spaces=False) for g in
generated_ids])
Mask-filling example: Mask-filling example:
>>> from transformers import PLBartTokenizer, PLBartForConditionalGeneration >>> tokenizer = ```python
PLBartTokenizer.from_pretrained('uclanlp/plbart-base') >>> # en_XX is the language symbol id <LID> for English >>> from transformers import PLBartTokenizer, PLBartForConditionalGeneration
>>> TXT = "</s> Is 0 the <mask> Fibonacci <mask> ? </s> en_XX" >>> model =
PLBartForConditionalGeneration.from_pretrained('uclanlp/plbart-base') >>> input_ids = tokenizer([TXT], >>> model = PLBartForConditionalGeneration.from_pretrained("uclanlp/plbart-base")
add_special_tokens=False, return_tensors='pt')['input_ids'] >>> logits = model(input_ids).logits >>> >>> tokenizer = PLBartTokenizer.from_pretrained("uclanlp/plbart-base")
masked_index = (input_ids[0] == tokenizer.mask_token_id).nonzero().item() >>> probs = logits[0,
masked_index].softmax(dim=0) >>> values, predictions = probs.topk(5) >>> tokenizer.decode(predictions).split() >>> # en_XX is the language symbol id <LID> for English
>>> TXT = "<s> Is 0 the <mask> Fibonacci number ? </s> en_XX"
>>> input_ids = tokenizer([TXT], add_special_tokens=False, return_tensors="pt").input_ids
>>> logits = model(input_ids).logits
>>> masked_index = (input_ids[0] == tokenizer.mask_token_id).nonzero().item()
>>> probs = logits[0, masked_index].softmax(dim=0)
>>> values, predictions = probs.topk(5)
>>> tokenizer.decode(predictions).split()
['same', 'first', 'highest', 'result', 'Fib']
```
""" """
PLBART_INPUTS_DOCSTRING = r""" PLBART_INPUTS_DOCSTRING = r"""
@@ -619,7 +624,7 @@ PLBART_INPUTS_DOCSTRING = r"""
If `past_key_values` are used, the user can optionally input only the last `decoder_input_ids` (those that If `past_key_values` are used, the user can optionally input only the last `decoder_input_ids` (those that
don't have their past key value states given to this model) of shape `(batch_size, 1)` instead of all don't have their past key value states given to this model) of shape `(batch_size, 1)` instead of all
``decoder_input_ids``` of shape `(batch_size, sequence_length)`. `decoder_input_ids` of shape `(batch_size, sequence_length)`.
inputs_embeds (`torch.FloatTensor` of shape `(batch_size, sequence_length, hidden_size)`, *optional*): inputs_embeds (`torch.FloatTensor` of shape `(batch_size, sequence_length, hidden_size)`, *optional*):
Optionally, instead of passing `input_ids` you can choose to directly pass an embedded representation. This is useful Optionally, instead of passing `input_ids` you can choose to directly pass an embedded representation. This is useful
@@ -948,8 +953,8 @@ class PLBartDecoder(PLBartPreTrainedModel):
If `past_key_values` are used, the user can optionally input only the last `decoder_input_ids` (those If `past_key_values` are used, the user can optionally input only the last `decoder_input_ids` (those
that don't have their past key value states given to this model) of shape `(batch_size, 1)` instead of that don't have their past key value states given to this model) of shape `(batch_size, 1)` instead of
all ``decoder_input_ids``` of shape `(batch_size, sequence_length)`. inputs_embeds (`torch.FloatTensor` all `decoder_input_ids` of shape `(batch_size, sequence_length)`. inputs_embeds (`torch.FloatTensor` of
of shape `(batch_size, sequence_length, hidden_size)`, *optional*): Optionally, instead of passing shape `(batch_size, sequence_length, hidden_size)`, *optional*): Optionally, instead of passing
`input_ids` you can choose to directly pass an embedded representation. This is useful if you want more `input_ids` you can choose to directly pass an embedded representation. This is useful if you want more
control over how to convert `input_ids` indices into associated vectors than the model's internal control over how to convert `input_ids` indices into associated vectors than the model's internal
embedding lookup matrix. embedding lookup matrix.
@@ -1406,6 +1411,7 @@ class PLBartForSequenceClassification(PLBartPreTrainedModel):
checkpoint=_CHECKPOINT_FOR_DOC, checkpoint=_CHECKPOINT_FOR_DOC,
output_type=Seq2SeqSequenceClassifierOutput, output_type=Seq2SeqSequenceClassifierOutput,
config_class=_CONFIG_FOR_DOC, config_class=_CONFIG_FOR_DOC,
expected_output=_SEQ_CLASS_EXPECTED_OUTPUT_SHAPE,
) )
# Copied from transformers.models.bart.modeling_bart.BartForSequenceClassification.forward # Copied from transformers.models.bart.modeling_bart.BartForSequenceClassification.forward
def forward( def forward(
@@ -1521,7 +1527,7 @@ class PLBartDecoderWrapper(PLBartPreTrainedModel):
return self.decoder(*args, **kwargs) return self.decoder(*args, **kwargs)
# Copied from transformers.models.bart.modeling_bart.BartForCausalLM with Bart->PLBart # Copied from transformers.models.bart.modeling_bart.BartForCausalLM with Bart->PLBart, facebook/bart-base->uclanlp/plbart-base
class PLBartForCausalLM(PLBartPreTrainedModel): class PLBartForCausalLM(PLBartPreTrainedModel):
def __init__(self, config): def __init__(self, config):
config = copy.deepcopy(config) config = copy.deepcopy(config)
@@ -1643,13 +1649,16 @@ class PLBartForCausalLM(PLBartPreTrainedModel):
```python ```python
>>> from transformers import PLBartTokenizer, PLBartForCausalLM >>> from transformers import PLBartTokenizer, PLBartForCausalLM
>>> tokenizer = PLBartTokenizer.from_pretrained("facebook/bart-large") >>> tokenizer = PLBartTokenizer.from_pretrained("uclanlp/plbart-base")
>>> model = PLBartForCausalLM.from_pretrained("facebook/bart-large", add_cross_attention=False) >>> model = PLBartForCausalLM.from_pretrained("uclanlp/plbart-base", add_cross_attention=False)
>>> assert model.config.is_decoder, f"{model.__class__} has to be configured as a decoder." >>> assert model.config.is_decoder, f"{model.__class__} has to be configured as a decoder."
>>> inputs = tokenizer("Hello, my dog is cute", return_tensors="pt") >>> inputs = tokenizer("Hello, my dog is cute", return_tensors="pt")
>>> outputs = model(**inputs) >>> outputs = model(**inputs)
>>> logits = outputs.logits >>> logits = outputs.logits
>>> expected_shape = [1, inputs.input_ids.shape[-1], model.config.vocab_size]
>>> list(logits.shape) == expected_shape
True
```""" ```"""
output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions

View File

@@ -20,5 +20,13 @@ src/transformers/models/poolformer/modeling_poolformer.py
src/transformers/models/vit_mae/modeling_vit_mae.py src/transformers/models/vit_mae/modeling_vit_mae.py
src/transformers/models/segformer/modeling_segformer.py src/transformers/models/segformer/modeling_segformer.py
src/transformers/models/vision_encoder_decoder/modeling_vision_encoder_decoder.py src/transformers/models/vision_encoder_decoder/modeling_vision_encoder_decoder.py
src/transformers/models/bart/modeling_bart.py
src/transformers/models/mbart/modeling_mbart.py
src/transformers/models/bigbird_pegasus/modeling_bigbird_pegasus.py
src/transformers/models/marian/modeling_marian.py
src/transformers/models/pegasus/modeling_pegasus.py
src/transformers/models/blenderbot/modeling_blenderbot.py
src/transformers/models/blenderbot_small/modeling_blenderbot_small.py
src/transformers/models/plbart/modeling_plbart.py
docs/source/quicktour.mdx docs/source/quicktour.mdx
docs/source/task_summary.mdx docs/source/task_summary.mdx