From a69e185074fff529ed60d936c6afe05580aee8ac Mon Sep 17 00:00:00 2001 From: Suraj Patil Date: Wed, 9 Mar 2022 20:30:38 +0100 Subject: [PATCH] add doctests for bart like seq2seq models (#15987) * boom boom * enable doctest for few seq2seq models * add seq2seq models in documentation_tests.txt * fix docstring blenderbot * Apply suggestions from code review Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com> * Apply suggestions from code review Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com> * fix seq classif doc sample * don't check loss for seq classif examples * +IGNORE_OUTPUT => +IGNORE_RESULT * fix _SEQ_CLASS_EXPECTED_OUTPUT_SHAPE * fix some docs * more fixes * last fix (hopefully) * fix big bird gen example * fix mbart gen example Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com> --- src/transformers/file_utils.py | 27 ++++++-- src/transformers/models/bart/modeling_bart.py | 56 ++++++++++----- .../modeling_bigbird_pegasus.py | 41 ++++++++--- .../models/blenderbot/modeling_blenderbot.py | 67 +++++++++++------- .../modeling_blenderbot_small.py | 69 ++++++++++++------- .../models/marian/modeling_marian.py | 54 ++++++++------- .../models/mbart/modeling_mbart.py | 50 +++++++++----- .../models/pegasus/modeling_pegasus.py | 31 +++++---- .../models/plbart/modeling_plbart.py | 59 +++++++++------- utils/documentation_tests.txt | 8 +++ 10 files changed, 301 insertions(+), 161 deletions(-) diff --git a/src/transformers/file_utils.py b/src/transformers/file_utils.py index 8a19d298ea..5948e7070a 100644 --- a/src/transformers/file_utils.py +++ b/src/transformers/file_utils.py @@ -1012,6 +1012,8 @@ PT_QUESTION_ANSWERING_SAMPLE = r""" >>> from transformers import {processor_class}, {model_class} >>> import torch + >>> torch.manual_seed(0) # doctest: +IGNORE_RESULT + >>> tokenizer = {processor_class}.from_pretrained("{checkpoint}") >>> model = {model_class}.from_pretrained("{checkpoint}") @@ -1022,8 +1024,16 @@ PT_QUESTION_ANSWERING_SAMPLE = r""" >>> outputs = model(**inputs, start_positions=start_positions, end_positions=end_positions) >>> loss = outputs.loss + >>> round(loss.item(), 2) + {expected_loss} + >>> start_scores = outputs.start_logits + >>> list(start_scores.shape) + {expected_output} + >>> end_scores = outputs.end_logits + >>> list(end_scores.shape) + {expected_output} ``` """ @@ -1031,33 +1041,40 @@ PT_SEQUENCE_CLASSIFICATION_SAMPLE = r""" Example of single-label classification: ```python - >>> from transformers import {processor_class}, {model_class} >>> import torch + >>> from transformers import {processor_class}, {model_class} + + >>> torch.manual_seed(0) # doctest: +IGNORE_RESULT >>> tokenizer = {processor_class}.from_pretrained("{checkpoint}") - >>> model = {model_class}.from_pretrained("{checkpoint}") + >>> model = {model_class}.from_pretrained("{checkpoint}", num_labels=2) >>> inputs = tokenizer("Hello, my dog is cute", return_tensors="pt") >>> labels = torch.tensor([1]).unsqueeze(0) # Batch size 1 >>> outputs = model(**inputs, labels=labels) >>> loss = outputs.loss >>> logits = outputs.logits + >>> list(logits.shape) + {expected_output} ``` Example of multi-label classification: ```python - >>> from transformers import {processor_class}, {model_class} >>> import torch + >>> from transformers import {processor_class}, {model_class} + + >>> torch.manual_seed(0) # doctest: +IGNORE_RESULT >>> tokenizer = {processor_class}.from_pretrained("{checkpoint}") - >>> model = {model_class}.from_pretrained("{checkpoint}", problem_type="multi_label_classification") + >>> model = {model_class}.from_pretrained("{checkpoint}", problem_type="multi_label_classification", num_labels=2) >>> inputs = tokenizer("Hello, my dog is cute", return_tensors="pt") >>> labels = torch.tensor([[1, 1]], dtype=torch.float) # need dtype=float for BCEWithLogitsLoss >>> outputs = model(**inputs, labels=labels) >>> loss = outputs.loss - >>> logits = outputs.logits + >>> list(logits.shape) + {expected_output} ``` """ diff --git a/src/transformers/models/bart/modeling_bart.py b/src/transformers/models/bart/modeling_bart.py index 92f15c9646..0d7b4608eb 100755 --- a/src/transformers/models/bart/modeling_bart.py +++ b/src/transformers/models/bart/modeling_bart.py @@ -48,14 +48,24 @@ from .configuration_bart import BartConfig logger = logging.get_logger(__name__) -_CHECKPOINT_FOR_DOC = "facebook/bart-large" +_CHECKPOINT_FOR_DOC = "facebook/bart-base" _CONFIG_FOR_DOC = "BartConfig" _TOKENIZER_FOR_DOC = "BartTokenizer" +# Base model docstring +_EXPECTED_OUTPUT_SHAPE = [1, 8, 768] + +# SequenceClassification docstring +_SEQ_CLASS_EXPECTED_OUTPUT_SHAPE = [1, 2] + +# QuestionAsnwering docstring +_QA_EXPECTED_LOSS = 2.98 +_QA_EXPECTED_OUTPUT_SHAPE = [1, 17] + BART_PRETRAINED_MODEL_ARCHIVE_LIST = [ "facebook/bart-large", - # See all BART models at https://huggingface.co/models?filter=bart + # see all BART models at https://huggingface.co/models?filter=bart ] @@ -542,12 +552,17 @@ BART_GENERATION_EXAMPLE = r""" >>> model = BartForConditionalGeneration.from_pretrained("facebook/bart-large-cnn") >>> tokenizer = BartTokenizer.from_pretrained("facebook/bart-large-cnn") - >>> ARTICLE_TO_SUMMARIZE = "My friends are cool but they eat too many carbs." + >>> ARTICLE_TO_SUMMARIZE = ( + ... "PG&E stated it scheduled the blackouts in response to forecasts for high winds " + ... "amid dry conditions. The aim is to reduce the risk of wildfires. Nearly 800 thousand customers were " + ... "scheduled to be affected by the shutoffs which were expected to last through at least midday tomorrow." + ... ) >>> inputs = tokenizer([ARTICLE_TO_SUMMARIZE], max_length=1024, return_tensors="pt") >>> # Generate Summary - >>> summary_ids = model.generate(inputs["input_ids"], num_beams=4, max_length=5) - >>> print(tokenizer.batch_decode(summary_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)) + >>> summary_ids = model.generate(inputs["input_ids"], num_beams=2, max_length=20) + >>> tokenizer.batch_decode(summary_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0] + 'PG&E scheduled the blackouts in response to forecasts for high winds amid dry conditions' ``` Mask filling example: @@ -555,10 +570,10 @@ BART_GENERATION_EXAMPLE = r""" ```python >>> from transformers import BartTokenizer, BartForConditionalGeneration - >>> tokenizer = BartTokenizer.from_pretrained("facebook/bart-large") - >>> TXT = "My friends are but they eat too many carbs." + >>> tokenizer = BartTokenizer.from_pretrained("facebook/bart-base") + >>> model = BartForConditionalGeneration.from_pretrained("facebook/bart-base") - >>> model = BartForConditionalGeneration.from_pretrained("facebook/bart-large") + >>> TXT = "My friends are but they eat too many carbs." >>> input_ids = tokenizer([TXT], return_tensors="pt")["input_ids"] >>> logits = model(input_ids).logits @@ -567,6 +582,7 @@ BART_GENERATION_EXAMPLE = r""" >>> values, predictions = probs.topk(5) >>> tokenizer.decode(predictions).split() + ['not', 'good', 'healthy', 'great', 'very'] ``` """ @@ -641,11 +657,10 @@ BART_INPUTS_DOCSTRING = r""" If `past_key_values` are used, the user can optionally input only the last `decoder_input_ids` (those that don't have their past key value states given to this model) of shape `(batch_size, 1)` instead of all - ``decoder_input_ids``` of shape `(batch_size, sequence_length)`. inputs_embeds (`torch.FloatTensor` of - shape `(batch_size, sequence_length, hidden_size)`, *optional*): Optionally, instead of passing `input_ids` - you can choose to directly pass an embedded representation. This is useful if you want more control over - how to convert `input_ids` indices into associated vectors than the model's internal embedding lookup - matrix. + `decoder_input_ids` of shape `(batch_size, sequence_length)`. inputs_embeds (`torch.FloatTensor` of shape + `(batch_size, sequence_length, hidden_size)`, *optional*): Optionally, instead of passing `input_ids` you + can choose to directly pass an embedded representation. This is useful if you want more control over how to + convert `input_ids` indices into associated vectors than the model's internal embedding lookup matrix. decoder_inputs_embeds (`torch.FloatTensor` of shape `(batch_size, target_sequence_length, hidden_size)`, *optional*): Optionally, instead of passing `decoder_input_ids` you can choose to directly pass an embedded representation. If `past_key_values` is used, optionally only the last `decoder_inputs_embeds` have to be @@ -966,8 +981,8 @@ class BartDecoder(BartPretrainedModel): If `past_key_values` are used, the user can optionally input only the last `decoder_input_ids` (those that don't have their past key value states given to this model) of shape `(batch_size, 1)` instead of - all ``decoder_input_ids``` of shape `(batch_size, sequence_length)`. inputs_embeds (`torch.FloatTensor` - of shape `(batch_size, sequence_length, hidden_size)`, *optional*): Optionally, instead of passing + all `decoder_input_ids` of shape `(batch_size, sequence_length)`. inputs_embeds (`torch.FloatTensor` of + shape `(batch_size, sequence_length, hidden_size)`, *optional*): Optionally, instead of passing `input_ids` you can choose to directly pass an embedded representation. This is useful if you want more control over how to convert `input_ids` indices into associated vectors than the model's internal embedding lookup matrix. @@ -1153,6 +1168,7 @@ class BartModel(BartPretrainedModel): checkpoint=_CHECKPOINT_FOR_DOC, output_type=Seq2SeqModelOutput, config_class=_CONFIG_FOR_DOC, + expected_output=_EXPECTED_OUTPUT_SHAPE, ) def forward( self, @@ -1434,6 +1450,7 @@ class BartForSequenceClassification(BartPretrainedModel): checkpoint=_CHECKPOINT_FOR_DOC, output_type=Seq2SeqSequenceClassifierOutput, config_class=_CONFIG_FOR_DOC, + expected_output=_SEQ_CLASS_EXPECTED_OUTPUT_SHAPE, ) def forward( self, @@ -1558,6 +1575,8 @@ class BartForQuestionAnswering(BartPretrainedModel): checkpoint=_CHECKPOINT_FOR_DOC, output_type=Seq2SeqQuestionAnsweringModelOutput, config_class=_CONFIG_FOR_DOC, + expected_loss=_QA_EXPECTED_LOSS, + expected_output=_QA_EXPECTED_OUTPUT_SHAPE, ) def forward( self, @@ -1789,13 +1808,16 @@ class BartForCausalLM(BartPretrainedModel): ```python >>> from transformers import BartTokenizer, BartForCausalLM - >>> tokenizer = BartTokenizer.from_pretrained("facebook/bart-large") - >>> model = BartForCausalLM.from_pretrained("facebook/bart-large", add_cross_attention=False) + >>> tokenizer = BartTokenizer.from_pretrained("facebook/bart-base") + >>> model = BartForCausalLM.from_pretrained("facebook/bart-base", add_cross_attention=False) >>> assert model.config.is_decoder, f"{model.__class__} has to be configured as a decoder." >>> inputs = tokenizer("Hello, my dog is cute", return_tensors="pt") >>> outputs = model(**inputs) >>> logits = outputs.logits + >>> expected_shape = [1, inputs.input_ids.shape[-1], model.config.vocab_size] + >>> list(logits.shape) == expected_shape + True ```""" output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions diff --git a/src/transformers/models/bigbird_pegasus/modeling_bigbird_pegasus.py b/src/transformers/models/bigbird_pegasus/modeling_bigbird_pegasus.py index b1e0052b2f..14030d107f 100755 --- a/src/transformers/models/bigbird_pegasus/modeling_bigbird_pegasus.py +++ b/src/transformers/models/bigbird_pegasus/modeling_bigbird_pegasus.py @@ -53,6 +53,16 @@ _CHECKPOINT_FOR_DOC = "google/bigbird-pegasus-large-arxiv" _CONFIG_FOR_DOC = "BigBirdPegasusConfig" _TOKENIZER_FOR_DOC = "PegasusTokenizer" +# Base model docstring +_EXPECTED_OUTPUT_SHAPE = [1, 7, 1024] + +# SequenceClassification docstring +_SEQ_CLASS_EXPECTED_OUTPUT_SHAPE = [1, 2] + +# QuestionAsnwering docstring +_QA_EXPECTED_LOSS = 2.56 +_QA_EXPECTED_OUTPUT_SHAPE = [1, 12] + BIGBIRD_PEGASUS_PRETRAINED_MODEL_ARCHIVE_LIST = [ "google/bigbird-pegasus-large-arxiv", @@ -1627,12 +1637,20 @@ BIGBIRD_PEGASUS_GENERATION_EXAMPLE = r""" >>> model = BigBirdPegasusForConditionalGeneration.from_pretrained("google/bigbird-pegasus-large-arxiv") >>> tokenizer = PegasusTokenizer.from_pretrained("google/bigbird-pegasus-large-arxiv") - >>> ARTICLE_TO_SUMMARIZE = "My friends are cool but they eat too many carbs." + >>> ARTICLE_TO_SUMMARIZE = ( + ... "The dominant sequence transduction models are based on complex recurrent or convolutional neural " + ... "networks in an encoder-decoder configuration. The best performing models also connect the encoder " + ... "and decoder through an attention mechanism. We propose a new simple network architecture, the Transformer, " + ... "based solely on attention mechanisms, dispensing with recurrence and convolutions entirely. " + ... "Experiments on two machine translation tasks show these models to be superior in quality " + ... "while being more parallelizable and requiring significantly less time to train." + ... ) >>> inputs = tokenizer([ARTICLE_TO_SUMMARIZE], max_length=4096, return_tensors="pt", truncation=True) >>> # Generate Summary - >>> summary_ids = model.generate(inputs["input_ids"], num_beams=4, max_length=5) - >>> print(tokenizer.batch_decode(summary_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)) + >>> summary_ids = model.generate(inputs["input_ids"], num_beams=4, max_length=15) + >>> tokenizer.batch_decode(summary_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0] + 'dominant sequence models are based on recurrent or convolutional neural networks .' ``` """ @@ -1684,11 +1702,10 @@ BIGBIRD_PEGASUS_INPUTS_DOCSTRING = r""" If `past_key_values` are used, the user can optionally input only the last `decoder_input_ids` (those that don't have their past key value states given to this model) of shape `(batch_size, 1)` instead of all - ``decoder_input_ids``` of shape `(batch_size, sequence_length)`. inputs_embeds (`torch.FloatTensor` of - shape `(batch_size, sequence_length, hidden_size)`, *optional*): Optionally, instead of passing `input_ids` - you can choose to directly pass an embedded representation. This is useful if you want more control over - how to convert `input_ids` indices into associated vectors than the model's internal embedding lookup - matrix. + `decoder_input_ids` of shape `(batch_size, sequence_length)`. inputs_embeds (`torch.FloatTensor` of shape + `(batch_size, sequence_length, hidden_size)`, *optional*): Optionally, instead of passing `input_ids` you + can choose to directly pass an embedded representation. This is useful if you want more control over how to + convert `input_ids` indices into associated vectors than the model's internal embedding lookup matrix. decoder_inputs_embeds (`torch.FloatTensor` of shape `(batch_size, target_sequence_length, hidden_size)`, *optional*): Optionally, instead of passing `decoder_input_ids` you can choose to directly pass an embedded representation. If `past_key_values` is used, optionally only the last `decoder_inputs_embeds` have to be @@ -2159,8 +2176,8 @@ class BigBirdPegasusDecoder(BigBirdPegasusPreTrainedModel): If `past_key_values` are used, the user can optionally input only the last `decoder_input_ids` (those that don't have their past key value states given to this model) of shape `(batch_size, 1)` instead of - all ``decoder_input_ids``` of shape `(batch_size, sequence_length)`. inputs_embeds (`torch.FloatTensor` - of shape `(batch_size, sequence_length, hidden_size)`, *optional*): Optionally, instead of passing + all `decoder_input_ids` of shape `(batch_size, sequence_length)`. inputs_embeds (`torch.FloatTensor` of + shape `(batch_size, sequence_length, hidden_size)`, *optional*): Optionally, instead of passing `input_ids` you can choose to directly pass an embedded representation. This is useful if you want more control over how to convert `input_ids` indices into associated vectors than the model's internal embedding lookup matrix. @@ -2346,6 +2363,7 @@ class BigBirdPegasusModel(BigBirdPegasusPreTrainedModel): checkpoint=_CHECKPOINT_FOR_DOC, output_type=Seq2SeqModelOutput, config_class=_CONFIG_FOR_DOC, + expected_output=_EXPECTED_OUTPUT_SHAPE, ) def forward( self, @@ -2630,6 +2648,7 @@ class BigBirdPegasusForSequenceClassification(BigBirdPegasusPreTrainedModel): checkpoint=_CHECKPOINT_FOR_DOC, output_type=Seq2SeqSequenceClassifierOutput, config_class=_CONFIG_FOR_DOC, + expected_output=_SEQ_CLASS_EXPECTED_OUTPUT_SHAPE, ) def forward( self, @@ -2755,6 +2774,8 @@ class BigBirdPegasusForQuestionAnswering(BigBirdPegasusPreTrainedModel): checkpoint=_CHECKPOINT_FOR_DOC, output_type=Seq2SeqQuestionAnsweringModelOutput, config_class=_CONFIG_FOR_DOC, + expected_loss=_QA_EXPECTED_LOSS, + expected_output=_QA_EXPECTED_OUTPUT_SHAPE, ) def forward( self, diff --git a/src/transformers/models/blenderbot/modeling_blenderbot.py b/src/transformers/models/blenderbot/modeling_blenderbot.py index 5706c5a4c3..8db91b615a 100755 --- a/src/transformers/models/blenderbot/modeling_blenderbot.py +++ b/src/transformers/models/blenderbot/modeling_blenderbot.py @@ -506,20 +506,37 @@ BLENDERBOT_START_DOCSTRING = r""" """ BLENDERBOT_GENERATION_EXAMPLE = r""" - Conversation example:: + Conversation example: - >>> from transformers import BlenderbotTokenizer, BlenderbotForConditionalGeneration >>> mname = - 'facebook/blenderbot-400M-distill' >>> model = BlenderbotForConditionalGeneration.from_pretrained(mname) >>> - tokenizer = BlenderbotTokenizer.from_pretrained(mname) >>> UTTERANCE = "My friends are cool but they eat too - many carbs." >>> print("Human: ", UTTERANCE) >>> inputs = tokenizer([UTTERANCE], return_tensors='pt') >>> - reply_ids = model.generate(**inputs) >>> print("Bot: ", tokenizer.batch_decode(reply_ids, - skip_special_tokens=True)[0]) + ```python + >>> from transformers import BlenderbotTokenizer, BlenderbotForConditionalGeneration - >>> REPLY = "I'm not sure" >>> print("Human: ", REPLY) >>> NEXT_UTTERANCE = ( ... "My friends are cool but they - eat too many carbs. That's unfortunate. " ... "Are they trying to lose weight or are they just trying to - be healthier? " ... " I'm not sure." ... ) >>> inputs = tokenizer([NEXT_UTTERANCE], return_tensors='pt') - >>> next_reply_ids = model.generate(**inputs) >>> print("Bot: ", tokenizer.batch_decode(next_reply_ids, - skip_special_tokens=True)[0]) + >>> mname = "facebook/blenderbot-400M-distill" + >>> model = BlenderbotForConditionalGeneration.from_pretrained(mname) + >>> tokenizer = BlenderbotTokenizer.from_pretrained(mname) + >>> UTTERANCE = "My friends are cool but they eat too many carbs." + >>> print("Human: ", UTTERANCE) + Human: My friends are cool but they eat too many carbs. + + >>> inputs = tokenizer([UTTERANCE], return_tensors="pt") + >>> reply_ids = model.generate(**inputs) + >>> print("Bot: ", tokenizer.batch_decode(reply_ids, skip_special_tokens=True)[0]) + Bot: That's unfortunate. Are they trying to lose weight or are they just trying to be healthier? + + >>> REPLY = "I'm not sure" + >>> print("Human: ", REPLY) + Human: I'm not sure + + >>> NEXT_UTTERANCE = ( + ... "My friends are cool but they eat too many carbs. That's unfortunate. " + ... "Are they trying to lose weight or are they just trying to be healthier? " + ... " I'm not sure." + ... ) + >>> inputs = tokenizer([NEXT_UTTERANCE], return_tensors="pt") + >>> next_reply_ids = model.generate(**inputs) + >>> print("Bot: ", tokenizer.batch_decode(next_reply_ids, skip_special_tokens=True)[0]) + Bot: That's too bad. Have you tried encouraging them to change their eating habits? + ``` """ BLENDERBOT_INPUTS_DOCSTRING = r""" @@ -586,11 +603,10 @@ BLENDERBOT_INPUTS_DOCSTRING = r""" If `past_key_values` are used, the user can optionally input only the last `decoder_input_ids` (those that don't have their past key value states given to this model) of shape `(batch_size, 1)` instead of all - ``decoder_input_ids``` of shape `(batch_size, sequence_length)`. inputs_embeds (`torch.FloatTensor` of - shape `(batch_size, sequence_length, hidden_size)`, *optional*): Optionally, instead of passing `input_ids` - you can choose to directly pass an embedded representation. This is useful if you want more control over - how to convert `input_ids` indices into associated vectors than the model's internal embedding lookup - matrix. + `decoder_input_ids` of shape `(batch_size, sequence_length)`. inputs_embeds (`torch.FloatTensor` of shape + `(batch_size, sequence_length, hidden_size)`, *optional*): Optionally, instead of passing `input_ids` you + can choose to directly pass an embedded representation. This is useful if you want more control over how to + convert `input_ids` indices into associated vectors than the model's internal embedding lookup matrix. decoder_inputs_embeds (`torch.FloatTensor` of shape `(batch_size, target_sequence_length, hidden_size)`, *optional*): Optionally, instead of passing `decoder_input_ids` you can choose to directly pass an embedded representation. If `past_key_values` is used, optionally only the last `decoder_inputs_embeds` have to be @@ -907,8 +923,8 @@ class BlenderbotDecoder(BlenderbotPreTrainedModel): If `past_key_values` are used, the user can optionally input only the last `decoder_input_ids` (those that don't have their past key value states given to this model) of shape `(batch_size, 1)` instead of - all ``decoder_input_ids``` of shape `(batch_size, sequence_length)`. inputs_embeds (`torch.FloatTensor` - of shape `(batch_size, sequence_length, hidden_size)`, *optional*): Optionally, instead of passing + all `decoder_input_ids` of shape `(batch_size, sequence_length)`. inputs_embeds (`torch.FloatTensor` of + shape `(batch_size, sequence_length, hidden_size)`, *optional*): Optionally, instead of passing `input_ids` you can choose to directly pass an embedded representation. This is useful if you want more control over how to convert `input_ids` indices into associated vectors than the model's internal embedding lookup matrix. @@ -1130,13 +1146,13 @@ class BlenderbotModel(BlenderbotPreTrainedModel): >>> model = BlenderbotModel.from_pretrained("facebook/blenderbot-400M-distill") >>> tokenizer = BlenderbotTokenizer.from_pretrained("facebook/blenderbot-400M-distill") - >>> input_ids = tokenizer( - ... "Studies have been shown that owning a dog is good for you", return_tensors="pt" - >>> ).input_ids # Batch size 1 + >>> inputs = tokenizer("Studies have been shown that owning a dog is good for you", return_tensors="pt") >>> decoder_input_ids = tokenizer("Studies show that", return_tensors="pt").input_ids # Batch size 1 - >>> outputs = model(input_ids=input_ids, decoder_input_ids=decoder_input_ids) + >>> outputs = model(input_ids=inputs.input_ids, decoder_input_ids=decoder_input_ids) >>> last_hidden_states = outputs.last_hidden_state + >>> list(last_hidden_states.shape) + [1, 6, 1280] ```""" output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions output_hidden_states = ( @@ -1389,7 +1405,7 @@ class BlenderbotDecoderWrapper(BlenderbotPreTrainedModel): return self.decoder(*args, **kwargs) -# Copied from transformers.models.bart.modeling_bart.BartForCausalLM with Bart->Blenderbot, facebook/bart-large->facebook/blenderbot-400M-distill +# Copied from transformers.models.bart.modeling_bart.BartForCausalLM with Bart->Blenderbot, facebook/bart-base->facebook/blenderbot-400M-distill class BlenderbotForCausalLM(BlenderbotPreTrainedModel): def __init__(self, config): config = copy.deepcopy(config) @@ -1520,6 +1536,9 @@ class BlenderbotForCausalLM(BlenderbotPreTrainedModel): >>> outputs = model(**inputs) >>> logits = outputs.logits + >>> expected_shape = [1, inputs.input_ids.shape[-1], model.config.vocab_size] + >>> list(logits.shape) == expected_shape + True ```""" output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions diff --git a/src/transformers/models/blenderbot_small/modeling_blenderbot_small.py b/src/transformers/models/blenderbot_small/modeling_blenderbot_small.py index 89153c16ec..00517e21e3 100755 --- a/src/transformers/models/blenderbot_small/modeling_blenderbot_small.py +++ b/src/transformers/models/blenderbot_small/modeling_blenderbot_small.py @@ -504,20 +504,37 @@ BLENDERBOT_SMALL_START_DOCSTRING = r""" """ BLENDERBOT_SMALL_GENERATION_EXAMPLE = r""" - Conversation example:: + Conversation example: - >>> from transformers import BlenderbotSmallTokenizer, BlenderbotSmallForConditionalGeneration >>> mname = - 'facebook/blenderbot_small-90M' >>> model = BlenderbotSmallForConditionalGeneration.from_pretrained(mname) >>> - tokenizer = BlenderbotSmallTokenizer.from_pretrained(mname) >>> UTTERANCE = "My friends are cool but they eat - too many carbs." >>> print("Human: ", UTTERANCE) >>> inputs = tokenizer([UTTERANCE], return_tensors='pt') >>> - reply_ids = model.generate(**inputs) >>> print("Bot: ", tokenizer.batch_decode(reply_ids, - skip_special_tokens=True)[0]) what kind of carbs do they eat? i don't know much about carbs. + ```python + >>> from transformers import BlenderbotSmallTokenizer, BlenderbotSmallForConditionalGeneration - >>> REPLY = "I'm not sure" >>> print("Human: ", REPLY) >>> NEXT_UTTERANCE = ( ... "My friends are cool but they - eat too many carbs. " ... "what kind of carbs do they eat? i don't know much about carbs. " ... - "I'm not sure." ... ) >>> inputs = tokenizer([NEXT_UTTERANCE], return_tensors='pt') >>> - inputs.pop("token_type_ids") >>> next_reply_ids = model.generate(**inputs) >>> print("Bot: ", - tokenizer.batch_decode(next_reply_ids, skip_special_tokens=True)[0]) + >>> mname = "facebook/blenderbot_small-90M" + >>> model = BlenderbotSmallForConditionalGeneration.from_pretrained(mname) + >>> tokenizer = BlenderbotSmallTokenizer.from_pretrained(mname) + >>> UTTERANCE = "My friends are cool but they eat too many carbs." + >>> print("Human: ", UTTERANCE) + Human: My friends are cool but they eat too many carbs. + + >>> inputs = tokenizer([UTTERANCE], return_tensors="pt") + >>> reply_ids = model.generate(**inputs) + >>> print("Bot: ", tokenizer.batch_decode(reply_ids, skip_special_tokens=True)[0]) + Bot: what kind of carbs do they eat? i don't know much about carbs. + + >>> REPLY = "I'm not sure" + >>> print("Human: ", REPLY) + Human: I'm not sure + + >>> NEXT_UTTERANCE = ( + ... "My friends are cool but they eat too many carbs. what kind of carbs do they eat? " + ... "i don't know much about carbs " + ... " I'm not sure." + ... ) + >>> inputs = tokenizer([NEXT_UTTERANCE], return_tensors="pt") + >>> next_reply_ids = model.generate(**inputs) + >>> print("Bot: ", tokenizer.batch_decode(next_reply_ids, skip_special_tokens=True)[0]) + Bot: they eat a lot of carbs. carbs are high in fat, protein, and carbohydrates. + ``` """ BLENDERBOT_SMALL_INPUTS_DOCSTRING = r""" @@ -584,11 +601,10 @@ BLENDERBOT_SMALL_INPUTS_DOCSTRING = r""" If `past_key_values` are used, the user can optionally input only the last `decoder_input_ids` (those that don't have their past key value states given to this model) of shape `(batch_size, 1)` instead of all - ``decoder_input_ids``` of shape `(batch_size, sequence_length)`. inputs_embeds (`torch.FloatTensor` of - shape `(batch_size, sequence_length, hidden_size)`, *optional*): Optionally, instead of passing `input_ids` - you can choose to directly pass an embedded representation. This is useful if you want more control over - how to convert `input_ids` indices into associated vectors than the model's internal embedding lookup - matrix. + `decoder_input_ids` of shape `(batch_size, sequence_length)`. inputs_embeds (`torch.FloatTensor` of shape + `(batch_size, sequence_length, hidden_size)`, *optional*): Optionally, instead of passing `input_ids` you + can choose to directly pass an embedded representation. This is useful if you want more control over how to + convert `input_ids` indices into associated vectors than the model's internal embedding lookup matrix. decoder_inputs_embeds (`torch.FloatTensor` of shape `(batch_size, target_sequence_length, hidden_size)`, *optional*): Optionally, instead of passing `decoder_input_ids` you can choose to directly pass an embedded representation. If `past_key_values` is used, optionally only the last `decoder_inputs_embeds` have to be @@ -902,8 +918,8 @@ class BlenderbotSmallDecoder(BlenderbotSmallPreTrainedModel): If `past_key_values` are used, the user can optionally input only the last `decoder_input_ids` (those that don't have their past key value states given to this model) of shape `(batch_size, 1)` instead of - all ``decoder_input_ids``` of shape `(batch_size, sequence_length)`. inputs_embeds (`torch.FloatTensor` - of shape `(batch_size, sequence_length, hidden_size)`, *optional*): Optionally, instead of passing + all `decoder_input_ids` of shape `(batch_size, sequence_length)`. inputs_embeds (`torch.FloatTensor` of + shape `(batch_size, sequence_length, hidden_size)`, *optional*): Optionally, instead of passing `input_ids` you can choose to directly pass an embedded representation. This is useful if you want more control over how to convert `input_ids` indices into associated vectors than the model's internal embedding lookup matrix. @@ -1113,13 +1129,13 @@ class BlenderbotSmallModel(BlenderbotSmallPreTrainedModel): >>> model = BlenderbotSmallModel.from_pretrained("facebook/blenderbot_small-90M") >>> tokenizer = BlenderbotSmallTokenizer.from_pretrained("facebook/blenderbot_small-90M") - >>> input_ids = tokenizer( - ... "Studies have been shown that owning a dog is good for you", return_tensors="pt" - >>> ).input_ids # Batch size 1 - >>> decoder_input_ids = tokenizer("Studies show that", return_tensors="pt").input_ids # Batch size 1 - >>> outputs = model(input_ids=input_ids, decoder_input_ids=decoder_input_ids) + >>> inputs = tokenizer("Studies have been shown that owning a dog is good for you", return_tensors="pt") + >>> decoder_inputs = tokenizer("Studies show that", return_tensors="pt") # Batch size 1 + >>> outputs = model(input_ids=inputs.input_ids, decoder_input_ids=decoder_inputs.input_ids) >>> last_hidden_states = outputs.last_hidden_state + >>> list(last_hidden_states.shape) + [1, 3, 512] ```""" output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions output_hidden_states = ( @@ -1360,7 +1376,7 @@ class BlenderbotSmallDecoderWrapper(BlenderbotSmallPreTrainedModel): return self.decoder(*args, **kwargs) -# Copied from transformers.models.bart.modeling_bart.BartForCausalLM with Bart->BlenderbotSmall, facebook/bart-large->facebook/blenderbot_small-90M +# Copied from transformers.models.bart.modeling_bart.BartForCausalLM with Bart->BlenderbotSmall, facebook/bart-base->facebook/blenderbot_small-90M class BlenderbotSmallForCausalLM(BlenderbotSmallPreTrainedModel): def __init__(self, config): config = copy.deepcopy(config) @@ -1491,6 +1507,9 @@ class BlenderbotSmallForCausalLM(BlenderbotSmallPreTrainedModel): >>> outputs = model(**inputs) >>> logits = outputs.logits + >>> expected_shape = [1, inputs.input_ids.shape[-1], model.config.vocab_size] + >>> list(logits.shape) == expected_shape + True ```""" output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions diff --git a/src/transformers/models/marian/modeling_marian.py b/src/transformers/models/marian/modeling_marian.py index 960ac61c81..33f15a3525 100755 --- a/src/transformers/models/marian/modeling_marian.py +++ b/src/transformers/models/marian/modeling_marian.py @@ -523,27 +523,28 @@ MARIAN_START_DOCSTRING = r""" """ MARIAN_GENERATION_EXAMPLE = r""" - Pytorch version of marian-nmt's transformer.h (c++). Designed for the OPUS-NMT translation checkpoints. - Available models are listed [here](https://huggingface.co/models?search=Helsinki-NLP). + Pytorch version of marian-nmt's transformer.h (c++). Designed for the OPUS-NMT translation checkpoints. Available + models are listed [here](https://huggingface.co/models?search=Helsinki-NLP). - Examples: + Examples: - ```python - >>> from transformers import MarianTokenizer, MarianMTModel - >>> from typing import List + ```python + >>> from transformers import MarianTokenizer, MarianMTModel - >>> src = "fr" # source language - >>> trg = "en" # target language - >>> sample_text = "où est l'arrêt de bus ?" - >>> model_name = f"Helsinki-NLP/opus-mt-{src}-{trg}" + >>> src = "fr" # source language + >>> trg = "en" # target language - >>> model = MarianMTModel.from_pretrained(model_name) - >>> tokenizer = MarianTokenizer.from_pretrained(model_name) - >>> batch = tokenizer([sample_text], return_tensors="pt") - >>> gen = model.generate(**batch) - >>> tokenizer.batch_decode(gen, skip_special_tokens=True) - "Where is the bus stop ?" - ``` + >>> model_name = f"Helsinki-NLP/opus-mt-{src}-{trg}" + >>> model = MarianMTModel.from_pretrained(model_name) + >>> tokenizer = MarianTokenizer.from_pretrained(model_name) + + >>> sample_text = "où est l'arrêt de bus ?" + >>> batch = tokenizer([sample_text], return_tensors="pt") + + >>> generated_ids = model.generate(**batch) + >>> tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0] + "Where's the bus stop?" + ``` """ MARIAN_INPUTS_DOCSTRING = r""" @@ -927,7 +928,7 @@ class MarianDecoder(MarianPreTrainedModel): If `past_key_values` are used, the user can optionally input only the last `decoder_input_ids` (those that don't have their past key value states given to this model) of shape `(batch_size, 1)` instead of - all ``decoder_input_ids``` of shape `(batch_size, sequence_length)`. + all `decoder_input_ids` of shape `(batch_size, sequence_length)`. inputs_embeds (`torch.FloatTensor` of shape `(batch_size, sequence_length, hidden_size)`, *optional*): Optionally, instead of passing `input_ids` you can choose to directly pass an embedded representation. This is useful if you want more control over how to convert `input_ids` indices into associated vectors @@ -1136,17 +1137,17 @@ class MarianModel(MarianPreTrainedModel): >>> tokenizer = MarianTokenizer.from_pretrained("Helsinki-NLP/opus-mt-en-de") >>> model = MarianModel.from_pretrained("Helsinki-NLP/opus-mt-en-de") - >>> input_ids = tokenizer( - ... "Studies have been shown that owning a dog is good for you", return_tensors="pt" - >>> ).input_ids # Batch size 1 - >>> decoder_input_ids = tokenizer( + >>> inputs = tokenizer("Studies have been shown that owning a dog is good for you", return_tensors="pt") + >>> decoder_inputs = tokenizer( ... " Studien haben gezeigt dass es hilfreich ist einen Hund zu besitzen", ... return_tensors="pt", ... add_special_tokens=False, - >>> ).input_ids # Batch size 1 - >>> outputs = model(input_ids=input_ids, decoder_input_ids=decoder_input_ids) + ... ) + >>> outputs = model(input_ids=inputs.input_ids, decoder_input_ids=decoder_inputs.input_ids) >>> last_hidden_states = outputs.last_hidden_state + >>> list(last_hidden_states.shape) + [1, 26, 512] ```""" output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions output_hidden_states = ( @@ -1400,7 +1401,7 @@ class MarianDecoderWrapper(MarianPreTrainedModel): return self.decoder(*args, **kwargs) -# Copied from transformers.models.bart.modeling_bart.BartForCausalLM with Bart->Marian, facebook/bart-large->Helsinki-NLP/opus-mt-fr-en +# Copied from transformers.models.bart.modeling_bart.BartForCausalLM with Bart->Marian, facebook/bart-base->Helsinki-NLP/opus-mt-fr-en class MarianForCausalLM(MarianPreTrainedModel): def __init__(self, config): config = copy.deepcopy(config) @@ -1529,6 +1530,9 @@ class MarianForCausalLM(MarianPreTrainedModel): >>> outputs = model(**inputs) >>> logits = outputs.logits + >>> expected_shape = [1, inputs.input_ids.shape[-1], model.config.vocab_size] + >>> list(logits.shape) == expected_shape + True ```""" output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions diff --git a/src/transformers/models/mbart/modeling_mbart.py b/src/transformers/models/mbart/modeling_mbart.py index 91a0289a6c..8e2adaf9c6 100755 --- a/src/transformers/models/mbart/modeling_mbart.py +++ b/src/transformers/models/mbart/modeling_mbart.py @@ -51,6 +51,16 @@ _CHECKPOINT_FOR_DOC = "facebook/mbart-large-cc25" _CONFIG_FOR_DOC = "MBartConfig" _TOKENIZER_FOR_DOC = "MBartTokenizer" +# Base model docstring +_EXPECTED_OUTPUT_SHAPE = [1, 8, 1024] + +# SequenceClassification docstring +_SEQ_CLASS_EXPECTED_OUTPUT_SHAPE = [1, 2] + +# QuestionAsnwering docstring +_QA_EXPECTED_LOSS = 3.04 +_QA_EXPECTED_OUTPUT_SHAPE = [1, 16] + MBART_PRETRAINED_MODEL_ARCHIVE_LIST = [ "facebook/mbart-large-cc25", @@ -532,20 +542,21 @@ MBART_START_DOCSTRING = r""" """ MBART_GENERATION_EXAMPLE = r""" - Summarization example: + Translation example: ```python >>> from transformers import MBartTokenizer, MBartForConditionalGeneration - >>> model = MBartForConditionalGeneration.from_pretrained("facebook/mbart-large-cc25") - >>> tokenizer = MBartTokenizer.from_pretrained("facebook/mbart-large-cc25") + >>> model = MBartForConditionalGeneration.from_pretrained("facebook/mbart-large-en-ro") + >>> tokenizer = MBartTokenizer.from_pretrained("facebook/mbart-large-en-ro") - >>> ARTICLE_TO_SUMMARIZE = "Meine Freunde sind cool, aber sie essen zu viel Kuchen." - >>> inputs = tokenizer([ARTICLE_TO_SUMMARIZE], max_length=1024, return_tensors="pt") + >>> example_english_phrase = "42 is the answer" + >>> inputs = tokenizer(example_english_phrase, return_tensors="pt") - >>> # Generate Summary - >>> summary_ids = model.generate(inputs["input_ids"], num_beams=4, max_length=5) - >>> print(tokenizer.batch_decode(summary_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)) + >>> # Translate + >>> generated_ids = model.generate(inputs["input_ids"], num_beams=4, max_length=5) + >>> tokenizer.batch_decode(generated_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0] + '42 este răspuns' ``` Mask filling example: @@ -567,6 +578,7 @@ MBART_GENERATION_EXAMPLE = r""" >>> values, predictions = probs.topk(5) >>> tokenizer.decode(predictions).split() + ['nett', 'sehr', 'ganz', 'nicht', 'so'] ``` """ @@ -639,11 +651,10 @@ MBART_INPUTS_DOCSTRING = r""" If `past_key_values` are used, the user can optionally input only the last `decoder_input_ids` (those that don't have their past key value states given to this model) of shape `(batch_size, 1)` instead of all - ``decoder_input_ids``` of shape `(batch_size, sequence_length)`. inputs_embeds (`torch.FloatTensor` of - shape `(batch_size, sequence_length, hidden_size)`, *optional*): Optionally, instead of passing `input_ids` - you can choose to directly pass an embedded representation. This is useful if you want more control over - how to convert `input_ids` indices into associated vectors than the model's internal embedding lookup - matrix. + `decoder_input_ids` of shape `(batch_size, sequence_length)`. inputs_embeds (`torch.FloatTensor` of shape + `(batch_size, sequence_length, hidden_size)`, *optional*): Optionally, instead of passing `input_ids` you + can choose to directly pass an embedded representation. This is useful if you want more control over how to + convert `input_ids` indices into associated vectors than the model's internal embedding lookup matrix. decoder_inputs_embeds (`torch.FloatTensor` of shape `(batch_size, target_sequence_length, hidden_size)`, *optional*): Optionally, instead of passing `decoder_input_ids` you can choose to directly pass an embedded representation. If `past_key_values` is used, optionally only the last `decoder_inputs_embeds` have to be @@ -966,8 +977,8 @@ class MBartDecoder(MBartPreTrainedModel): If `past_key_values` are used, the user can optionally input only the last `decoder_input_ids` (those that don't have their past key value states given to this model) of shape `(batch_size, 1)` instead of - all ``decoder_input_ids``` of shape `(batch_size, sequence_length)`. inputs_embeds (`torch.FloatTensor` - of shape `(batch_size, sequence_length, hidden_size)`, *optional*): Optionally, instead of passing + all `decoder_input_ids` of shape `(batch_size, sequence_length)`. inputs_embeds (`torch.FloatTensor` of + shape `(batch_size, sequence_length, hidden_size)`, *optional*): Optionally, instead of passing `input_ids` you can choose to directly pass an embedded representation. This is useful if you want more control over how to convert `input_ids` indices into associated vectors than the model's internal embedding lookup matrix. @@ -1153,6 +1164,7 @@ class MBartModel(MBartPreTrainedModel): checkpoint=_CHECKPOINT_FOR_DOC, output_type=Seq2SeqModelOutput, config_class=_CONFIG_FOR_DOC, + expected_output=_EXPECTED_OUTPUT_SHAPE, ) def forward( self, @@ -1428,6 +1440,7 @@ class MBartForSequenceClassification(MBartPreTrainedModel): checkpoint=_CHECKPOINT_FOR_DOC, output_type=Seq2SeqSequenceClassifierOutput, config_class=_CONFIG_FOR_DOC, + expected_output=_SEQ_CLASS_EXPECTED_OUTPUT_SHAPE, ) # Copied from transformers.models.bart.modeling_bart.BartForSequenceClassification.forward def forward( @@ -1553,6 +1566,8 @@ class MBartForQuestionAnswering(MBartPreTrainedModel): checkpoint=_CHECKPOINT_FOR_DOC, output_type=Seq2SeqQuestionAnsweringModelOutput, config_class=_CONFIG_FOR_DOC, + expected_loss=_QA_EXPECTED_LOSS, + expected_output=_QA_EXPECTED_OUTPUT_SHAPE, ) # Copied from transformers.models.bart.modeling_bart.BartForQuestionAnswering.forward def forward( @@ -1665,7 +1680,7 @@ class MBartDecoderWrapper(MBartPreTrainedModel): return self.decoder(*args, **kwargs) -# Copied from transformers.models.bart.modeling_bart.BartForCausalLM with Bart->MBart, facebook/bart-large->facebook/mbart-large-cc25 +# Copied from transformers.models.bart.modeling_bart.BartForCausalLM with Bart->MBart, facebook/bart-base->facebook/mbart-large-cc25 class MBartForCausalLM(MBartPreTrainedModel): def __init__(self, config): config = copy.deepcopy(config) @@ -1794,6 +1809,9 @@ class MBartForCausalLM(MBartPreTrainedModel): >>> outputs = model(**inputs) >>> logits = outputs.logits + >>> expected_shape = [1, inputs.input_ids.shape[-1], model.config.vocab_size] + >>> list(logits.shape) == expected_shape + True ```""" output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions diff --git a/src/transformers/models/pegasus/modeling_pegasus.py b/src/transformers/models/pegasus/modeling_pegasus.py index ad6f74e00f..42bc6595d0 100755 --- a/src/transformers/models/pegasus/modeling_pegasus.py +++ b/src/transformers/models/pegasus/modeling_pegasus.py @@ -529,7 +529,8 @@ PEGASUS_GENERATION_EXAMPLE = r""" >>> # Generate Summary >>> summary_ids = model.generate(inputs["input_ids"]) - >>> print(tokenizer.batch_decode(summary_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)) + >>> tokenizer.batch_decode(summary_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0] + "California's largest electricity provider has turned off power to hundreds of thousands of customers." ``` """ @@ -597,11 +598,10 @@ PEGASUS_INPUTS_DOCSTRING = r""" If `past_key_values` are used, the user can optionally input only the last `decoder_input_ids` (those that don't have their past key value states given to this model) of shape `(batch_size, 1)` instead of all - ``decoder_input_ids``` of shape `(batch_size, sequence_length)`. inputs_embeds (`torch.FloatTensor` of - shape `(batch_size, sequence_length, hidden_size)`, *optional*): Optionally, instead of passing `input_ids` - you can choose to directly pass an embedded representation. This is useful if you want more control over - how to convert `input_ids` indices into associated vectors than the model's internal embedding lookup - matrix. + `decoder_input_ids` of shape `(batch_size, sequence_length)`. inputs_embeds (`torch.FloatTensor` of shape + `(batch_size, sequence_length, hidden_size)`, *optional*): Optionally, instead of passing `input_ids` you + can choose to directly pass an embedded representation. This is useful if you want more control over how to + convert `input_ids` indices into associated vectors than the model's internal embedding lookup matrix. decoder_inputs_embeds (`torch.FloatTensor` of shape `(batch_size, target_sequence_length, hidden_size)`, *optional*): Optionally, instead of passing `decoder_input_ids` you can choose to directly pass an embedded representation. If `past_key_values` is used, optionally only the last `decoder_inputs_embeds` have to be @@ -977,8 +977,8 @@ class PegasusDecoder(PegasusPreTrainedModel): If `past_key_values` are used, the user can optionally input only the last `decoder_input_ids` (those that don't have their past key value states given to this model) of shape `(batch_size, 1)` instead of - all ``decoder_input_ids``` of shape `(batch_size, sequence_length)`. inputs_embeds (`torch.FloatTensor` - of shape `(batch_size, sequence_length, hidden_size)`, *optional*): Optionally, instead of passing + all `decoder_input_ids` of shape `(batch_size, sequence_length)`. inputs_embeds (`torch.FloatTensor` of + shape `(batch_size, sequence_length, hidden_size)`, *optional*): Optionally, instead of passing `input_ids` you can choose to directly pass an embedded representation. This is useful if you want more control over how to convert `input_ids` indices into associated vectors than the model's internal embedding lookup matrix. @@ -1211,13 +1211,13 @@ class PegasusModel(PegasusPreTrainedModel): >>> tokenizer = PegasusTokenizer.from_pretrained("google/pegasus-large") >>> model = PegasusModel.from_pretrained("google/pegasus-large") - >>> input_ids = tokenizer( - ... "Studies have been shown that owning a dog is good for you", return_tensors="pt" - >>> ).input_ids # Batch size 1 - >>> decoder_input_ids = tokenizer("Studies show that", return_tensors="pt").input_ids # Batch size 1 - >>> outputs = model(input_ids=input_ids, decoder_input_ids=decoder_input_ids) + >>> inputs = tokenizer("Studies have been shown that owning a dog is good for you", return_tensors="pt") + >>> decoder_inputs = tokenizer("Studies show that", return_tensors="pt") + >>> outputs = model(input_ids=inputs.input_ids, decoder_input_ids=decoder_inputs.input_ids) >>> last_hidden_states = outputs.last_hidden_state + >>> list(last_hidden_states.shape) + [1, 4, 1024] ```""" output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions @@ -1540,7 +1540,7 @@ class PegasusForCausalLM(PegasusPreTrainedModel): self.model.decoder.resize_position_embeddings(new_num_position_embeddings) @replace_return_docstrings(output_type=CausalLMOutputWithCrossAttentions, config_class=_CONFIG_FOR_DOC) - # Copied from transformers.models.bart.modeling_bart.BartForCausalLM.forward with Bart->Pegasus, facebook/bart-large->google/pegasus-large + # Copied from transformers.models.bart.modeling_bart.BartForCausalLM.forward with Bart->Pegasus, facebook/bart-base->google/pegasus-large def forward( self, input_ids=None, @@ -1637,6 +1637,9 @@ class PegasusForCausalLM(PegasusPreTrainedModel): >>> outputs = model(**inputs) >>> logits = outputs.logits + >>> expected_shape = [1, inputs.input_ids.shape[-1], model.config.vocab_size] + >>> list(logits.shape) == expected_shape + True ```""" output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions diff --git a/src/transformers/models/plbart/modeling_plbart.py b/src/transformers/models/plbart/modeling_plbart.py index b8db30cc9c..54ffa1e6a8 100755 --- a/src/transformers/models/plbart/modeling_plbart.py +++ b/src/transformers/models/plbart/modeling_plbart.py @@ -50,6 +50,12 @@ _CHECKPOINT_FOR_DOC = "uclanlp/plbart-base" _CONFIG_FOR_DOC = "PLBartConfig" _TOKENIZER_FOR_DOC = "PLBartTokenizer" +# Base model docstring +_EXPECTED_OUTPUT_SHAPE = [1, 8, 768] + +# SequenceClassification docstring +_SEQ_CLASS_EXPECTED_OUTPUT_SHAPE = [1, 2] + PLBART_PRETRAINED_MODEL_ARCHIVE_LIST = [ "uclanlp/plbart-base", @@ -526,27 +532,26 @@ PLBART_START_DOCSTRING = r""" """ PLBART_GENERATION_EXAMPLE = r""" - Token in-filling example: - - >>> from transformers import PLBartTokenizer, PLBartForConditionalGeneration, PLBartConfig - - >>> model = PLBartForConditionalGeneration.from_pretrained('uclanlp/plbart-base') >>> tokenizer = - PLBartTokenizer.from_pretrained('uclanlp/plbart-base', src_lang='java', tgt_lang='java') >>> METHOD_TO_FILL = - "public static main (String args[0]) { data=Date(); System.out. String.format("Current Date : % tc", ));}" >>> - inputs = tokenizer([METHOD_TO_FILL], max_length=1024, return_tensors='pt') >>> # Generate Filled Code >>> - generated_ids = model.generate(inputs['input_ids'], num_beams=4, max_length=5, early_stopping=True) >>> - print([tokenizer.decode(g, skip_special_tokens=True, clean_up_tokenization_spaces=False) for g in - generated_ids]) - Mask-filling example: - >>> from transformers import PLBartTokenizer, PLBartForConditionalGeneration >>> tokenizer = - PLBartTokenizer.from_pretrained('uclanlp/plbart-base') >>> # en_XX is the language symbol id for English - >>> TXT = " Is 0 the Fibonacci ? en_XX" >>> model = - PLBartForConditionalGeneration.from_pretrained('uclanlp/plbart-base') >>> input_ids = tokenizer([TXT], - add_special_tokens=False, return_tensors='pt')['input_ids'] >>> logits = model(input_ids).logits >>> - masked_index = (input_ids[0] == tokenizer.mask_token_id).nonzero().item() >>> probs = logits[0, - masked_index].softmax(dim=0) >>> values, predictions = probs.topk(5) >>> tokenizer.decode(predictions).split() + ```python + >>> from transformers import PLBartTokenizer, PLBartForConditionalGeneration + + >>> model = PLBartForConditionalGeneration.from_pretrained("uclanlp/plbart-base") + >>> tokenizer = PLBartTokenizer.from_pretrained("uclanlp/plbart-base") + + >>> # en_XX is the language symbol id for English + >>> TXT = " Is 0 the Fibonacci number ? en_XX" + >>> input_ids = tokenizer([TXT], add_special_tokens=False, return_tensors="pt").input_ids + + >>> logits = model(input_ids).logits + >>> masked_index = (input_ids[0] == tokenizer.mask_token_id).nonzero().item() + >>> probs = logits[0, masked_index].softmax(dim=0) + >>> values, predictions = probs.topk(5) + + >>> tokenizer.decode(predictions).split() + ['same', 'first', 'highest', 'result', 'Fib'] + ``` """ PLBART_INPUTS_DOCSTRING = r""" @@ -619,7 +624,7 @@ PLBART_INPUTS_DOCSTRING = r""" If `past_key_values` are used, the user can optionally input only the last `decoder_input_ids` (those that don't have their past key value states given to this model) of shape `(batch_size, 1)` instead of all - ``decoder_input_ids``` of shape `(batch_size, sequence_length)`. + `decoder_input_ids` of shape `(batch_size, sequence_length)`. inputs_embeds (: obj:*torch.FloatTensor* of shape `(batch_size, sequence_length, hidden_size)`, *optional*): Optionally, instead of passing `input_ids` you can choose to directly pass an embedded representation. This is useful @@ -948,8 +953,8 @@ class PLBartDecoder(PLBartPreTrainedModel): If `past_key_values` are used, the user can optionally input only the last `decoder_input_ids` (those that don't have their past key value states given to this model) of shape `(batch_size, 1)` instead of - all ``decoder_input_ids``` of shape `(batch_size, sequence_length)`. inputs_embeds (`torch.FloatTensor` - of shape `(batch_size, sequence_length, hidden_size)`, *optional*): Optionally, instead of passing + all `decoder_input_ids` of shape `(batch_size, sequence_length)`. inputs_embeds (`torch.FloatTensor` of + shape `(batch_size, sequence_length, hidden_size)`, *optional*): Optionally, instead of passing `input_ids` you can choose to directly pass an embedded representation. This is useful if you want more control over how to convert `input_ids` indices into associated vectors than the model's internal embedding lookup matrix. @@ -1406,6 +1411,7 @@ class PLBartForSequenceClassification(PLBartPreTrainedModel): checkpoint=_CHECKPOINT_FOR_DOC, output_type=Seq2SeqSequenceClassifierOutput, config_class=_CONFIG_FOR_DOC, + expected_output=_SEQ_CLASS_EXPECTED_OUTPUT_SHAPE, ) # Copied from transformers.models.bart.modeling_bart.BartForSequenceClassification.forward def forward( @@ -1521,7 +1527,7 @@ class PLBartDecoderWrapper(PLBartPreTrainedModel): return self.decoder(*args, **kwargs) -# Copied from transformers.models.bart.modeling_bart.BartForCausalLM with Bart->PLBart +# Copied from transformers.models.bart.modeling_bart.BartForCausalLM with Bart->PLBart, facebook/bart-base->uclanlp/plbart-base class PLBartForCausalLM(PLBartPreTrainedModel): def __init__(self, config): config = copy.deepcopy(config) @@ -1643,13 +1649,16 @@ class PLBartForCausalLM(PLBartPreTrainedModel): ```python >>> from transformers import PLBartTokenizer, PLBartForCausalLM - >>> tokenizer = PLBartTokenizer.from_pretrained("facebook/bart-large") - >>> model = PLBartForCausalLM.from_pretrained("facebook/bart-large", add_cross_attention=False) + >>> tokenizer = PLBartTokenizer.from_pretrained("uclanlp/plbart-base") + >>> model = PLBartForCausalLM.from_pretrained("uclanlp/plbart-base", add_cross_attention=False) >>> assert model.config.is_decoder, f"{model.__class__} has to be configured as a decoder." >>> inputs = tokenizer("Hello, my dog is cute", return_tensors="pt") >>> outputs = model(**inputs) >>> logits = outputs.logits + >>> expected_shape = [1, inputs.input_ids.shape[-1], model.config.vocab_size] + >>> list(logits.shape) == expected_shape + True ```""" output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions diff --git a/utils/documentation_tests.txt b/utils/documentation_tests.txt index 43135e225c..7c15c26f07 100644 --- a/utils/documentation_tests.txt +++ b/utils/documentation_tests.txt @@ -20,5 +20,13 @@ src/transformers/models/poolformer/modeling_poolformer.py src/transformers/models/vit_mae/modeling_vit_mae.py src/transformers/models/segformer/modeling_segformer.py src/transformers/models/vision_encoder_decoder/modeling_vision_encoder_decoder.py +src/transformers/models/bart/modeling_bart.py +src/transformers/models/mbart/modeling_mbart.py +src/transformers/models/bigbird_pegasus/modeling_bigbird_pegasus.py +src/transformers/models/marian/modeling_marian.py +src/transformers/models/pegasus/modeling_pegasus.py +src/transformers/models/blenderbot/modeling_blenderbot.py +src/transformers/models/blenderbot_small/modeling_blenderbot_small.py +src/transformers/models/plbart/modeling_plbart.py docs/source/quicktour.mdx docs/source/task_summary.mdx \ No newline at end of file