From 77c5a805366af9f6e8b7a9d4006a3d97b6d139a2 Mon Sep 17 00:00:00 2001 From: Patrick von Platen Date: Thu, 24 Mar 2022 00:17:00 +0100 Subject: [PATCH] [Doctests] Make roberta-like meaningfull (#16363) * [Doctests] Make roberta-like meaningfull * correct * final correct * Trigger test * make style * apply suggestion from sylvain --- .../models/roberta/modeling_roberta.py | 14 +- src/transformers/utils/doc.py | 130 +++++++++++++----- utils/documentation_tests.txt | 27 ++-- 3 files changed, 118 insertions(+), 53 deletions(-) diff --git a/src/transformers/models/roberta/modeling_roberta.py b/src/transformers/models/roberta/modeling_roberta.py index 58fc3afc5b..fd21085e32 100644 --- a/src/transformers/models/roberta/modeling_roberta.py +++ b/src/transformers/models/roberta/modeling_roberta.py @@ -1067,6 +1067,8 @@ class RobertaForMaskedLM(RobertaPreTrainedModel): output_type=MaskedLMOutput, config_class=_CONFIG_FOR_DOC, mask="", + expected_output="' Paris'", + expected_loss=0.1, ) def forward( self, @@ -1177,9 +1179,11 @@ class RobertaForSequenceClassification(RobertaPreTrainedModel): @add_start_docstrings_to_model_forward(ROBERTA_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( processor_class=_TOKENIZER_FOR_DOC, - checkpoint=_CHECKPOINT_FOR_DOC, + checkpoint="cardiffnlp/twitter-roberta-base-emotion", output_type=SequenceClassifierOutput, config_class=_CONFIG_FOR_DOC, + expected_output="'optimism'", + expected_loss=0.08, ) def forward( self, @@ -1372,9 +1376,11 @@ class RobertaForTokenClassification(RobertaPreTrainedModel): @add_start_docstrings_to_model_forward(ROBERTA_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( processor_class=_TOKENIZER_FOR_DOC, - checkpoint=_CHECKPOINT_FOR_DOC, + checkpoint="Jean-Baptiste/roberta-large-ner-english", output_type=TokenClassifierOutput, config_class=_CONFIG_FOR_DOC, + expected_output="['O', 'ORG', 'ORG', 'O', 'O', 'O', 'O', 'O', 'LOC', 'O', 'LOC', 'LOC']", + 
expected_loss=0.01, ) def forward( self, @@ -1475,9 +1481,11 @@ class RobertaForQuestionAnswering(RobertaPreTrainedModel): @add_start_docstrings_to_model_forward(ROBERTA_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( processor_class=_TOKENIZER_FOR_DOC, - checkpoint=_CHECKPOINT_FOR_DOC, + checkpoint="deepset/roberta-base-squad2", output_type=QuestionAnsweringModelOutput, config_class=_CONFIG_FOR_DOC, + expected_output="' puppet'", + expected_loss=0.86, ) def forward( self, diff --git a/src/transformers/utils/doc.py b/src/transformers/utils/doc.py index fdb65d6de5..17f8adeb26 100644 --- a/src/transformers/utils/doc.py +++ b/src/transformers/utils/doc.py @@ -156,12 +156,28 @@ PT_TOKEN_CLASSIFICATION_SAMPLE = r""" >>> tokenizer = {processor_class}.from_pretrained("{checkpoint}") >>> model = {model_class}.from_pretrained("{checkpoint}") - >>> inputs = tokenizer("Hello, my dog is cute", return_tensors="pt") - >>> labels = torch.tensor([1] * inputs["input_ids"].size(1)).unsqueeze(0) # Batch size 1 + >>> inputs = tokenizer( + ... "HuggingFace is a company based in Paris and New York", add_special_tokens=False, return_tensors="pt" + ... ) - >>> outputs = model(**inputs, labels=labels) - >>> loss = outputs.loss - >>> logits = outputs.logits + >>> with torch.no_grad(): + ... logits = model(**inputs).logits + + >>> predicted_token_class_ids = logits.argmax(-1) + + >>> # Note that tokens are classified rather than input words which means that + >>> # there might be more predicted token classes than words. 
+ >>> # Multiple token classes might account for the same word + >>> predicted_tokens_classes = [model.config.id2label[t.item()] for t in predicted_token_class_ids[0]] + >>> predicted_tokens_classes + {expected_output} + ``` + + ```python + >>> labels = predicted_token_class_ids + >>> loss = model(**inputs, labels=labels).loss + >>> round(loss.item(), 2) + {expected_loss} ``` """ @@ -172,28 +188,31 @@ PT_QUESTION_ANSWERING_SAMPLE = r""" >>> from transformers import {processor_class}, {model_class} >>> import torch - >>> torch.manual_seed(0) # doctest: +IGNORE_RESULT - >>> tokenizer = {processor_class}.from_pretrained("{checkpoint}") >>> model = {model_class}.from_pretrained("{checkpoint}") >>> question, text = "Who was Jim Henson?", "Jim Henson was a nice puppet" - >>> inputs = tokenizer(question, text, return_tensors="pt") - >>> start_positions = torch.tensor([1]) - >>> end_positions = torch.tensor([3]) - >>> outputs = model(**inputs, start_positions=start_positions, end_positions=end_positions) + >>> inputs = tokenizer(question, text, return_tensors="pt") + >>> with torch.no_grad(): + ... 
outputs = model(**inputs) + + >>> answer_start_index = outputs.start_logits.argmax() + >>> answer_end_index = outputs.end_logits.argmax() + + >>> predict_answer_tokens = inputs.input_ids[0, answer_start_index : answer_end_index + 1] + >>> tokenizer.decode(predict_answer_tokens) + {expected_output} + ``` + + ```python + >>> # target is "nice puppet" + >>> target_start_index, target_end_index = torch.tensor([14]), torch.tensor([15]) + + >>> outputs = model(**inputs, start_positions=target_start_index, end_positions=target_end_index) >>> loss = outputs.loss >>> round(loss.item(), 2) {expected_loss} - - >>> start_scores = outputs.start_logits - >>> list(start_scores.shape) - {expected_output} - - >>> end_scores = outputs.end_logits - >>> list(end_scores.shape) - {expected_output} ``` """ @@ -204,40 +223,62 @@ PT_SEQUENCE_CLASSIFICATION_SAMPLE = r""" >>> import torch >>> from transformers import {processor_class}, {model_class} - >>> torch.manual_seed(0) # doctest: +IGNORE_RESULT - >>> tokenizer = {processor_class}.from_pretrained("{checkpoint}") - >>> model = {model_class}.from_pretrained("{checkpoint}", num_labels=2) + >>> model = {model_class}.from_pretrained("{checkpoint}") >>> inputs = tokenizer("Hello, my dog is cute", return_tensors="pt") - >>> labels = torch.tensor([1]).unsqueeze(0) # Batch size 1 - >>> outputs = model(**inputs, labels=labels) - >>> loss = outputs.loss - >>> logits = outputs.logits - >>> list(logits.shape) + + >>> with torch.no_grad(): + ... 
logits = model(**inputs).logits + + >>> predicted_class_id = logits.argmax().item() + >>> model.config.id2label[predicted_class_id] {expected_output} ``` + ```python + >>> # To train a model on `num_labels` classes, you can pass `num_labels=num_labels` to `.from_pretrained(...)` + >>> num_labels = len(model.config.id2label) + >>> model = {model_class}.from_pretrained("{checkpoint}", num_labels=num_labels) + + >>> labels = torch.tensor(1) + >>> loss = model(**inputs, labels=labels).loss + >>> round(loss.item(), 2) + {expected_loss} + ``` + Example of multi-label classification: ```python >>> import torch >>> from transformers import {processor_class}, {model_class} - >>> torch.manual_seed(0) # doctest: +IGNORE_RESULT - >>> tokenizer = {processor_class}.from_pretrained("{checkpoint}") - >>> model = {model_class}.from_pretrained("{checkpoint}", problem_type="multi_label_classification", num_labels=2) + >>> model = {model_class}.from_pretrained("{checkpoint}", problem_type="multi_label_classification") >>> inputs = tokenizer("Hello, my dog is cute", return_tensors="pt") - >>> labels = torch.tensor([[1, 1]], dtype=torch.float) # need dtype=float for BCEWithLogitsLoss - >>> outputs = model(**inputs, labels=labels) - >>> loss = outputs.loss - >>> list(logits.shape) + + >>> with torch.no_grad(): + ... logits = model(**inputs).logits + + >>> predicted_class_id = logits.argmax().item() + >>> model.config.id2label[predicted_class_id] {expected_output} ``` -""" + ```python + >>> # To train a model on `num_labels` classes, you can pass `num_labels=num_labels` to `.from_pretrained(...)` + >>> num_labels = len(model.config.id2label) + >>> model = {model_class}.from_pretrained("{checkpoint}", num_labels=num_labels) + + >>> num_labels = len(model.config.id2label) + >>> labels = torch.nn.functional.one_hot(torch.tensor([predicted_class_id]), num_classes=num_labels).to( + ... torch.float + ... 
) + >>> loss = model(**inputs, labels=labels).loss + >>> loss.backward() # doctest: +IGNORE_RESULT + ``` +""" PT_MASKED_LM_SAMPLE = r""" Example: @@ -250,11 +291,26 @@ PT_MASKED_LM_SAMPLE = r""" >>> model = {model_class}.from_pretrained("{checkpoint}") >>> inputs = tokenizer("The capital of France is {mask}.", return_tensors="pt") + + >>> with torch.no_grad(): + ... logits = model(**inputs).logits + + >>> # retrieve index of {mask} + >>> mask_token_index = (inputs.input_ids == tokenizer.mask_token_id)[0].nonzero(as_tuple=True)[0] + + >>> predicted_token_id = logits[0, mask_token_index].argmax(axis=-1) + >>> tokenizer.decode(predicted_token_id) + {expected_output} + ``` + + ```python >>> labels = tokenizer("The capital of France is Paris.", return_tensors="pt")["input_ids"] + >>> # mask labels of non-{mask} tokens + >>> labels = torch.where(inputs.input_ids == tokenizer.mask_token_id, labels, -100) >>> outputs = model(**inputs, labels=labels) - >>> loss = outputs.loss - >>> logits = outputs.logits + >>> round(outputs.loss.item(), 2) + {expected_loss} ``` """ diff --git a/utils/documentation_tests.txt b/utils/documentation_tests.txt index b8632f29f9..7d31045184 100644 --- a/utils/documentation_tests.txt +++ b/utils/documentation_tests.txt @@ -1,21 +1,35 @@ docs/source/quicktour.mdx +docs/source/quicktour.mdx +docs/source/task_summary.mdx docs/source/task_summary.mdx src/transformers/generation_utils.py +src/transformers/generation_utils.py +src/transformers/models/bart/modeling_bart.py src/transformers/models/bart/modeling_bart.py src/transformers/models/beit/modeling_beit.py src/transformers/models/bigbird_pegasus/modeling_bigbird_pegasus.py +src/transformers/models/bigbird_pegasus/modeling_bigbird_pegasus.py src/transformers/models/blenderbot/modeling_blenderbot.py +src/transformers/models/blenderbot/modeling_blenderbot.py +src/transformers/models/blenderbot_small/modeling_blenderbot_small.py src/transformers/models/blenderbot_small/modeling_blenderbot_small.py 
src/transformers/models/convnext/modeling_convnext.py src/transformers/models/data2vec/modeling_data2vec_audio.py src/transformers/models/deit/modeling_deit.py +src/transformers/models/glpn/modeling_glpn.py src/transformers/models/hubert/modeling_hubert.py src/transformers/models/marian/modeling_marian.py +src/transformers/models/marian/modeling_marian.py +src/transformers/models/mbart/modeling_mbart.py src/transformers/models/mbart/modeling_mbart.py src/transformers/models/pegasus/modeling_pegasus.py +src/transformers/models/pegasus/modeling_pegasus.py +src/transformers/models/plbart/modeling_plbart.py src/transformers/models/plbart/modeling_plbart.py src/transformers/models/poolformer/modeling_poolformer.py src/transformers/models/resnet/modeling_resnet.py +src/transformers/models/resnet/modeling_resnet.py +src/transformers/models/roberta/modeling_roberta.py src/transformers/models/segformer/modeling_segformer.py src/transformers/models/sew/modeling_sew.py src/transformers/models/sew_d/modeling_sew_d.py @@ -34,16 +48,3 @@ src/transformers/models/wav2vec2/modeling_wav2vec2.py src/transformers/models/wav2vec2/tokenization_wav2vec2.py src/transformers/models/wav2vec2_with_lm/processing_wav2vec2_with_lm.py src/transformers/models/wavlm/modeling_wavlm.py -src/transformers/models/bart/modeling_bart.py -src/transformers/models/mbart/modeling_mbart.py -src/transformers/models/bigbird_pegasus/modeling_bigbird_pegasus.py -src/transformers/models/marian/modeling_marian.py -src/transformers/models/pegasus/modeling_pegasus.py -src/transformers/models/blenderbot/modeling_blenderbot.py -src/transformers/models/blenderbot_small/modeling_blenderbot_small.py -src/transformers/models/plbart/modeling_plbart.py -src/transformers/generation_utils.py -docs/source/quicktour.mdx -docs/source/task_summary.mdx -src/transformers/models/resnet/modeling_resnet.py -src/transformers/models/glpn/modeling_glpn.py