diff --git a/docs/source/model_doc/gpt.rst b/docs/source/model_doc/gpt.rst index 8c27fcb776..9604b39cea 100644 --- a/docs/source/model_doc/gpt.rst +++ b/docs/source/model_doc/gpt.rst @@ -4,7 +4,7 @@ OpenAI GPT Overview ~~~~~~~~~~~~~~~~~~~~~ -OpenAI GPT model was proposed in `Improving Language Understanding by Generative Pre-Training`_ +OpenAI GPT model was proposed in `Improving Language Understanding by Generative Pre-Training `__ by Alec Radford, Karthik Narasimhan, Tim Salimans and Ilya Sutskever. It's a causal (unidirectional) transformer pre-trained using language modeling on a large corpus will long range dependencies, the Toronto Book Corpus. diff --git a/src/transformers/file_utils.py b/src/transformers/file_utils.py index 23fd3171d2..8aafa95f43 100644 --- a/src/transformers/file_utils.py +++ b/src/transformers/file_utils.py @@ -88,6 +88,8 @@ TF_WEIGHTS_NAME = "model.ckpt" CONFIG_NAME = "config.json" MODEL_CARD_NAME = "modelcard.json" + +MULTIPLE_CHOICE_DUMMY_INPUTS = [[[0], [1]], [[0], [1]]] DUMMY_INPUTS = [[7, 6, 0, 0, 1], [1, 2, 3, 0, 0], [0, 0, 0, 4, 5]] DUMMY_MASK = [[1, 1, 1, 1, 1], [1, 1, 1, 0, 0], [0, 0, 0, 1, 1]] diff --git a/src/transformers/modeling_albert.py b/src/transformers/modeling_albert.py index 04c06e06db..f7094a287b 100644 --- a/src/transformers/modeling_albert.py +++ b/src/transformers/modeling_albert.py @@ -659,8 +659,8 @@ class AlbertForMaskedLM(AlbertPreTrainedModel): from transformers import AlbertTokenizer, AlbertForMaskedLM import torch - tokenizer = BertTokenizer.from_pretrained('albert-base-v2') - model = BertForMaskedLM.from_pretrained('albert-base-v2') + tokenizer = AlbertTokenizer.from_pretrained('albert-base-v2') + model = AlbertForMaskedLM.from_pretrained('albert-base-v2') input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0) # Batch size 1 outputs = model(input_ids, masked_lm_labels=input_ids) loss, prediction_scores = outputs[:2] @@ -839,16 +839,19 @@ class 
AlbertForQuestionAnswering(AlbertPreTrainedModel): Attentions weights after the attention softmax, used to compute the weighted average in the self-attention heads. - Examples:: + Examples:: - # The checkpoint albert-base-v2 is not fine-tuned for question answering. Please see the - # examples/run_squad.py example to see how to fine-tune a model to a question answering task. + # The checkpoint albert-base-v2 is not fine-tuned for question answering. Please see the + # examples/run_squad.py example to see how to fine-tune a model to a question answering task. - tokenizer = AlbertTokenizer.from_pretrained('albert-base-v2') - model = AlbertForQuestionAnswering.from_pretrained('albert-base-v2') - question, text = "Who was Jim Henson?", "Jim Henson was a nice puppet" - input_dict = tokenizer.encode_plus(question, text, return_tensors='pt') - start_scores, end_scores = model(**input_dict) + from transformers import AlbertTokenizer, AlbertForQuestionAnswering + import torch + + tokenizer = AlbertTokenizer.from_pretrained('albert-base-v2') + model = AlbertForQuestionAnswering.from_pretrained('albert-base-v2') + question, text = "Who was Jim Henson?", "Jim Henson was a nice puppet" + input_dict = tokenizer.encode_plus(question, text, return_tensors='pt') + start_scores, end_scores = model(**input_dict) """ diff --git a/src/transformers/modeling_bert.py b/src/transformers/modeling_bert.py index a66551fbee..7a881a0805 100644 --- a/src/transformers/modeling_bert.py +++ b/src/transformers/modeling_bert.py @@ -687,10 +687,15 @@ class BertModel(BertPreTrainedModel): Examples:: + from transformers import BertModel, BertTokenizer + import torch + tokenizer = BertTokenizer.from_pretrained('bert-base-uncased') model = BertModel.from_pretrained('bert-base-uncased') + input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0) # Batch size 1 outputs = model(input_ids) + last_hidden_states = outputs[0] # The last hidden-state is the first 
element of the output tuple """ @@ -873,10 +878,15 @@ class BertForPreTraining(BertPreTrainedModel): Examples:: + from transformers import BertTokenizer, BertForPreTraining + import torch + tokenizer = BertTokenizer.from_pretrained('bert-base-uncased') model = BertForPreTraining.from_pretrained('bert-base-uncased') + input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0) # Batch size 1 outputs = model(input_ids) + prediction_scores, seq_relationship_scores = outputs[:2] """ @@ -968,10 +978,15 @@ class BertForMaskedLM(BertPreTrainedModel): Examples:: + from transformers import BertTokenizer, BertForMaskedLM + import torch + tokenizer = BertTokenizer.from_pretrained('bert-base-uncased') model = BertForMaskedLM.from_pretrained('bert-base-uncased') + input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0) # Batch size 1 outputs = model(input_ids, masked_lm_labels=input_ids) + loss, prediction_scores = outputs[:2] """ @@ -1064,10 +1079,15 @@ class BertForNextSentencePrediction(BertPreTrainedModel): Examples:: + from transformers import BertTokenizer, BertForNextSentencePrediction + import torch + tokenizer = BertTokenizer.from_pretrained('bert-base-uncased') model = BertForNextSentencePrediction.from_pretrained('bert-base-uncased') + input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0) # Batch size 1 outputs = model(input_ids) + seq_relationship_scores = outputs[0] """ @@ -1148,11 +1168,16 @@ class BertForSequenceClassification(BertPreTrainedModel): Examples:: + from transformers import BertTokenizer, BertForSequenceClassification + import torch + tokenizer = BertTokenizer.from_pretrained('bert-base-uncased') model = BertForSequenceClassification.from_pretrained('bert-base-uncased') + input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0) # Batch size 1 labels = 
torch.tensor([1]).unsqueeze(0) # Batch size 1 outputs = model(input_ids, labels=labels) + loss, logits = outputs[:2] """ @@ -1240,12 +1265,17 @@ class BertForMultipleChoice(BertPreTrainedModel): Examples:: + from transformers import BertTokenizer, BertForMultipleChoice + import torch + tokenizer = BertTokenizer.from_pretrained('bert-base-uncased') model = BertForMultipleChoice.from_pretrained('bert-base-uncased') choices = ["Hello, my dog is cute", "Hello, my cat is amazing"] + input_ids = torch.tensor([tokenizer.encode(s, add_special_tokens=True) for s in choices]).unsqueeze(0) # Batch size 1, 2 choices labels = torch.tensor(1).unsqueeze(0) # Batch size 1 outputs = model(input_ids, labels=labels) + loss, classification_scores = outputs[:2] """ @@ -1333,11 +1363,16 @@ class BertForTokenClassification(BertPreTrainedModel): Examples:: + from transformers import BertTokenizer, BertForTokenClassification + import torch + tokenizer = BertTokenizer.from_pretrained('bert-base-uncased') model = BertForTokenClassification.from_pretrained('bert-base-uncased') + input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0) # Batch size 1 labels = torch.tensor([1] * input_ids.size(1)).unsqueeze(0) # Batch size 1 outputs = model(input_ids, labels=labels) + loss, scores = outputs[:2] """ @@ -1431,15 +1466,21 @@ class BertForQuestionAnswering(BertPreTrainedModel): Examples:: + from transformers import BertTokenizer, BertForQuestionAnswering + import torch + tokenizer = BertTokenizer.from_pretrained('bert-base-uncased') model = BertForQuestionAnswering.from_pretrained('bert-large-uncased-whole-word-masking-finetuned-squad') + question, text = "Who was Jim Henson?", "Jim Henson was a nice puppet" input_ids = tokenizer.encode(question, text) token_type_ids = [0 if i <= input_ids.index(102) else 1 for i in range(len(input_ids))] start_scores, end_scores = model(torch.tensor([input_ids]), token_type_ids=torch.tensor([token_type_ids])) + 
all_tokens = tokenizer.convert_ids_to_tokens(input_ids) - print(' '.join(all_tokens[torch.argmax(start_scores) : torch.argmax(end_scores)+1])) - # a nice puppet + answer = ' '.join(all_tokens[torch.argmax(start_scores) : torch.argmax(end_scores)+1]) + + assert answer == "a nice puppet" """ diff --git a/src/transformers/modeling_ctrl.py b/src/transformers/modeling_ctrl.py index 2a5301e099..40e076a498 100644 --- a/src/transformers/modeling_ctrl.py +++ b/src/transformers/modeling_ctrl.py @@ -313,10 +313,15 @@ class CTRLModel(CTRLPreTrainedModel): Examples:: + from transformers import CTRLTokenizer, CTRLModel + import torch + tokenizer = CTRLTokenizer.from_pretrained('ctrl') model = CTRLModel.from_pretrained('ctrl') + input_ids = torch.tensor(tokenizer.encode("Links Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0) # Batch size 1 outputs = model(input_ids) + last_hidden_states = outputs[0] # The last hidden-state is the first element of the output tuple """ diff --git a/src/transformers/modeling_distilbert.py b/src/transformers/modeling_distilbert.py index 7a66292d5e..bbccdcddd7 100644 --- a/src/transformers/modeling_distilbert.py +++ b/src/transformers/modeling_distilbert.py @@ -437,10 +437,15 @@ class DistilBertModel(DistilBertPreTrainedModel): Examples:: + from transformers import DistilBertTokenizer, DistilBertModel + import torch + tokenizer = DistilBertTokenizer.from_pretrained('distilbert-base-uncased') model = DistilBertModel.from_pretrained('distilbert-base-uncased') + input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0) # Batch size 1 outputs = model(input_ids) + last_hidden_states = outputs[0] # The last hidden-state is the first element of the output tuple """ @@ -536,6 +541,9 @@ class DistilBertForMaskedLM(DistilBertPreTrainedModel): Examples:: + from transformers import DistilBertTokenizer, DistilBertForMaskedLM + import torch + tokenizer = 
DistilBertTokenizer.from_pretrained('distilbert-base-uncased') model = DistilBertForMaskedLM.from_pretrained('distilbert-base-uncased') input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0) # Batch size 1 @@ -608,6 +616,9 @@ class DistilBertForSequenceClassification(DistilBertPreTrainedModel): Examples:: + from transformers import DistilBertTokenizer, DistilBertForSequenceClassification + import torch + tokenizer = DistilBertTokenizer.from_pretrained('distilbert-base-uncased') model = DistilBertForSequenceClassification.from_pretrained('distilbert-base-uncased') input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0) # Batch size 1 @@ -697,6 +708,9 @@ class DistilBertForQuestionAnswering(DistilBertPreTrainedModel): Examples:: + from transformers import DistilBertTokenizer, DistilBertForQuestionAnswering + import torch + tokenizer = DistilBertTokenizer.from_pretrained('distilbert-base-uncased') model = DistilBertForQuestionAnswering.from_pretrained('distilbert-base-uncased') input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0) # Batch size 1 @@ -781,6 +795,9 @@ class DistilBertForTokenClassification(DistilBertPreTrainedModel): Examples:: + from transformers import DistilBertTokenizer, DistilBertForTokenClassification + import torch + tokenizer = DistilBertTokenizer.from_pretrained('distilbert-base-uncased') model = DistilBertForTokenClassification.from_pretrained('distilbert-base-uncased') input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0) # Batch size 1 diff --git a/src/transformers/modeling_encoder_decoder.py b/src/transformers/modeling_encoder_decoder.py index c48c6b4b32..0951baff7d 100644 --- a/src/transformers/modeling_encoder_decoder.py +++ b/src/transformers/modeling_encoder_decoder.py @@ -109,6 +109,7 @@ class PreTrainedEncoderDecoder(nn.Module): Examples:: + # For example 
purposes. Not runnable. model = PreTrainedEncoderDecoder.from_pretained('bert-base-uncased', 'bert-base-uncased') # initialize Bert2Bert """ diff --git a/src/transformers/modeling_gpt2.py b/src/transformers/modeling_gpt2.py index a9c7910eb6..2426ef4352 100644 --- a/src/transformers/modeling_gpt2.py +++ b/src/transformers/modeling_gpt2.py @@ -385,6 +385,9 @@ class GPT2Model(GPT2PreTrainedModel): Examples:: + from transformers import GPT2Tokenizer, GPT2Model + import torch + tokenizer = GPT2Tokenizer.from_pretrained('gpt2') model = GPT2Model.from_pretrained('gpt2') input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0) # Batch size 1 diff --git a/src/transformers/modeling_mmbt.py b/src/transformers/modeling_mmbt.py index 1df20534e0..a3aae38965 100644 --- a/src/transformers/modeling_mmbt.py +++ b/src/transformers/modeling_mmbt.py @@ -169,6 +169,8 @@ class MMBTModel(nn.Module): Attentions weights after the attention softmax, used to compute the weighted average in the self-attention heads. Examples:: + + # For example purposes. Not runnable. transformer = BertModel.from_pretrained('bert-base-uncased') encoder = ImageEncoder(args) mmbt = MMBTModel(config, transformer, encoder) @@ -351,6 +353,7 @@ class MMBTForClassification(nn.Module): Examples:: + # For example purposes. Not runnable. 
transformer = BertModel.from_pretrained('bert-base-uncased') encoder = ImageEncoder(args) model = MMBTForClassification(config, transformer, encoder) diff --git a/src/transformers/modeling_openai.py b/src/transformers/modeling_openai.py index 1f098b3cc9..70abd5a1dc 100644 --- a/src/transformers/modeling_openai.py +++ b/src/transformers/modeling_openai.py @@ -388,6 +388,9 @@ class OpenAIGPTModel(OpenAIGPTPreTrainedModel): Examples:: + from transformers import OpenAIGPTTokenizer, OpenAIGPTModel + import torch + tokenizer = OpenAIGPTTokenizer.from_pretrained('openai-gpt') model = OpenAIGPTModel.from_pretrained('openai-gpt') input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0) # Batch size 1 @@ -541,6 +544,9 @@ class OpenAIGPTLMHeadModel(OpenAIGPTPreTrainedModel): Examples:: + from transformers import OpenAIGPTTokenizer, OpenAIGPTLMHeadModel + import torch + tokenizer = OpenAIGPTTokenizer.from_pretrained('openai-gpt') model = OpenAIGPTLMHeadModel.from_pretrained('openai-gpt') input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0) # Batch size 1 @@ -650,6 +656,9 @@ class OpenAIGPTDoubleHeadsModel(OpenAIGPTPreTrainedModel): Examples:: + from transformers import OpenAIGPTTokenizer, OpenAIGPTDoubleHeadsModel + import torch + tokenizer = OpenAIGPTTokenizer.from_pretrained('openai-gpt') model = OpenAIGPTDoubleHeadsModel.from_pretrained('openai-gpt') tokenizer.add_special_tokens({'cls_token': '[CLS]'}) # Add a [CLS] to the vocabulary (we should train it also!) 
diff --git a/src/transformers/modeling_roberta.py b/src/transformers/modeling_roberta.py index bd8068be5b..50de77b85c 100644 --- a/src/transformers/modeling_roberta.py +++ b/src/transformers/modeling_roberta.py @@ -224,6 +224,9 @@ class RobertaForMaskedLM(BertPreTrainedModel): Examples:: + from transformers import RobertaTokenizer, RobertaForMaskedLM + import torch + tokenizer = RobertaTokenizer.from_pretrained('roberta-base') model = RobertaForMaskedLM.from_pretrained('roberta-base') input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0) # Batch size 1 @@ -332,6 +335,9 @@ class RobertaForSequenceClassification(BertPreTrainedModel): Examples:: + from transformers import RobertaTokenizer, RobertaForSequenceClassification + import torch + tokenizer = RobertaTokenizer.from_pretrained('roberta-base') model = RobertaForSequenceClassification.from_pretrained('roberta-base') input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0) # Batch size 1 @@ -423,6 +429,9 @@ class RobertaForMultipleChoice(BertPreTrainedModel): Examples:: + from transformers import RobertaTokenizer, RobertaForMultipleChoice + import torch + tokenizer = RobertaTokenizer.from_pretrained('roberta-base') model = RobertaForMultipleChoice.from_pretrained('roberta-base') choices = ["Hello, my dog is cute", "Hello, my cat is amazing"] @@ -515,14 +524,17 @@ class RobertaForTokenClassification(BertPreTrainedModel): Attentions weights after the attention softmax, used to compute the weighted average in the self-attention heads. 
- Examples:: + Examples:: - tokenizer = RobertaTokenizer.from_pretrained('roberta-base') - model = RobertaForTokenClassification.from_pretrained('roberta-base') - input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0) # Batch size 1 - labels = torch.tensor([1] * input_ids.size(1)).unsqueeze(0) # Batch size 1 - outputs = model(input_ids, labels=labels) - loss, scores = outputs[:2] + from transformers import RobertaTokenizer, RobertaForTokenClassification + import torch + + tokenizer = RobertaTokenizer.from_pretrained('roberta-base') + model = RobertaForTokenClassification.from_pretrained('roberta-base') + input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0) # Batch size 1 + labels = torch.tensor([1] * input_ids.size(1)).unsqueeze(0) # Batch size 1 + outputs = model(input_ids, labels=labels) + loss, scores = outputs[:2] """ @@ -637,13 +649,23 @@ class RobertaForQuestionAnswering(BertPreTrainedModel): heads. Examples:: - tokenizer = RobertaTokenizer.from_pretrained('roberta-large') - model = RobertaForQuestionAnswering.from_pretrained('roberta-large') + + # The checkpoint roberta-base is not fine-tuned for question answering. Please see the + # examples/run_squad.py example to see how to fine-tune a model to a question answering task. 
+ + from transformers import RobertaTokenizer, RobertaForQuestionAnswering + import torch + + tokenizer = RobertaTokenizer.from_pretrained('roberta-base') + model = RobertaForQuestionAnswering.from_pretrained('roberta-base') + question, text = "Who was Jim Henson?", "Jim Henson was a nice puppet" input_ids = tokenizer.encode(question, text) start_scores, end_scores = model(torch.tensor([input_ids])) + all_tokens = tokenizer.convert_ids_to_tokens(input_ids) answer = ' '.join(all_tokens[torch.argmax(start_scores) : torch.argmax(end_scores)+1]) + """ outputs = self.roberta( diff --git a/src/transformers/modeling_tf_bert.py b/src/transformers/modeling_tf_bert.py index e9f492468b..08b7b7e466 100644 --- a/src/transformers/modeling_tf_bert.py +++ b/src/transformers/modeling_tf_bert.py @@ -22,7 +22,7 @@ import numpy as np import tensorflow as tf from .configuration_bert import BertConfig -from .file_utils import add_start_docstrings, add_start_docstrings_to_callable +from .file_utils import MULTIPLE_CHOICE_DUMMY_INPUTS, add_start_docstrings, add_start_docstrings_to_callable from .modeling_tf_utils import TFPreTrainedModel, get_initializer, shape_list @@ -939,6 +939,15 @@ class TFBertForMultipleChoice(TFBertPreTrainedModel): 1, kernel_initializer=get_initializer(config.initializer_range), name="classifier" ) + @property + def dummy_inputs(self): + """ Dummy inputs to build the network. 
+ + Returns: + tf.Tensor with dummy inputs + """ + return {"input_ids": tf.constant(MULTIPLE_CHOICE_DUMMY_INPUTS)} + @add_start_docstrings_to_callable(BERT_INPUTS_DOCSTRING) def call( self, diff --git a/src/transformers/modeling_tf_ctrl.py b/src/transformers/modeling_tf_ctrl.py index 1756da703e..78e0c1113a 100644 --- a/src/transformers/modeling_tf_ctrl.py +++ b/src/transformers/modeling_tf_ctrl.py @@ -530,14 +530,14 @@ class TFCTRLLMHeadModel(TFCTRLPreTrainedModel): Examples:: - import torch + import tensorflow as tf from transformers import CTRLTokenizer, TFCTRLLMHeadModel tokenizer = CTRLTokenizer.from_pretrained('ctrl') model = TFCTRLLMHeadModel.from_pretrained('ctrl') - input_ids = torch.tensor(tokenizer.encode("Links Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0) # Batch size 1 - outputs = model(input_ids, labels=input_ids) + input_ids = tf.constant([tokenizer.encode("Links Hello, my dog is cute", add_special_tokens=True)]) + outputs = model(input_ids) loss, logits = outputs[:2] """ diff --git a/src/transformers/modeling_tf_distilbert.py b/src/transformers/modeling_tf_distilbert.py index 2f5f253cd6..1dc8301730 100644 --- a/src/transformers/modeling_tf_distilbert.py +++ b/src/transformers/modeling_tf_distilbert.py @@ -699,7 +699,7 @@ class TFDistilBertForSequenceClassification(TFDistilBertPreTrainedModel): Examples:: import tensorflow as tf - from transformers import BertTokenizer, TFDistilBertForSequenceClassification + from transformers import DistilBertTokenizer, TFDistilBertForSequenceClassification tokenizer = DistilBertTokenizer.from_pretrained('distilbert-base-uncased') model = TFDistilBertForSequenceClassification.from_pretrained('distilbert-base-uncased') @@ -755,10 +755,12 @@ class TFDistilBertForTokenClassification(TFDistilBertPreTrainedModel): Attentions weights after the attention softmax, used to compute the weighted average in the self-attention heads. 
Examples:: + import tensorflow as tf from transformers import DistilBertTokenizer, TFDistilBertForTokenClassification - tokenizer = DistilBertTokenizer.from_pretrained('bert-base-uncased') - model = TFDistilBertForTokenClassification.from_pretrained('bert-base-uncased') + + tokenizer = DistilBertTokenizer.from_pretrained('distilbert-base-uncased') + model = TFDistilBertForTokenClassification.from_pretrained('distilbert-base-uncased') input_ids = tf.constant(tokenizer.encode("Hello, my dog is cute"))[None, :] # Batch size 1 outputs = model(input_ids) scores = outputs[0] @@ -814,7 +816,7 @@ class TFDistilBertForQuestionAnswering(TFDistilBertPreTrainedModel): Examples:: import tensorflow as tf - from transformers import BertTokenizer, TFDistilBertForQuestionAnswering + from transformers import DistilBertTokenizer, TFDistilBertForQuestionAnswering tokenizer = DistilBertTokenizer.from_pretrained('distilbert-base-uncased') model = TFDistilBertForQuestionAnswering.from_pretrained('distilbert-base-uncased') diff --git a/src/transformers/modeling_tf_gpt2.py b/src/transformers/modeling_tf_gpt2.py index 5ff7491e8d..1156660953 100644 --- a/src/transformers/modeling_tf_gpt2.py +++ b/src/transformers/modeling_tf_gpt2.py @@ -609,6 +609,7 @@ class TFGPT2DoubleHeadsModel(TFGPT2PreTrainedModel): Examples:: + # For example purposes. Not runnable. import tensorflow as tf from transformers import GPT2Tokenizer, TFGPT2DoubleHeadsModel diff --git a/src/transformers/modeling_tf_openai.py b/src/transformers/modeling_tf_openai.py index 8ed1654b6a..f04104db83 100644 --- a/src/transformers/modeling_tf_openai.py +++ b/src/transformers/modeling_tf_openai.py @@ -582,6 +582,7 @@ class TFOpenAIGPTDoubleHeadsModel(TFOpenAIGPTPreTrainedModel): Examples:: + # For example purposes. Not runnable. 
import tensorflow as tf from transformers import OpenAIGPTTokenizer, TFOpenAIGPTDoubleHeadsModel diff --git a/src/transformers/modeling_tf_roberta.py b/src/transformers/modeling_tf_roberta.py index d0cce05cee..31fb43f1cc 100644 --- a/src/transformers/modeling_tf_roberta.py +++ b/src/transformers/modeling_tf_roberta.py @@ -293,7 +293,7 @@ class TFRobertaForMaskedLM(TFRobertaPreTrainedModel): tokenizer = RobertaTokenizer.from_pretrained('roberta-base') model = TFRobertaForMaskedLM.from_pretrained('roberta-base') input_ids = tf.constant(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True))[None, :] # Batch size 1 - outputs = model(input_ids, masked_lm_labels=input_ids) + outputs = model(input_ids) prediction_scores = outputs[0] """ @@ -368,7 +368,7 @@ class TFRobertaForSequenceClassification(TFRobertaPreTrainedModel): import tensorflow as tf from transformers import RobertaTokenizer, TFRobertaForSequenceClassification - tokenizer = RoertaTokenizer.from_pretrained('roberta-base') + tokenizer = RobertaTokenizer.from_pretrained('roberta-base') model = TFRobertaForSequenceClassification.from_pretrained('roberta-base') input_ids = tf.constant(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True))[None, :] # Batch size 1 labels = tf.constant([1])[None, :] # Batch size 1 diff --git a/src/transformers/modeling_tf_utils.py b/src/transformers/modeling_tf_utils.py index 1e8839c988..4b64f9364c 100644 --- a/src/transformers/modeling_tf_utils.py +++ b/src/transformers/modeling_tf_utils.py @@ -248,6 +248,7 @@ class TFPreTrainedModel(tf.keras.Model, TFModelUtilsMixin): Examples:: + # For example purposes. Not runnable. model = BertModel.from_pretrained('bert-base-uncased') # Download model and configuration from S3 and cache. model = BertModel.from_pretrained('./test/saved_model/') # E.g. 
model was saved using `save_pretrained('./test/saved_model/')` model = BertModel.from_pretrained('bert-base-uncased', output_attention=True) # Update configuration during loading diff --git a/src/transformers/modeling_tf_xlnet.py b/src/transformers/modeling_tf_xlnet.py index 6f4d56252c..d9ced75384 100644 --- a/src/transformers/modeling_tf_xlnet.py +++ b/src/transformers/modeling_tf_xlnet.py @@ -863,6 +863,7 @@ class TFXLNetLMHeadModel(TFXLNetPreTrainedModel): Examples:: import tensorflow as tf + import numpy as np from transformers import XLNetTokenizer, TFXLNetLMHeadModel tokenizer = XLNetTokenizer.from_pretrained('xlnet-large-cased') @@ -870,11 +871,11 @@ class TFXLNetLMHeadModel(TFXLNetPreTrainedModel): # We show how to setup inputs to predict a next token using a bi-directional context. input_ids = tf.constant(tokenizer.encode("Hello, my dog is very ", add_special_tokens=True))[None, :] # We will predict the masked token - perm_mask = tf.zeros((1, input_ids.shape[1], input_ids.shape[1])) + perm_mask = np.zeros((1, input_ids.shape[1], input_ids.shape[1])) perm_mask[:, :, -1] = 1.0 # Previous tokens don't see last token - target_mapping = tf.zeros((1, 1, input_ids.shape[1])) # Shape [1, 1, seq_length] => let's predict one token + target_mapping = np.zeros((1, 1, input_ids.shape[1])) # Shape [1, 1, seq_length] => let's predict one token target_mapping[0, 0, -1] = 1.0 # Our first (and only) prediction will be the last token of the sequence (the masked token) - outputs = model(input_ids, perm_mask=perm_mask, target_mapping=target_mapping) + outputs = model(input_ids, perm_mask=tf.constant(perm_mask, dtype=tf.float32), target_mapping=tf.constant(target_mapping, dtype=tf.float32)) next_token_logits = outputs[0] # Output has shape [target_mapping.size(0), target_mapping.size(1), config.vocab_size] @@ -995,7 +996,7 @@ class TFXLNetForTokenClassification(TFXLNetPreTrainedModel): from transformers import XLNetTokenizer, TFXLNetForTokenClassification tokenizer = 
XLNetTokenizer.from_pretrained('xlnet-large-cased') - model = TFXLNetForSequenceClassification.from_pretrained('xlnet-large-cased') + model = TFXLNetForTokenClassification.from_pretrained('xlnet-large-cased') input_ids = tf.constant(tokenizer.encode("Hello, my dog is cute"))[None, :] # Batch size 1 outputs = model(input_ids) scores = outputs[0] @@ -1115,6 +1116,7 @@ class TFXLNetForQuestionAnsweringSimple(TFXLNetPreTrainedModel): # Examples:: +# # For example purposes. Not runnable. # tokenizer = XLMTokenizer.from_pretrained('xlm-mlm-en-2048') # model = XLMForQuestionAnswering.from_pretrained('xlnet-large-cased') # input_ids = tf.constant(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True))[None, :] # Batch size 1 diff --git a/src/transformers/modeling_transfo_xl.py b/src/transformers/modeling_transfo_xl.py index 85bb26a2a8..05bb5f7e3e 100644 --- a/src/transformers/modeling_transfo_xl.py +++ b/src/transformers/modeling_transfo_xl.py @@ -694,6 +694,9 @@ class TransfoXLModel(TransfoXLPreTrainedModel): Examples:: + from transformers import TransfoXLTokenizer, TransfoXLModel + import torch + tokenizer = TransfoXLTokenizer.from_pretrained('transfo-xl-wt103') model = TransfoXLModel.from_pretrained('transfo-xl-wt103') input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0) # Batch size 1 @@ -883,6 +886,9 @@ class TransfoXLLMHeadModel(TransfoXLPreTrainedModel): Examples:: + from transformers import TransfoXLTokenizer, TransfoXLLMHeadModel + import torch + tokenizer = TransfoXLTokenizer.from_pretrained('transfo-xl-wt103') model = TransfoXLLMHeadModel.from_pretrained('transfo-xl-wt103') input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0) # Batch size 1 diff --git a/src/transformers/modeling_utils.py b/src/transformers/modeling_utils.py index 381b0013f2..a9074d749c 100644 --- a/src/transformers/modeling_utils.py +++ b/src/transformers/modeling_utils.py @@ 
-353,6 +353,7 @@ class PreTrainedModel(nn.Module, ModuleUtilsMixin): Examples:: + # For example purposes. Not runnable. model = BertModel.from_pretrained('bert-base-uncased') # Download model and configuration from S3 and cache. model = BertModel.from_pretrained('./test/saved_model/') # E.g. model was saved using `save_pretrained('./test/saved_model/')` model = BertModel.from_pretrained('bert-base-uncased', output_attention=True) # Update configuration during loading diff --git a/src/transformers/modeling_xlm.py b/src/transformers/modeling_xlm.py index de980eb7b6..9ba5540f9c 100644 --- a/src/transformers/modeling_xlm.py +++ b/src/transformers/modeling_xlm.py @@ -437,6 +437,9 @@ class XLMModel(XLMPreTrainedModel): Examples:: + from transformers import XLMTokenizer, XLMModel + import torch + tokenizer = XLMTokenizer.from_pretrained('xlm-mlm-en-2048') model = XLMModel.from_pretrained('xlm-mlm-en-2048') input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0) # Batch size 1 @@ -688,6 +691,9 @@ class XLMWithLMHeadModel(XLMPreTrainedModel): Examples:: + from transformers import XLMTokenizer, XLMWithLMHeadModel + import torch + tokenizer = XLMTokenizer.from_pretrained('xlm-mlm-en-2048') model = XLMWithLMHeadModel.from_pretrained('xlm-mlm-en-2048') input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0) # Batch size 1 @@ -770,6 +776,9 @@ class XLMForSequenceClassification(XLMPreTrainedModel): Examples:: + from transformers import XLMTokenizer, XLMForSequenceClassification + import torch + tokenizer = XLMTokenizer.from_pretrained('xlm-mlm-en-2048') model = XLMForSequenceClassification.from_pretrained('xlm-mlm-en-2048') input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0) # Batch size 1 @@ -869,13 +878,16 @@ class XLMForQuestionAnsweringSimple(XLMPreTrainedModel): Examples:: + from transformers import XLMTokenizer, 
XLMForQuestionAnsweringSimple + import torch + tokenizer = XLMTokenizer.from_pretrained('xlm-mlm-en-2048') model = XLMForQuestionAnsweringSimple.from_pretrained('xlm-mlm-en-2048') input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0) # Batch size 1 start_positions = torch.tensor([1]) end_positions = torch.tensor([3]) outputs = model(input_ids, start_positions=start_positions, end_positions=end_positions) - loss, start_scores, end_scores = outputs[:2] + loss = outputs[0] """ transformer_outputs = self.transformer( @@ -1000,13 +1012,16 @@ class XLMForQuestionAnswering(XLMPreTrainedModel): Examples:: + from transformers import XLMTokenizer, XLMForQuestionAnswering + import torch + tokenizer = XLMTokenizer.from_pretrained('xlm-mlm-en-2048') model = XLMForQuestionAnswering.from_pretrained('xlm-mlm-en-2048') input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0) # Batch size 1 start_positions = torch.tensor([1]) end_positions = torch.tensor([3]) outputs = model(input_ids, start_positions=start_positions, end_positions=end_positions) - loss, start_scores, end_scores = outputs[:2] + loss = outputs[0] """ transformer_outputs = self.transformer( diff --git a/src/transformers/modeling_xlnet.py b/src/transformers/modeling_xlnet.py index a0c5cebe59..d059bdf6f1 100644 --- a/src/transformers/modeling_xlnet.py +++ b/src/transformers/modeling_xlnet.py @@ -735,9 +735,14 @@ class XLNetModel(XLNetPreTrainedModel): Examples:: + from transformers import XLNetTokenizer, XLNetModel + import torch + tokenizer = XLNetTokenizer.from_pretrained('xlnet-large-cased') model = XLNetModel.from_pretrained('xlnet-large-cased') + input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0) # Batch size 1 + outputs = model(input_ids) last_hidden_states = outputs[0] # The last hidden-state is the first element of the output tuple @@ -1016,14 +1021,19 @@ 
class XLNetLMHeadModel(XLNetPreTrainedModel): Examples:: + from transformers import XLNetTokenizer, XLNetLMHeadModel + import torch + tokenizer = XLNetTokenizer.from_pretrained('xlnet-large-cased') model = XLNetLMHeadModel.from_pretrained('xlnet-large-cased') + # We show how to setup inputs to predict a next token using a bi-directional context. input_ids = torch.tensor(tokenizer.encode("Hello, my dog is very ", add_special_tokens=True)).unsqueeze(0) # We will predict the masked token perm_mask = torch.zeros((1, input_ids.shape[1], input_ids.shape[1]), dtype=torch.float) perm_mask[:, :, -1] = 1.0 # Previous tokens don't see last token target_mapping = torch.zeros((1, 1, input_ids.shape[1]), dtype=torch.float) # Shape [1, 1, seq_length] => let's predict one token target_mapping[0, 0, -1] = 1.0 # Our first (and only) prediction will be the last token of the sequence (the masked token) + outputs = model(input_ids, perm_mask=perm_mask, target_mapping=target_mapping) next_token_logits = outputs[0] # Output has shape [target_mapping.size(0), target_mapping.size(1), config.vocab_size] @@ -1114,8 +1124,12 @@ class XLNetForSequenceClassification(XLNetPreTrainedModel): Examples:: + from transformers import XLNetTokenizer, XLNetForSequenceClassification + import torch + tokenizer = XLNetTokenizer.from_pretrained('xlnet-large-cased') model = XLNetForSequenceClassification.from_pretrained('xlnet-large-cased') + input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0) # Batch size 1 labels = torch.tensor([1]).unsqueeze(0) # Batch size 1 outputs = model(input_ids, labels=labels) @@ -1212,11 +1226,16 @@ class XLNetForTokenClassification(XLNetPreTrainedModel): Examples:: + from transformers import XLNetTokenizer, XLNetForTokenClassification + import torch + tokenizer = XLNetTokenizer.from_pretrained('xlnet-large-cased') - model = XLNetForSequenceClassification.from_pretrained('xlnet-large-cased') + model = 
XLNetForTokenClassification.from_pretrained('xlnet-large-cased') + input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0) # Batch size 1 labels = torch.tensor([1] * input_ids.size(1)).unsqueeze(0) # Batch size 1 outputs = model(input_ids, labels=labels) + scores = outputs[0] """ @@ -1314,11 +1333,16 @@ class XLNetForMultipleChoice(XLNetPreTrainedModel): Examples:: + from transformers import XLNetTokenizer, XLNetForMultipleChoice + import torch + tokenizer = XLNetTokenizer.from_pretrained('xlnet-base-cased') model = XLNetForMultipleChoice.from_pretrained('xlnet-base-cased') + choices = ["Hello, my dog is cute", "Hello, my cat is amazing"] input_ids = torch.tensor([tokenizer.encode(s) for s in choices]).unsqueeze(0) # Batch size 1, 2 choices labels = torch.tensor(1).unsqueeze(0) # Batch size 1 + outputs = model(input_ids, labels=labels) loss, classification_scores = outputs[:2] @@ -1425,13 +1449,18 @@ class XLNetForQuestionAnsweringSimple(XLNetPreTrainedModel): Examples:: - tokenizer = XLMTokenizer.from_pretrained('xlm-mlm-en-2048') - model = XLMForQuestionAnswering.from_pretrained('xlnet-large-cased') + from transformers import XLNetTokenizer, XLNetForQuestionAnsweringSimple + import torch + + tokenizer = XLNetTokenizer.from_pretrained('xlnet-base-cased') + model = XLNetForQuestionAnsweringSimple.from_pretrained('xlnet-base-cased') + input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0) # Batch size 1 start_positions = torch.tensor([1]) end_positions = torch.tensor([3]) + outputs = model(input_ids, start_positions=start_positions, end_positions=end_positions) - loss, start_scores, end_scores = outputs[:2] + loss = outputs[0] """ @@ -1560,13 +1589,17 @@ class XLNetForQuestionAnswering(XLNetPreTrainedModel): Examples:: - tokenizer = XLNetTokenizer.from_pretrained('xlnet-large-cased') - model = XLMForQuestionAnswering.from_pretrained('xlnet-large-cased') + from transformers import 
XLNetTokenizer, XLNetForQuestionAnswering + import torch + + tokenizer = XLNetTokenizer.from_pretrained('xlnet-base-cased') + model = XLNetForQuestionAnswering.from_pretrained('xlnet-base-cased') + input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0) # Batch size 1 start_positions = torch.tensor([1]) end_positions = torch.tensor([3]) outputs = model(input_ids, start_positions=start_positions, end_positions=end_positions) - loss, start_scores, end_scores = outputs[:2] + loss = outputs[0] """ transformer_outputs = self.transformer( diff --git a/tests/test_examples.py b/tests/test_examples.py index d6698a3771..c97af35200 100644 --- a/tests/test_examples.py +++ b/tests/test_examples.py @@ -17,7 +17,7 @@ import os import unittest from typing import List, Union -from .utils import require_torch +from .utils import require_tf, require_torch, slow def get_examples_from_file(file): @@ -51,14 +51,19 @@ def get_examples_from_file(file): elif "examples::" in line.lower(): example_mode = True example_indentation = line.lower().find("examples::") - elif "::" in line.lower(): - example_mode = True - example_indentation = line.lower().find("::") + # elif "::" in line.lower() and len(line.strip()) == 2: + # example_mode = True + # example_indentation = line.lower().find("::") - return ["\n".join(example) for example in examples] + examples = ["\n".join(example) for example in examples] + examples = [example for example in examples if "not runnable" not in example.lower()] + + return examples @require_torch +@require_tf +@slow class TestCodeExamples(unittest.TestCase): def analyze_directory( self, directory: str, identifier: Union[str, None] = None, ignore_files: Union[List[str], None] = None @@ -79,10 +84,10 @@ class TestCodeExamples(unittest.TestCase): joined_examples = [] def execute_example(code_example): - exec(code_example) + exec(code_example, {}) # Some examples are the continuation of others. 
- if len(examples) > 1: + if len(examples) > 0: joined_examples.append(examples[0]) joined_examples_index = 0 for example in examples[1:]: @@ -97,8 +102,9 @@ class TestCodeExamples(unittest.TestCase): print("Testing", file, str(len(joined_examples)) + "/" + str(len(joined_examples))) # Execute sub tests with every example. - with self.subTest(msg=file): - [execute_example(code_example) for code_example in joined_examples] + for index, code_example in enumerate(joined_examples): + with self.subTest(msg=file + " " + str(index) + "/" + str(len(joined_examples)) + code_example): + execute_example(code_example) def test_configuration_examples(self): transformers_directory = "src/transformers" @@ -109,3 +115,15 @@ class TestCodeExamples(unittest.TestCase): def test_main_doc_examples(self): doc_directory = "docs/source" self.analyze_directory(doc_directory) + + def test_modeling_examples(self): + transformers_directory = "src/transformers" + modeling_files = "modeling" + ignore_files = [ + "modeling_auto.py", + "modeling_t5.py", + "modeling_tf_auto.py", + "modeling_utils.py", + "modeling_tf_t5.py", + ] + self.analyze_directory(transformers_directory, identifier=modeling_files, ignore_files=ignore_files)