From beb03ec6c56e12b87fd94b97a36221b976b65651 Mon Sep 17 00:00:00 2001 From: wangfei <1140554608@qq.com> Date: Tue, 6 Aug 2019 11:15:57 +0800 Subject: [PATCH] Fix examples of loading pretrained models in docstring --- pytorch_transformers/modeling_bert.py | 107 +++++++++----------- pytorch_transformers/modeling_gpt2.py | 37 ++++--- pytorch_transformers/modeling_openai.py | 37 ++++--- pytorch_transformers/modeling_transfo_xl.py | 22 ++-- pytorch_transformers/modeling_xlm.py | 52 +++++----- pytorch_transformers/modeling_xlnet.py | 62 +++++------- 6 files changed, 141 insertions(+), 176 deletions(-) diff --git a/pytorch_transformers/modeling_bert.py b/pytorch_transformers/modeling_bert.py index 3f2e7cbda1..6e2df0d2fa 100644 --- a/pytorch_transformers/modeling_bert.py +++ b/pytorch_transformers/modeling_bert.py @@ -643,12 +643,11 @@ class BertModel(BertPreTrainedModel): Examples:: - config = BertConfig.from_pretrained('bert-base-uncased') - tokenizer = BertTokenizer.from_pretrained('bert-base-uncased') - model = BertModel(config) - input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0) # Batch size 1 - outputs = model(input_ids) - last_hidden_states = outputs[0] # The last hidden-state is the first element of the output tuple + >>> tokenizer = BertTokenizer.from_pretrained('bert-base-uncased') + >>> model = BertModel.from_pretrained('bert-base-uncased') + >>> input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0) # Batch size 1 + >>> outputs = model(input_ids) + >>> last_hidden_states = outputs[0] # The last hidden-state is the first element of the output tuple """ def __init__(self, config): @@ -754,13 +753,11 @@ class BertForPreTraining(BertPreTrainedModel): Examples:: - config = BertConfig.from_pretrained('bert-base-uncased') - tokenizer = BertTokenizer.from_pretrained('bert-base-uncased') - - model = BertForPreTraining(config) - input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0) # 
Batch size 1 - outputs = model(input_ids) - prediction_scores, seq_relationship_scores = outputs[:2] + >>> tokenizer = BertTokenizer.from_pretrained('bert-base-uncased') + >>> model = BertForPreTraining.from_pretrained('bert-base-uncased') + >>> input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0) # Batch size 1 + >>> outputs = model(input_ids) + >>> prediction_scores, seq_relationship_scores = outputs[:2] """ def __init__(self, config): @@ -824,13 +821,11 @@ class BertForMaskedLM(BertPreTrainedModel): Examples:: - config = BertConfig.from_pretrained('bert-base-uncased') - tokenizer = BertTokenizer.from_pretrained('bert-base-uncased') - - model = BertForMaskedLM(config) - input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0) # Batch size 1 - outputs = model(input_ids, masked_lm_labels=input_ids) - loss, prediction_scores = outputs[:2] + >>> tokenizer = BertTokenizer.from_pretrained('bert-base-uncased') + >>> model = BertForMaskedLM.from_pretrained('bert-base-uncased') + >>> input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0) # Batch size 1 + >>> outputs = model(input_ids, masked_lm_labels=input_ids) + >>> loss, prediction_scores = outputs[:2] """ def __init__(self, config): @@ -891,13 +886,11 @@ class BertForNextSentencePrediction(BertPreTrainedModel): Examples:: - config = BertConfig.from_pretrained('bert-base-uncased') - tokenizer = BertTokenizer.from_pretrained('bert-base-uncased') - - model = BertForNextSentencePrediction(config) - input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0) # Batch size 1 - outputs = model(input_ids) - seq_relationship_scores = outputs[0] + >>> tokenizer = BertTokenizer.from_pretrained('bert-base-uncased') + >>> model = BertForNextSentencePrediction.from_pretrained('bert-base-uncased') + >>> input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0) # Batch size 1 + >>> outputs = model(input_ids) + >>> 
seq_relationship_scores = outputs[0] """ def __init__(self, config): @@ -951,14 +944,12 @@ class BertForSequenceClassification(BertPreTrainedModel): Examples:: - config = BertConfig.from_pretrained('bert-base-uncased') - tokenizer = BertTokenizer.from_pretrained('bert-base-uncased') - - model = BertForSequenceClassification(config) - input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0) # Batch size 1 - labels = torch.tensor([1]).unsqueeze(0) # Batch size 1 - outputs = model(input_ids, labels=labels) - loss, logits = outputs[:2] + >>> tokenizer = BertTokenizer.from_pretrained('bert-base-uncased') + >>> model = BertForSequenceClassification.from_pretrained('bert-base-uncased') + >>> input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0) # Batch size 1 + >>> labels = torch.tensor([1]).unsqueeze(0) # Batch size 1 + >>> outputs = model(input_ids, labels=labels) + >>> loss, logits = outputs[:2] """ def __init__(self, config): @@ -1057,15 +1048,13 @@ class BertForMultipleChoice(BertPreTrainedModel): Examples:: - config = BertConfig.from_pretrained('bert-base-uncased') - tokenizer = BertTokenizer.from_pretrained('bert-base-uncased') - - model = BertForMultipleChoice(config) - choices = ["Hello, my dog is cute", "Hello, my cat is amazing"] - input_ids = torch.tensor([tokenizer.encode(s) for s in choices]).unsqueeze(0) # Batch size 1, 2 choices - labels = torch.tensor(1).unsqueeze(0) # Batch size 1 - outputs = model(input_ids, labels=labels) - loss, classification_scores = outputs[:2] + >>> tokenizer = BertTokenizer.from_pretrained('bert-base-uncased') + >>> model = BertForMultipleChoice.from_pretrained('bert-base-uncased') + >>> choices = ["Hello, my dog is cute", "Hello, my cat is amazing"] + >>> input_ids = torch.tensor([tokenizer.encode(s) for s in choices]).unsqueeze(0) # Batch size 1, 2 choices + >>> labels = torch.tensor(1).unsqueeze(0) # Batch size 1 + >>> outputs = model(input_ids, labels=labels) + >>> loss, 
classification_scores = outputs[:2] """ def __init__(self, config): @@ -1127,14 +1116,12 @@ class BertForTokenClassification(BertPreTrainedModel): Examples:: - config = BertConfig.from_pretrained('bert-base-uncased') - tokenizer = BertTokenizer.from_pretrained('bert-base-uncased') - - model = BertForTokenClassification(config) - input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0) # Batch size 1 - labels = torch.tensor([1] * input_ids.size(1)).unsqueeze(0) # Batch size 1 - outputs = model(input_ids, labels=labels) - loss, scores = outputs[:2] + >>> tokenizer = BertTokenizer.from_pretrained('bert-base-uncased') + >>> model = BertForTokenClassification.from_pretrained('bert-base-uncased') + >>> input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0) # Batch size 1 + >>> labels = torch.tensor([1] * input_ids.size(1)).unsqueeze(0) # Batch size 1 + >>> outputs = model(input_ids, labels=labels) + >>> loss, scores = outputs[:2] """ def __init__(self, config): @@ -1203,15 +1190,13 @@ class BertForQuestionAnswering(BertPreTrainedModel): Examples:: - config = BertConfig.from_pretrained('bert-base-uncased') - tokenizer = BertTokenizer.from_pretrained('bert-base-uncased') - - model = BertForQuestionAnswering(config) - input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0) # Batch size 1 - start_positions = torch.tensor([1]) - end_positions = torch.tensor([3]) - outputs = model(input_ids, start_positions=start_positions, end_positions=end_positions) - loss, start_scores, end_scores = outputs[:2] + >>> tokenizer = BertTokenizer.from_pretrained('bert-base-uncased') + >>> model = BertForQuestionAnswering.from_pretrained('bert-base-uncased') + >>> input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0) # Batch size 1 + >>> start_positions = torch.tensor([1]) + >>> end_positions = torch.tensor([3]) + >>> outputs = model(input_ids, start_positions=start_positions, 
end_positions=end_positions) + >>> loss, start_scores, end_scores = outputs[:3] """ def __init__(self, config): diff --git a/pytorch_transformers/modeling_gpt2.py b/pytorch_transformers/modeling_gpt2.py index 5268c5de7d..9800b6658f 100644 --- a/pytorch_transformers/modeling_gpt2.py +++ b/pytorch_transformers/modeling_gpt2.py @@ -433,12 +433,11 @@ class GPT2Model(GPT2PreTrainedModel): Examples:: - config = GPT2Config.from_pretrained('gpt2') - tokenizer = GPT2Tokenizer.from_pretrained('gpt2') - model = GPT2Model(config) - input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0) # Batch size 1 - outputs = model(input_ids) - last_hidden_states = outputs[0] # The last hidden-state is the first element of the output tuple + >>> tokenizer = GPT2Tokenizer.from_pretrained('gpt2') + >>> model = GPT2Model.from_pretrained('gpt2') + >>> input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0) # Batch size 1 + >>> outputs = model(input_ids) + >>> last_hidden_states = outputs[0] # The last hidden-state is the first element of the output tuple """ def __init__(self, config): @@ -567,12 +566,11 @@ class GPT2LMHeadModel(GPT2PreTrainedModel): Examples:: - config = GPT2Config.from_pretrained('gpt2') - tokenizer = GPT2Tokenizer.from_pretrained('gpt2') - model = GPT2LMHeadModel(config) - input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0) # Batch size 1 - outputs = model(input_ids, labels=input_ids) - loss, logits = outputs[:2] + >>> tokenizer = GPT2Tokenizer.from_pretrained('gpt2') + >>> model = GPT2LMHeadModel.from_pretrained('gpt2') + >>> input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0) # Batch size 1 + >>> outputs = model(input_ids, labels=input_ids) + >>> loss, logits = outputs[:2] """ def __init__(self, config): @@ -683,14 +681,13 @@ class GPT2DoubleHeadsModel(GPT2PreTrainedModel): Examples:: - config = GPT2Config.from_pretrained('gpt2') - tokenizer = 
GPT2Tokenizer.from_pretrained('gpt2') - model = GPT2DoubleHeadsModel(config) - choices = ["Hello, my dog is cute [CLS]", "Hello, my cat is cute [CLS]"] # Assume you've added [CLS] to the vocabulary - input_ids = torch.tensor([tokenizer.encode(s) for s in choices]).unsqueeze(0) # Batch size 1, 2 choices - mc_token_ids = torch.tensor([-1, -1]).unsqueeze(0) # Batch size 1 - outputs = model(input_ids, mc_token_ids) - lm_prediction_scores, mc_prediction_scores = outputs[:2] + >>> tokenizer = GPT2Tokenizer.from_pretrained('gpt2') + >>> model = GPT2DoubleHeadsModel.from_pretrained('gpt2') + >>> choices = ["Hello, my dog is cute [CLS]", "Hello, my cat is cute [CLS]"] # Assume you've added [CLS] to the vocabulary + >>> input_ids = torch.tensor([tokenizer.encode(s) for s in choices]).unsqueeze(0) # Batch size 1, 2 choices + >>> mc_token_ids = torch.tensor([-1, -1]).unsqueeze(0) # Batch size 1 + >>> outputs = model(input_ids, mc_token_ids) + >>> lm_prediction_scores, mc_prediction_scores = outputs[:2] """ def __init__(self, config): diff --git a/pytorch_transformers/modeling_openai.py b/pytorch_transformers/modeling_openai.py index 187c51c86e..500f455816 100644 --- a/pytorch_transformers/modeling_openai.py +++ b/pytorch_transformers/modeling_openai.py @@ -439,12 +439,11 @@ class OpenAIGPTModel(OpenAIGPTPreTrainedModel): Examples:: - config = OpenAIGPTConfig.from_pretrained('openai-gpt') - tokenizer = OpenAIGPTTokenizer.from_pretrained('openai-gpt') - model = OpenAIGPTModel(config) - input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0) # Batch size 1 - outputs = model(input_ids) - last_hidden_states = outputs[0] # The last hidden-state is the first element of the output tuple + >>> tokenizer = OpenAIGPTTokenizer.from_pretrained('openai-gpt') + >>> model = OpenAIGPTModel.from_pretrained('openai-gpt') + >>> input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0) # Batch size 1 + >>> outputs = model(input_ids) + >>> 
last_hidden_states = outputs[0] # The last hidden-state is the first element of the output tuple """ def __init__(self, config): @@ -558,12 +557,11 @@ class OpenAIGPTLMHeadModel(OpenAIGPTPreTrainedModel): Examples:: - config = OpenAIGPTConfig.from_pretrained('openai-gpt') - tokenizer = OpenAIGPTTokenizer.from_pretrained('openai-gpt') - model = OpenAIGPTLMHeadModel(config) - input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0) # Batch size 1 - outputs = model(input_ids, labels=input_ids) - loss, logits = outputs[:2] + >>> tokenizer = OpenAIGPTTokenizer.from_pretrained('openai-gpt') + >>> model = OpenAIGPTLMHeadModel.from_pretrained('openai-gpt') + >>> input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0) # Batch size 1 + >>> outputs = model(input_ids, labels=input_ids) + >>> loss, logits = outputs[:2] """ def __init__(self, config): @@ -665,14 +663,13 @@ class OpenAIGPTDoubleHeadsModel(OpenAIGPTPreTrainedModel): Examples:: - config = OpenAIGPTConfig.from_pretrained('openai-gpt') - tokenizer = OpenAIGPTTokenizer.from_pretrained('openai-gpt') - model = OpenAIGPTDoubleHeadsModel(config) - choices = ["Hello, my dog is cute [CLS]", "Hello, my cat is cute [CLS]"] # Assume you've added [CLS] to the vocabulary - input_ids = torch.tensor([tokenizer.encode(s) for s in choices]).unsqueeze(0) # Batch size 1, 2 choices - mc_token_ids = torch.tensor([-1, -1]).unsqueeze(0) # Batch size 1 - outputs = model(input_ids, mc_token_ids) - lm_prediction_scores, mc_prediction_scores = outputs[:2] + >>> tokenizer = OpenAIGPTTokenizer.from_pretrained('openai-gpt') + >>> model = OpenAIGPTDoubleHeadsModel.from_pretrained('openai-gpt') + >>> choices = ["Hello, my dog is cute [CLS]", "Hello, my cat is cute [CLS]"] # Assume you've added [CLS] to the vocabulary + >>> input_ids = torch.tensor([tokenizer.encode(s) for s in choices]).unsqueeze(0) # Batch size 1, 2 choices + >>> mc_token_ids = torch.tensor([-1, -1]).unsqueeze(0) # Batch size 1 + 
>>> outputs = model(input_ids, mc_token_ids) + >>> lm_prediction_scores, mc_prediction_scores = outputs[:2] """ def __init__(self, config): diff --git a/pytorch_transformers/modeling_transfo_xl.py b/pytorch_transformers/modeling_transfo_xl.py index 7c999edda7..927cc79fe6 100644 --- a/pytorch_transformers/modeling_transfo_xl.py +++ b/pytorch_transformers/modeling_transfo_xl.py @@ -968,12 +968,11 @@ class TransfoXLModel(TransfoXLPreTrainedModel): Examples:: - config = TransfoXLConfig.from_pretrained('transfo-xl-wt103') - tokenizer = TransfoXLTokenizer.from_pretrained('transfo-xl-wt103') - model = TransfoXLModel(config) - input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0) # Batch size 1 - outputs = model(input_ids) - last_hidden_states, mems = outputs[:2] + >>> tokenizer = TransfoXLTokenizer.from_pretrained('transfo-xl-wt103') + >>> model = TransfoXLModel.from_pretrained('transfo-xl-wt103') + >>> input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0) # Batch size 1 + >>> outputs = model(input_ids) + >>> last_hidden_states, mems = outputs[:2] """ def __init__(self, config): @@ -1284,12 +1283,11 @@ class TransfoXLLMHeadModel(TransfoXLPreTrainedModel): Examples:: - config = TransfoXLConfig.from_pretrained('transfo-xl-wt103') - tokenizer = TransfoXLTokenizer.from_pretrained('transfo-xl-wt103') - model = TransfoXLLMHeadModel(config) - input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0) # Batch size 1 - outputs = model(input_ids) - prediction_scores, mems = outputs[:2] + >>> tokenizer = TransfoXLTokenizer.from_pretrained('transfo-xl-wt103') + >>> model = TransfoXLLMHeadModel.from_pretrained('transfo-xl-wt103') + >>> input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0) # Batch size 1 + >>> outputs = model(input_ids) + >>> prediction_scores, mems = outputs[:2] """ def __init__(self, config): diff --git a/pytorch_transformers/modeling_xlm.py 
b/pytorch_transformers/modeling_xlm.py index 7325ff7875..ddf5fee328 100644 --- a/pytorch_transformers/modeling_xlm.py +++ b/pytorch_transformers/modeling_xlm.py @@ -472,12 +472,11 @@ class XLMModel(XLMPreTrainedModel): Examples:: - config = XLMConfig.from_pretrained('xlm-mlm-en-2048') - tokenizer = XLMTokenizer.from_pretrained('xlm-mlm-en-2048') - model = XLMModel(config) - input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0) # Batch size 1 - outputs = model(input_ids) - last_hidden_states = outputs[0] # The last hidden-state is the first element of the output tuple + >>> tokenizer = XLMTokenizer.from_pretrained('xlm-mlm-en-2048') + >>> model = XLMModel.from_pretrained('xlm-mlm-en-2048') + >>> input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0) # Batch size 1 + >>> outputs = model(input_ids) + >>> last_hidden_states = outputs[0] # The last hidden-state is the first element of the output tuple """ ATTRIBUTES = ['encoder', 'eos_index', 'pad_index', # 'with_output', @@ -745,12 +744,11 @@ class XLMWithLMHeadModel(XLMPreTrainedModel): Examples:: - config = XLMConfig.from_pretrained('xlm-mlm-en-2048') - tokenizer = XLMTokenizer.from_pretrained('xlm-mlm-en-2048') - model = XLMWithLMHeadModel(config) - input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0) # Batch size 1 - outputs = model(input_ids) - last_hidden_states = outputs[0] # The last hidden-state is the first element of the output tuple + >>> tokenizer = XLMTokenizer.from_pretrained('xlm-mlm-en-2048') + >>> model = XLMWithLMHeadModel.from_pretrained('xlm-mlm-en-2048') + >>> input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0) # Batch size 1 + >>> outputs = model(input_ids) + >>> last_hidden_states = outputs[0] # The last hidden-state is the first element of the output tuple """ def __init__(self, config): @@ -805,14 +803,12 @@ class XLMForSequenceClassification(XLMPreTrainedModel): Examples:: - config = 
XLMConfig.from_pretrained('xlm-mlm-en-2048') - tokenizer = XLMTokenizer.from_pretrained('xlm-mlm-en-2048') - - model = XLMForSequenceClassification(config) - input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0) # Batch size 1 - labels = torch.tensor([1]).unsqueeze(0) # Batch size 1 - outputs = model(input_ids, labels=labels) - loss, logits = outputs[:2] + >>> tokenizer = XLMTokenizer.from_pretrained('xlm-mlm-en-2048') + >>> model = XLMForSequenceClassification.from_pretrained('xlm-mlm-en-2048') + >>> input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0) # Batch size 1 + >>> labels = torch.tensor([1]).unsqueeze(0) # Batch size 1 + >>> outputs = model(input_ids, labels=labels) + >>> loss, logits = outputs[:2] """ def __init__(self, config): @@ -885,15 +881,13 @@ class XLMForQuestionAnswering(XLMPreTrainedModel): Examples:: - config = XLMConfig.from_pretrained('xlm-mlm-en-2048') - tokenizer = XLMTokenizer.from_pretrained('xlm-mlm-en-2048') - - model = XLMForQuestionAnswering(config) - input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0) # Batch size 1 - start_positions = torch.tensor([1]) - end_positions = torch.tensor([3]) - outputs = model(input_ids, start_positions=start_positions, end_positions=end_positions) - loss, start_scores, end_scores = outputs[:2] + >>> tokenizer = XLMTokenizer.from_pretrained('xlm-mlm-en-2048') + >>> model = XLMForQuestionAnswering.from_pretrained('xlm-mlm-en-2048') + >>> input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0) # Batch size 1 + >>> start_positions = torch.tensor([1]) + >>> end_positions = torch.tensor([3]) + >>> outputs = model(input_ids, start_positions=start_positions, end_positions=end_positions) + >>> loss, start_scores, end_scores = outputs[:2] """ def __init__(self, config): diff --git a/pytorch_transformers/modeling_xlnet.py b/pytorch_transformers/modeling_xlnet.py index 9c1752eb74..5b3e049ddf 100644 --- 
a/pytorch_transformers/modeling_xlnet.py +++ b/pytorch_transformers/modeling_xlnet.py @@ -712,12 +712,11 @@ class XLNetModel(XLNetPreTrainedModel): Examples:: - config = XLNetConfig.from_pretrained('xlnet-large-cased') - tokenizer = XLNetTokenizer.from_pretrained('xlnet-large-cased') - model = XLNetModel(config) - input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0) # Batch size 1 - outputs = model(input_ids) - last_hidden_states = outputs[0] # The last hidden-state is the first element of the output tuple + >>> tokenizer = XLNetTokenizer.from_pretrained('xlnet-large-cased') + >>> model = XLNetModel.from_pretrained('xlnet-large-cased') + >>> input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0) # Batch size 1 + >>> outputs = model(input_ids) + >>> last_hidden_states = outputs[0] # The last hidden-state is the first element of the output tuple """ def __init__(self, config): @@ -1019,17 +1018,16 @@ class XLNetLMHeadModel(XLNetPreTrainedModel): Examples:: - config = XLNetConfig.from_pretrained('xlnet-large-cased') - tokenizer = XLNetTokenizer.from_pretrained('xlnet-large-cased') - model = XLNetLMHeadModel(config) - # We show how to setup inputs to predict a next token using a bi-directional context. 
- input_ids = torch.tensor(tokenizer.encode("Hello, my dog is very ")).unsqueeze(0) # We will predict the masked token - perm_mask = torch.zeros((1, input_ids.shape[1], input_ids.shape[1]), dtype=torch.float) - perm_mask[:, :, -1] = 1.0 # Previous tokens don't see last token - target_mapping = torch.zeros((1, 1, input_ids.shape[1]), dtype=torch.float) # Shape [1, 1, seq_length] => let's predict one token - target_mapping[0, 0, -1] = 1.0 # Our first (and only) prediction will be the last token of the sequence (the masked token) - outputs = model(input_ids, perm_mask=perm_mask, target_mapping=target_mapping) - next_token_logits = outputs[0] # Output has shape [target_mapping.size(0), target_mapping.size(1), config.vocab_size] + >>> tokenizer = XLNetTokenizer.from_pretrained('xlnet-large-cased') + >>> model = XLNetLMHeadModel.from_pretrained('xlnet-large-cased') + >>> # We show how to setup inputs to predict a next token using a bi-directional context. + >>> input_ids = torch.tensor(tokenizer.encode("Hello, my dog is very ")).unsqueeze(0) # We will predict the masked token + >>> perm_mask = torch.zeros((1, input_ids.shape[1], input_ids.shape[1]), dtype=torch.float) + >>> perm_mask[:, :, -1] = 1.0 # Previous tokens don't see last token + >>> target_mapping = torch.zeros((1, 1, input_ids.shape[1]), dtype=torch.float) # Shape [1, 1, seq_length] => let's predict one token + >>> target_mapping[0, 0, -1] = 1.0 # Our first (and only) prediction will be the last token of the sequence (the masked token) + >>> outputs = model(input_ids, perm_mask=perm_mask, target_mapping=target_mapping) + >>> next_token_logits = outputs[0] # Output has shape [target_mapping.size(0), target_mapping.size(1), config.vocab_size] """ def __init__(self, config): @@ -1100,14 +1098,12 @@ class XLNetForSequenceClassification(XLNetPreTrainedModel): Examples:: - config = XLNetConfig.from_pretrained('xlnet-large-cased') - tokenizer = XLNetTokenizer.from_pretrained('xlnet-large-cased') - - model = 
XLNetForSequenceClassification(config) - input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0) # Batch size 1 - labels = torch.tensor([1]).unsqueeze(0) # Batch size 1 - outputs = model(input_ids, labels=labels) - loss, logits = outputs[:2] + >>> tokenizer = XLNetTokenizer.from_pretrained('xlnet-large-cased') + >>> model = XLNetForSequenceClassification.from_pretrained('xlnet-large-cased') + >>> input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0) # Batch size 1 + >>> labels = torch.tensor([1]).unsqueeze(0) # Batch size 1 + >>> outputs = model(input_ids, labels=labels) + >>> loss, logits = outputs[:2] """ def __init__(self, config): @@ -1200,15 +1196,13 @@ class XLNetForQuestionAnswering(XLNetPreTrainedModel): Examples:: - config = XLMConfig.from_pretrained('xlm-mlm-en-2048') - tokenizer = XLMTokenizer.from_pretrained('xlm-mlm-en-2048') - - model = XLMForQuestionAnswering(config) - input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0) # Batch size 1 - start_positions = torch.tensor([1]) - end_positions = torch.tensor([3]) - outputs = model(input_ids, start_positions=start_positions, end_positions=end_positions) - loss, start_scores, end_scores = outputs[:2] + >>> tokenizer = XLNetTokenizer.from_pretrained('xlnet-large-cased') + >>> model = XLNetForQuestionAnswering.from_pretrained('xlnet-large-cased') + >>> input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0) # Batch size 1 + >>> start_positions = torch.tensor([1]) + >>> end_positions = torch.tensor([3]) + >>> outputs = model(input_ids, start_positions=start_positions, end_positions=end_positions) + >>> loss, start_scores, end_scores = outputs[:2] """ def __init__(self, config):