From bfbe52ec397f0e43641ee58d4e347deff5216777 Mon Sep 17 00:00:00 2001 From: thomwolf Date: Sat, 27 Jul 2019 20:25:39 +0200 Subject: [PATCH] cleaning up example docstrings --- hubconfs/bert_hubconf.py | 206 ++++++++++---------- hubconfs/gpt2_hubconf.py | 84 ++++---- hubconfs/gpt_hubconf.py | 76 ++++---- hubconfs/transformer_xl_hubconf.py | 68 +++---- hubconfs/xlm_hubconf.py | 80 ++++---- hubconfs/xlnet_hubconf.1.py | 84 ++++---- pytorch_transformers/modeling_auto.py | 32 +-- pytorch_transformers/modeling_bert.py | 122 ++++++------ pytorch_transformers/modeling_gpt2.py | 40 ++-- pytorch_transformers/modeling_openai.py | 40 ++-- pytorch_transformers/modeling_transfo_xl.py | 24 +-- pytorch_transformers/modeling_utils.py | 32 +-- pytorch_transformers/modeling_xlm.py | 58 +++--- pytorch_transformers/modeling_xlnet.py | 68 +++---- pytorch_transformers/tokenization_auto.py | 4 +- 15 files changed, 509 insertions(+), 509 deletions(-) diff --git a/hubconfs/bert_hubconf.py b/hubconfs/bert_hubconf.py index a0221ff9e1..6e2830617f 100644 --- a/hubconfs/bert_hubconf.py +++ b/hubconfs/bert_hubconf.py @@ -84,12 +84,12 @@ def bertTokenizer(*args, **kwargs): Default: ["[UNK]", "[SEP]", "[PAD]", "[CLS]", "[MASK]"] Example: - >>> import torch - >>> sentence = 'Hello, World!' - >>> tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'bertTokenizer', 'bert-base-cased', do_basic_tokenize=False) - >>> toks = tokenizer.tokenize(sentence) + import torch + sentence = 'Hello, World!' 
+ tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'bertTokenizer', 'bert-base-cased', do_basic_tokenize=False) + toks = tokenizer.tokenize(sentence) ['Hello', '##,', 'World', '##!'] - >>> ids = tokenizer.convert_tokens_to_ids(toks) + ids = tokenizer.convert_tokens_to_ids(toks) [8667, 28136, 1291, 28125] """ tokenizer = BertTokenizer.from_pretrained(*args, **kwargs) @@ -105,20 +105,20 @@ def bertModel(*args, **kwargs): Example: # Load the tokenizer - >>> import torch - >>> tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'bertTokenizer', 'bert-base-cased', do_basic_tokenize=False) + import torch + tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'bertTokenizer', 'bert-base-cased', do_basic_tokenize=False) # Prepare tokenized input - >>> text = "[CLS] Who was Jim Henson ? [SEP] Jim Henson was a puppeteer [SEP]" - >>> tokenized_text = tokenizer.tokenize(text) - >>> indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text) - >>> segments_ids = [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] - >>> tokens_tensor = torch.tensor([indexed_tokens]) - >>> segments_tensors = torch.tensor([segments_ids]) + text = "[CLS] Who was Jim Henson ? 
[SEP] Jim Henson was a puppeteer [SEP]" + tokenized_text = tokenizer.tokenize(text) + indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text) + segments_ids = [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] + tokens_tensor = torch.tensor([indexed_tokens]) + segments_tensors = torch.tensor([segments_ids]) # Load bertModel - >>> model = torch.hub.load('huggingface/pytorch-transformers', 'bertModel', 'bert-base-cased') - >>> model.eval() + model = torch.hub.load('huggingface/pytorch-transformers', 'bertModel', 'bert-base-cased') + model.eval() # Predict hidden states features for each layer - >>> with torch.no_grad(): + with torch.no_grad(): encoded_layers, _ = model(tokens_tensor, segments_tensors) """ model = BertModel.from_pretrained(*args, **kwargs) @@ -134,20 +134,20 @@ def bertForNextSentencePrediction(*args, **kwargs): Example: # Load the tokenizer - >>> import torch - >>> tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'bertTokenizer', 'bert-base-cased', do_basic_tokenize=False) + import torch + tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'bertTokenizer', 'bert-base-cased', do_basic_tokenize=False) # Prepare tokenized input - >>> text = "[CLS] Who was Jim Henson ? [SEP] Jim Henson was a puppeteer [SEP]" - >>> tokenized_text = tokenizer.tokenize(text) - >>> indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text) - >>> segments_ids = [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] - >>> tokens_tensor = torch.tensor([indexed_tokens]) - >>> segments_tensors = torch.tensor([segments_ids]) + text = "[CLS] Who was Jim Henson ? 
[SEP] Jim Henson was a puppeteer [SEP]" + tokenized_text = tokenizer.tokenize(text) + indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text) + segments_ids = [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] + tokens_tensor = torch.tensor([indexed_tokens]) + segments_tensors = torch.tensor([segments_ids]) # Load bertForNextSentencePrediction - >>> model = torch.hub.load('huggingface/pytorch-transformers', 'bertForNextSentencePrediction', 'bert-base-cased') - >>> model.eval() + model = torch.hub.load('huggingface/pytorch-transformers', 'bertForNextSentencePrediction', 'bert-base-cased') + model.eval() # Predict the next sentence classification logits - >>> with torch.no_grad(): + with torch.no_grad(): next_sent_classif_logits = model(tokens_tensor, segments_tensors) """ model = BertForNextSentencePrediction.from_pretrained(*args, **kwargs) @@ -164,17 +164,17 @@ def bertForPreTraining(*args, **kwargs): Example: # Load the tokenizer - >>> import torch - >>> tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'bertTokenizer', 'bert-base-cased', do_basic_tokenize=False) + import torch + tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'bertTokenizer', 'bert-base-cased', do_basic_tokenize=False) # Prepare tokenized input - >>> text = "[CLS] Who was Jim Henson ? [SEP] Jim Henson was a puppeteer [SEP]" - >>> tokenized_text = tokenizer.tokenize(text) - >>> segments_ids = [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] - >>> tokens_tensor = torch.tensor([indexed_tokens]) - >>> segments_tensors = torch.tensor([segments_ids]) + text = "[CLS] Who was Jim Henson ? 
[SEP] Jim Henson was a puppeteer [SEP]" + tokenized_text = tokenizer.tokenize(text) + segments_ids = [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] + tokens_tensor = torch.tensor([indexed_tokens]) + segments_tensors = torch.tensor([segments_ids]) # Load bertForPreTraining - >>> model = torch.hub.load('huggingface/pytorch-transformers', 'bertForPreTraining', 'bert-base-cased') - >>> masked_lm_logits_scores, seq_relationship_logits = model(tokens_tensor, segments_tensors) + model = torch.hub.load('huggingface/pytorch-transformers', 'bertForPreTraining', 'bert-base-cased') + masked_lm_logits_scores, seq_relationship_logits = model(tokens_tensor, segments_tensors) """ model = BertForPreTraining.from_pretrained(*args, **kwargs) return model @@ -188,25 +188,25 @@ def bertForMaskedLM(*args, **kwargs): Example: # Load the tokenizer - >>> import torch - >>> tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'bertTokenizer', 'bert-base-cased', do_basic_tokenize=False) + import torch + tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'bertTokenizer', 'bert-base-cased', do_basic_tokenize=False) # Prepare tokenized input - >>> text = "[CLS] Who was Jim Henson ? [SEP] Jim Henson was a puppeteer [SEP]" - >>> tokenized_text = tokenizer.tokenize(text) - >>> masked_index = 8 - >>> tokenized_text[masked_index] = '[MASK]' - >>> indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text) - >>> segments_ids = [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] - >>> tokens_tensor = torch.tensor([indexed_tokens]) - >>> segments_tensors = torch.tensor([segments_ids]) + text = "[CLS] Who was Jim Henson ? 
[SEP] Jim Henson was a puppeteer [SEP]" + tokenized_text = tokenizer.tokenize(text) + masked_index = 8 + tokenized_text[masked_index] = '[MASK]' + indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text) + segments_ids = [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] + tokens_tensor = torch.tensor([indexed_tokens]) + segments_tensors = torch.tensor([segments_ids]) # Load bertForMaskedLM - >>> model = torch.hub.load('huggingface/pytorch-transformers', 'bertForMaskedLM', 'bert-base-cased') - >>> model.eval() + model = torch.hub.load('huggingface/pytorch-transformers', 'bertForMaskedLM', 'bert-base-cased') + model.eval() # Predict all tokens - >>> with torch.no_grad(): + with torch.no_grad(): predictions = model(tokens_tensor, segments_tensors) - >>> predicted_index = torch.argmax(predictions[0, masked_index]).item() - >>> predicted_token = tokenizer.convert_ids_to_tokens([predicted_index])[0] + predicted_index = torch.argmax(predictions[0, masked_index]).item() + predicted_token = tokenizer.convert_ids_to_tokens([predicted_index])[0] 'henson' """ model = BertForMaskedLM.from_pretrained(*args, **kwargs) @@ -230,24 +230,24 @@ def bertForSequenceClassification(*args, **kwargs): Example: # Load the tokenizer - >>> import torch - >>> tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'bertTokenizer', 'bert-base-cased', do_basic_tokenize=False) + import torch + tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'bertTokenizer', 'bert-base-cased', do_basic_tokenize=False) # Prepare tokenized input - >>> text = "[CLS] Who was Jim Henson ? [SEP] Jim Henson was a puppeteer [SEP]" - >>> tokenized_text = tokenizer.tokenize(text) - >>> indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text) - >>> segments_ids = [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] - >>> tokens_tensor = torch.tensor([indexed_tokens]) - >>> segments_tensors = torch.tensor([segments_ids]) + text = "[CLS] Who was Jim Henson ? 
[SEP] Jim Henson was a puppeteer [SEP]" + tokenized_text = tokenizer.tokenize(text) + indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text) + segments_ids = [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] + tokens_tensor = torch.tensor([indexed_tokens]) + segments_tensors = torch.tensor([segments_ids]) # Load bertForSequenceClassification - >>> model = torch.hub.load('huggingface/pytorch-transformers', 'bertForSequenceClassification', 'bert-base-cased', num_labels=2) - >>> model.eval() + model = torch.hub.load('huggingface/pytorch-transformers', 'bertForSequenceClassification', 'bert-base-cased', num_labels=2) + model.eval() # Predict the sequence classification logits - >>> with torch.no_grad(): + with torch.no_grad(): seq_classif_logits = model(tokens_tensor, segments_tensors) # Or get the sequence classification loss - >>> labels = torch.tensor([1]) - >>> seq_classif_loss = model(tokens_tensor, segments_tensors, labels=labels) # set model.train() before if training this loss + labels = torch.tensor([1]) + seq_classif_loss = model(tokens_tensor, segments_tensors, labels=labels) # set model.train() before if training this loss """ model = BertForSequenceClassification.from_pretrained(*args, **kwargs) return model @@ -265,24 +265,24 @@ def bertForMultipleChoice(*args, **kwargs): Example: # Load the tokenizer - >>> import torch - >>> tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'bertTokenizer', 'bert-base-cased', do_basic_tokenize=False) + import torch + tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'bertTokenizer', 'bert-base-cased', do_basic_tokenize=False) # Prepare tokenized input - >>> text = "[CLS] Who was Jim Henson ? 
[SEP] Jim Henson was a puppeteer [SEP]" - >>> tokenized_text = tokenizer.tokenize(text) - >>> indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text) - >>> segments_ids = [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] - >>> tokens_tensor = torch.tensor([indexed_tokens, indexed_tokens]).unsqueeze(0) - >>> segments_tensors = torch.tensor([segments_ids, segments_ids]).unsqueeze(0) + text = "[CLS] Who was Jim Henson ? [SEP] Jim Henson was a puppeteer [SEP]" + tokenized_text = tokenizer.tokenize(text) + indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text) + segments_ids = [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] + tokens_tensor = torch.tensor([indexed_tokens, indexed_tokens]).unsqueeze(0) + segments_tensors = torch.tensor([segments_ids, segments_ids]).unsqueeze(0) # Load bertForMultipleChoice - >>> model = torch.hub.load('huggingface/pytorch-transformers', 'bertForMultipleChoice', 'bert-base-cased', num_choices=2) - >>> model.eval() + model = torch.hub.load('huggingface/pytorch-transformers', 'bertForMultipleChoice', 'bert-base-cased', num_choices=2) + model.eval() # Predict the multiple choice logits - >>> with torch.no_grad(): + with torch.no_grad(): multiple_choice_logits = model(tokens_tensor, segments_tensors) # Or get the multiple choice loss - >>> labels = torch.tensor([1]) - >>> multiple_choice_loss = model(tokens_tensor, segments_tensors, labels=labels) # set model.train() before if training this loss + labels = torch.tensor([1]) + multiple_choice_loss = model(tokens_tensor, segments_tensors, labels=labels) # set model.train() before if training this loss """ model = BertForMultipleChoice.from_pretrained(*args, **kwargs) return model @@ -298,25 +298,25 @@ def bertForQuestionAnswering(*args, **kwargs): Example: # Load the tokenizer - >>> import torch - >>> tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'bertTokenizer', 'bert-base-cased', do_basic_tokenize=False) + import torch + tokenizer = 
torch.hub.load('huggingface/pytorch-transformers', 'bertTokenizer', 'bert-base-cased', do_basic_tokenize=False) # Prepare tokenized input - >>> text = "[CLS] Who was Jim Henson ? [SEP] Jim Henson was a puppeteer [SEP]" - >>> tokenized_text = tokenizer.tokenize(text) - >>> indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text) - >>> segments_ids = [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] - >>> tokens_tensor = torch.tensor([indexed_tokens]) - >>> segments_tensors = torch.tensor([segments_ids]) + text = "[CLS] Who was Jim Henson ? [SEP] Jim Henson was a puppeteer [SEP]" + tokenized_text = tokenizer.tokenize(text) + indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text) + segments_ids = [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] + tokens_tensor = torch.tensor([indexed_tokens]) + segments_tensors = torch.tensor([segments_ids]) # Load bertForQuestionAnswering - >>> model = torch.hub.load('huggingface/pytorch-transformers', 'bertForQuestionAnswering', 'bert-base-cased') - >>> model.eval() + model = torch.hub.load('huggingface/pytorch-transformers', 'bertForQuestionAnswering', 'bert-base-cased') + model.eval() # Predict the start and end positions logits - >>> with torch.no_grad(): + with torch.no_grad(): start_logits, end_logits = model(tokens_tensor, segments_tensors) # Or get the total loss which is the sum of the CrossEntropy loss for the start and end token positions - >>> start_positions, end_positions = torch.tensor([12]), torch.tensor([14]) + start_positions, end_positions = torch.tensor([12]), torch.tensor([14]) # set model.train() before if training this loss - >>> multiple_choice_loss = model(tokens_tensor, segments_tensors, start_positions=start_positions, end_positions=end_positions) + multiple_choice_loss = model(tokens_tensor, segments_tensors, start_positions=start_positions, end_positions=end_positions) """ model = BertForQuestionAnswering.from_pretrained(*args, **kwargs) return model @@ -337,24 +337,24 @@ def 
bertForTokenClassification(*args, **kwargs): Example: # Load the tokenizer - >>> import torch - >>> tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'bertTokenizer', 'bert-base-cased', do_basic_tokenize=False) + import torch + tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'bertTokenizer', 'bert-base-cased', do_basic_tokenize=False) # Prepare tokenized input - >>> text = "[CLS] Who was Jim Henson ? [SEP] Jim Henson was a puppeteer [SEP]" - >>> tokenized_text = tokenizer.tokenize(text) - >>> indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text) - >>> segments_ids = [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] - >>> tokens_tensor = torch.tensor([indexed_tokens]) - >>> segments_tensors = torch.tensor([segments_ids]) + text = "[CLS] Who was Jim Henson ? [SEP] Jim Henson was a puppeteer [SEP]" + tokenized_text = tokenizer.tokenize(text) + indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text) + segments_ids = [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] + tokens_tensor = torch.tensor([indexed_tokens]) + segments_tensors = torch.tensor([segments_ids]) # Load bertForTokenClassification - >>> model = torch.hub.load('huggingface/pytorch-transformers', 'bertForTokenClassification', 'bert-base-cased', num_labels=2) - >>> model.eval() + model = torch.hub.load('huggingface/pytorch-transformers', 'bertForTokenClassification', 'bert-base-cased', num_labels=2) + model.eval() # Predict the token classification logits - >>> with torch.no_grad(): + with torch.no_grad(): classif_logits = model(tokens_tensor, segments_tensors) # Or get the token classification loss - >>> labels = torch.tensor([[0, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 0]]) - >>> classif_loss = model(tokens_tensor, segments_tensors, labels=labels) # set model.train() before if training this loss + labels = torch.tensor([[0, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 0]]) + classif_loss = model(tokens_tensor, segments_tensors, labels=labels) # set 
model.train() before if training this loss """ model = BertForTokenClassification.from_pretrained(*args, **kwargs) return model diff --git a/hubconfs/gpt2_hubconf.py b/hubconfs/gpt2_hubconf.py index dbaa2cd612..18afad3913 100644 --- a/hubconfs/gpt2_hubconf.py +++ b/hubconfs/gpt2_hubconf.py @@ -52,11 +52,11 @@ def gpt2Tokenizer(*args, **kwargs): Default: None Example: - >>> import torch - >>> tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'gpt2Tokenizer', 'gpt2') + import torch + tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'gpt2Tokenizer', 'gpt2') - >>> text = "Who was Jim Henson ?" - >>> indexed_tokens = tokenizer.encode(tokenized_text) + text = "Who was Jim Henson ?" + indexed_tokens = tokenizer.encode(tokenized_text) """ tokenizer = GPT2Tokenizer.from_pretrained(*args, **kwargs) return tokenizer @@ -71,24 +71,24 @@ def gpt2Model(*args, **kwargs): Example: # Load the tokenizer - >>> import torch - >>> tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'gpt2Tokenizer', 'gpt2') + import torch + tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'gpt2Tokenizer', 'gpt2') # Prepare tokenized input - >>> text_1 = "Who was Jim Henson ?" - >>> text_2 = "Jim Henson was a puppeteer" - >>> indexed_tokens_1 = tokenizer.encode(text_1) - >>> indexed_tokens_2 = tokenizer.encode(text_2) - >>> tokens_tensor_1 = torch.tensor([indexed_tokens_1]) - >>> tokens_tensor_2 = torch.tensor([indexed_tokens_2]) + text_1 = "Who was Jim Henson ?" 
+ text_2 = "Jim Henson was a puppeteer" + indexed_tokens_1 = tokenizer.encode(text_1) + indexed_tokens_2 = tokenizer.encode(text_2) + tokens_tensor_1 = torch.tensor([indexed_tokens_1]) + tokens_tensor_2 = torch.tensor([indexed_tokens_2]) # Load gpt2Model - >>> model = torch.hub.load('huggingface/pytorch-transformers', 'gpt2Model', 'gpt2') - >>> model.eval() + model = torch.hub.load('huggingface/pytorch-transformers', 'gpt2Model', 'gpt2') + model.eval() # Predict hidden states features for each layer # past can be used to reuse precomputed hidden state in a subsequent predictions - >>> with torch.no_grad(): + with torch.no_grad(): hidden_states_1, past = model(tokens_tensor_1) hidden_states_2, past = model(tokens_tensor_2, past=past) """ @@ -104,31 +104,31 @@ def gpt2LMHeadModel(*args, **kwargs): Example: # Load the tokenizer - >>> import torch - >>> tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'gpt2Tokenizer', 'gpt2') + import torch + tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'gpt2Tokenizer', 'gpt2') # Prepare tokenized input - >>> text_1 = "Who was Jim Henson ?" - >>> text_2 = "Jim Henson was a puppeteer" - >>> indexed_tokens_1 = tokenizer.encode(text_1) - >>> indexed_tokens_2 = tokenizer.encode(text_2) - >>> tokens_tensor_1 = torch.tensor([indexed_tokens_1]) - >>> tokens_tensor_2 = torch.tensor([indexed_tokens_2]) + text_1 = "Who was Jim Henson ?" 
+ text_2 = "Jim Henson was a puppeteer" + indexed_tokens_1 = tokenizer.encode(text_1) + indexed_tokens_2 = tokenizer.encode(text_2) + tokens_tensor_1 = torch.tensor([indexed_tokens_1]) + tokens_tensor_2 = torch.tensor([indexed_tokens_2]) # Load gpt2LMHeadModel - >>> model = torch.hub.load('huggingface/pytorch-transformers', 'gpt2LMHeadModel', 'gpt2') - >>> model.eval() + model = torch.hub.load('huggingface/pytorch-transformers', 'gpt2LMHeadModel', 'gpt2') + model.eval() # Predict hidden states features for each layer # past can be used to reuse precomputed hidden state in a subsequent predictions - >>> with torch.no_grad(): + with torch.no_grad(): predictions_1, past = model(tokens_tensor_1) predictions_2, past = model(tokens_tensor_2, past=past) # Get the predicted last token - >>> predicted_index = torch.argmax(predictions_2[0, -1, :]).item() - >>> predicted_token = tokenizer.decode([predicted_index]) - >>> assert predicted_token == ' who' + predicted_index = torch.argmax(predictions_2[0, -1, :]).item() + predicted_token = tokenizer.decode([predicted_index]) + assert predicted_token == ' who' """ model = GPT2LMHeadModel.from_pretrained(*args, **kwargs) return model @@ -143,25 +143,25 @@ def gpt2DoubleHeadsModel(*args, **kwargs): Example: # Load the tokenizer - >>> import torch - >>> tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'gpt2Tokenizer', 'gpt2') + import torch + tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'gpt2Tokenizer', 'gpt2') # Prepare tokenized input - >>> text1 = "Who was Jim Henson ? Jim Henson was a puppeteer" - >>> text2 = "Who was Jim Henson ? 
Jim Henson was a mysterious young man" - >>> tokenized_text1 = tokenizer.tokenize(text1) - >>> tokenized_text2 = tokenizer.tokenize(text2) - >>> indexed_tokens1 = tokenizer.convert_tokens_to_ids(tokenized_text1) - >>> indexed_tokens2 = tokenizer.convert_tokens_to_ids(tokenized_text2) - >>> tokens_tensor = torch.tensor([[indexed_tokens1, indexed_tokens2]]) - >>> mc_token_ids = torch.LongTensor([[len(tokenized_text1)-1, len(tokenized_text2)-1]]) + text1 = "Who was Jim Henson ? Jim Henson was a puppeteer" + text2 = "Who was Jim Henson ? Jim Henson was a mysterious young man" + tokenized_text1 = tokenizer.tokenize(text1) + tokenized_text2 = tokenizer.tokenize(text2) + indexed_tokens1 = tokenizer.convert_tokens_to_ids(tokenized_text1) + indexed_tokens2 = tokenizer.convert_tokens_to_ids(tokenized_text2) + tokens_tensor = torch.tensor([[indexed_tokens1, indexed_tokens2]]) + mc_token_ids = torch.LongTensor([[len(tokenized_text1)-1, len(tokenized_text2)-1]]) # Load gpt2DoubleHeadsModel - >>> model = torch.hub.load('huggingface/pytorch-transformers', 'gpt2DoubleHeadsModel', 'gpt2') - >>> model.eval() + model = torch.hub.load('huggingface/pytorch-transformers', 'gpt2DoubleHeadsModel', 'gpt2') + model.eval() # Predict hidden states features for each layer - >>> with torch.no_grad(): + with torch.no_grad(): lm_logits, multiple_choice_logits, presents = model(tokens_tensor, mc_token_ids) """ model = GPT2DoubleHeadsModel.from_pretrained(*args, **kwargs) diff --git a/hubconfs/gpt_hubconf.py b/hubconfs/gpt_hubconf.py index c58c1fa708..649075980c 100644 --- a/hubconfs/gpt_hubconf.py +++ b/hubconfs/gpt_hubconf.py @@ -76,12 +76,12 @@ def openAIGPTTokenizer(*args, **kwargs): Default: None Example: - >>> import torch - >>> tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'openAIGPTTokenizer', 'openai-gpt') + import torch + tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'openAIGPTTokenizer', 'openai-gpt') - >>> text = "Who was Jim Henson ? 
Jim Henson was a puppeteer" - >>> tokenized_text = tokenizer.tokenize(text) - >>> indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text) + text = "Who was Jim Henson ? Jim Henson was a puppeteer" + tokenized_text = tokenizer.tokenize(text) + indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text) [763, 509, 4265, 2298, 945, 257, 4265, 2298, 945, 509, 246, 10148, 39041, 483] """ tokenizer = OpenAIGPTTokenizer.from_pretrained(*args, **kwargs) @@ -97,21 +97,21 @@ def openAIGPTModel(*args, **kwargs): Example: # Load the tokenizer - >>> import torch - >>> tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'openAIGPTTokenizer', 'openai-gpt') + import torch + tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'openAIGPTTokenizer', 'openai-gpt') # Prepare tokenized input - >>> text = "Who was Jim Henson ? Jim Henson was a puppeteer" - >>> tokenized_text = tokenizer.tokenize(text) - >>> indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text) - >>> tokens_tensor = torch.tensor([indexed_tokens]) + text = "Who was Jim Henson ? 
Jim Henson was a puppeteer" + tokenized_text = tokenizer.tokenize(text) + indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text) + tokens_tensor = torch.tensor([indexed_tokens]) # Load openAIGPTModel - >>> model = torch.hub.load('huggingface/pytorch-transformers', 'openAIGPTModel', 'openai-gpt') - >>> model.eval() + model = torch.hub.load('huggingface/pytorch-transformers', 'openAIGPTModel', 'openai-gpt') + model.eval() # Predict hidden states features for each layer - >>> with torch.no_grad(): + with torch.no_grad(): hidden_states = model(tokens_tensor) """ model = OpenAIGPTModel.from_pretrained(*args, **kwargs) @@ -126,26 +126,26 @@ def openAIGPTLMHeadModel(*args, **kwargs): Example: # Load the tokenizer - >>> import torch - >>> tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'openAIGPTTokenizer', 'openai-gpt') + import torch + tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'openAIGPTTokenizer', 'openai-gpt') # Prepare tokenized input - >>> text = "Who was Jim Henson ? Jim Henson was a puppeteer" - >>> tokenized_text = tokenizer.tokenize(text) - >>> indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text) - >>> tokens_tensor = torch.tensor([indexed_tokens]) + text = "Who was Jim Henson ? 
Jim Henson was a puppeteer" + tokenized_text = tokenizer.tokenize(text) + indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text) + tokens_tensor = torch.tensor([indexed_tokens]) # Load openAIGPTLMHeadModel - >>> model = torch.hub.load('huggingface/pytorch-transformers', 'openAIGPTLMHeadModel', 'openai-gpt') - >>> model.eval() + model = torch.hub.load('huggingface/pytorch-transformers', 'openAIGPTLMHeadModel', 'openai-gpt') + model.eval() # Predict hidden states features for each layer - >>> with torch.no_grad(): + with torch.no_grad(): predictions = model(tokens_tensor) # Get the predicted last token - >>> predicted_index = torch.argmax(predictions[0, -1, :]).item() - >>> predicted_token = tokenizer.convert_ids_to_tokens([predicted_index])[0] + predicted_index = torch.argmax(predictions[0, -1, :]).item() + predicted_token = tokenizer.convert_ids_to_tokens([predicted_index])[0] '.' """ model = OpenAIGPTLMHeadModel.from_pretrained(*args, **kwargs) @@ -161,25 +161,25 @@ def openAIGPTDoubleHeadsModel(*args, **kwargs): Example: # Load the tokenizer - >>> import torch - >>> tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'openAIGPTTokenizer', 'openai-gpt') + import torch + tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'openAIGPTTokenizer', 'openai-gpt') # Prepare tokenized input - >>> text1 = "Who was Jim Henson ? Jim Henson was a puppeteer" - >>> text2 = "Who was Jim Henson ? Jim Henson was a mysterious young man" - >>> tokenized_text1 = tokenizer.tokenize(text1) - >>> tokenized_text2 = tokenizer.tokenize(text2) - >>> indexed_tokens1 = tokenizer.convert_tokens_to_ids(tokenized_text1) - >>> indexed_tokens2 = tokenizer.convert_tokens_to_ids(tokenized_text2) - >>> tokens_tensor = torch.tensor([[indexed_tokens1, indexed_tokens2]]) - >>> mc_token_ids = torch.LongTensor([[len(tokenized_text1)-1, len(tokenized_text2)-1]]) + text1 = "Who was Jim Henson ? Jim Henson was a puppeteer" + text2 = "Who was Jim Henson ? 
Jim Henson was a mysterious young man" + tokenized_text1 = tokenizer.tokenize(text1) + tokenized_text2 = tokenizer.tokenize(text2) + indexed_tokens1 = tokenizer.convert_tokens_to_ids(tokenized_text1) + indexed_tokens2 = tokenizer.convert_tokens_to_ids(tokenized_text2) + tokens_tensor = torch.tensor([[indexed_tokens1, indexed_tokens2]]) + mc_token_ids = torch.LongTensor([[len(tokenized_text1)-1, len(tokenized_text2)-1]]) # Load openAIGPTDoubleHeadsModel - >>> model = torch.hub.load('huggingface/pytorch-transformers', 'openAIGPTDoubleHeadsModel', 'openai-gpt') - >>> model.eval() + model = torch.hub.load('huggingface/pytorch-transformers', 'openAIGPTDoubleHeadsModel', 'openai-gpt') + model.eval() # Predict hidden states features for each layer - >>> with torch.no_grad(): + with torch.no_grad(): lm_logits, multiple_choice_logits = model(tokens_tensor, mc_token_ids) """ model = OpenAIGPTDoubleHeadsModel.from_pretrained(*args, **kwargs) diff --git a/hubconfs/transformer_xl_hubconf.py b/hubconfs/transformer_xl_hubconf.py index cfcc6aef5a..548d407581 100644 --- a/hubconfs/transformer_xl_hubconf.py +++ b/hubconfs/transformer_xl_hubconf.py @@ -45,12 +45,12 @@ def transformerXLTokenizer(*args, **kwargs): * transfo-xl-wt103 Example: - >>> import torch - >>> tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'transformerXLTokenizer', 'transfo-xl-wt103') + import torch + tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'transformerXLTokenizer', 'transfo-xl-wt103') - >>> text = "Who was Jim Henson ?" - >>> tokenized_text = tokenizer.tokenize(tokenized_text) - >>> indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text) + text = "Who was Jim Henson ?" 
+ tokenized_text = tokenizer.tokenize(tokenized_text) + indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text) """ tokenizer = TransfoXLTokenizer.from_pretrained(*args, **kwargs) return tokenizer @@ -63,26 +63,26 @@ def transformerXLModel(*args, **kwargs): Example: # Load the tokenizer - >>> import torch - >>> tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'transformerXLTokenizer', 'transfo-xl-wt103') + import torch + tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'transformerXLTokenizer', 'transfo-xl-wt103') # Prepare tokenized input - >>> text_1 = "Who was Jim Henson ?" - >>> text_2 = "Jim Henson was a puppeteer" - >>> tokenized_text_1 = tokenizer.tokenize(text_1) - >>> tokenized_text_2 = tokenizer.tokenize(text_2) - >>> indexed_tokens_1 = tokenizer.convert_tokens_to_ids(tokenized_text_1) - >>> indexed_tokens_2 = tokenizer.convert_tokens_to_ids(tokenized_text_2) - >>> tokens_tensor_1 = torch.tensor([indexed_tokens_1]) - >>> tokens_tensor_2 = torch.tensor([indexed_tokens_2]) + text_1 = "Who was Jim Henson ?" 
+ text_2 = "Jim Henson was a puppeteer" + tokenized_text_1 = tokenizer.tokenize(text_1) + tokenized_text_2 = tokenizer.tokenize(text_2) + indexed_tokens_1 = tokenizer.convert_tokens_to_ids(tokenized_text_1) + indexed_tokens_2 = tokenizer.convert_tokens_to_ids(tokenized_text_2) + tokens_tensor_1 = torch.tensor([indexed_tokens_1]) + tokens_tensor_2 = torch.tensor([indexed_tokens_2]) # Load transformerXLModel - >>> model = torch.hub.load('huggingface/pytorch-transformers', 'transformerXLModel', 'transfo-xl-wt103') - >>> model.eval() + model = torch.hub.load('huggingface/pytorch-transformers', 'transformerXLModel', 'transfo-xl-wt103') + model.eval() # Predict hidden states features for each layer # We can re-use the memory cells in a subsequent call to attend a longer context - >>> with torch.no_grad(): + with torch.no_grad(): hidden_states_1, mems_1 = model(tokens_tensor_1) hidden_states_2, mems_2 = model(tokens_tensor_2, mems=mems_1) """ @@ -98,33 +98,33 @@ def transformerXLLMHeadModel(*args, **kwargs): Example: # Load the tokenizer - >>> import torch - >>> tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'transformerXLTokenizer', 'transfo-xl-wt103') + import torch + tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'transformerXLTokenizer', 'transfo-xl-wt103') # Prepare tokenized input - >>> text_1 = "Who was Jim Henson ?" - >>> text_2 = "Jim Henson was a puppeteer" - >>> tokenized_text_1 = tokenizer.tokenize(text_1) - >>> tokenized_text_2 = tokenizer.tokenize(text_2) - >>> indexed_tokens_1 = tokenizer.convert_tokens_to_ids(tokenized_text_1) - >>> indexed_tokens_2 = tokenizer.convert_tokens_to_ids(tokenized_text_2) - >>> tokens_tensor_1 = torch.tensor([indexed_tokens_1]) - >>> tokens_tensor_2 = torch.tensor([indexed_tokens_2]) + text_1 = "Who was Jim Henson ?" 
+ text_2 = "Jim Henson was a puppeteer" + tokenized_text_1 = tokenizer.tokenize(text_1) + tokenized_text_2 = tokenizer.tokenize(text_2) + indexed_tokens_1 = tokenizer.convert_tokens_to_ids(tokenized_text_1) + indexed_tokens_2 = tokenizer.convert_tokens_to_ids(tokenized_text_2) + tokens_tensor_1 = torch.tensor([indexed_tokens_1]) + tokens_tensor_2 = torch.tensor([indexed_tokens_2]) # Load transformerXLLMHeadModel - >>> model = torch.hub.load('huggingface/pytorch-transformers', 'transformerXLLMHeadModel', 'transfo-xl-wt103') - >>> model.eval() + model = torch.hub.load('huggingface/pytorch-transformers', 'transformerXLLMHeadModel', 'transfo-xl-wt103') + model.eval() # Predict hidden states features for each layer # We can re-use the memory cells in a subsequent call to attend a longer context - >>> with torch.no_grad(): + with torch.no_grad(): predictions_1, mems_1 = model(tokens_tensor_1) predictions_2, mems_2 = model(tokens_tensor_2, mems=mems_1) # Get the predicted last token - >>> predicted_index = torch.argmax(predictions_2[0, -1, :]).item() - >>> predicted_token = tokenizer.convert_ids_to_tokens([predicted_index])[0] - >>> assert predicted_token == 'who' + predicted_index = torch.argmax(predictions_2[0, -1, :]).item() + predicted_token = tokenizer.convert_ids_to_tokens([predicted_index])[0] + assert predicted_token == 'who' """ model = TransfoXLLMHeadModel.from_pretrained(*args, **kwargs) return model diff --git a/hubconfs/xlm_hubconf.py b/hubconfs/xlm_hubconf.py index 4f6fd93c24..e96d923944 100644 --- a/hubconfs/xlm_hubconf.py +++ b/hubconfs/xlm_hubconf.py @@ -17,16 +17,16 @@ xlm_start_docstring = """ Example: # Load the tokenizer - >>> import torch - >>> tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'xlmTokenizer', 'xlm-mlm-en-2048') + import torch + tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'xlmTokenizer', 'xlm-mlm-en-2048') # Prepare tokenized input - >>> text_1 = "Who was Jim Henson ?" 
- >>> text_2 = "Jim Henson was a puppeteer" - >>> indexed_tokens_1 = tokenizer.encode(text_1) - >>> indexed_tokens_2 = tokenizer.encode(text_2) - >>> tokens_tensor_1 = torch.tensor([indexed_tokens_1]) - >>> tokens_tensor_2 = torch.tensor([indexed_tokens_2]) + text_1 = "Who was Jim Henson ?" + text_2 = "Jim Henson was a puppeteer" + indexed_tokens_1 = tokenizer.encode(text_1) + indexed_tokens_2 = tokenizer.encode(text_2) + tokens_tensor_1 = torch.tensor([indexed_tokens_1]) + tokens_tensor_2 = torch.tensor([indexed_tokens_2]) """ # A lot of models share the same param doc. Use a decorator @@ -76,11 +76,11 @@ def xlmTokenizer(*args, **kwargs): Default: None Example: - >>> import torch - >>> tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'xlmTokenizer', 'xlm-mlm-en-2048') + import torch + tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'xlmTokenizer', 'xlm-mlm-en-2048') - >>> text = "Who was Jim Henson ?" - >>> indexed_tokens = tokenizer.encode(tokenized_text) + text = "Who was Jim Henson ?" + indexed_tokens = tokenizer.encode(text) """ tokenizer = XLMTokenizer.from_pretrained(*args, **kwargs) return tokenizer @@ -91,11 +91,11 @@ def xlmTokenizer(*args, **kwargs): def xlmModel(*args, **kwargs): """ # Load xlmModel - >>> model = torch.hub.load('huggingface/pytorch-transformers', 'xlmModel', 'xlm-mlm-en-2048') - >>> model.eval() + model = torch.hub.load('huggingface/pytorch-transformers', 'xlmModel', 'xlm-mlm-en-2048') + model.eval() # Predict hidden states features for each layer - >>> with torch.no_grad(): + with torch.no_grad(): hidden_states_1, mems = model(tokens_tensor_1) hidden_states_2, mems = model(tokens_tensor_2, past=mems) """ @@ -108,26 +108,26 @@ def xlmModel(*args, **kwargs): def xlmLMHeadModel(*args, **kwargs): """ # Prepare tokenized input - >>> text_1 = "Who was Jim Henson ?" 
- >>> text_2 = "Jim Henson was a puppeteer" - >>> indexed_tokens_1 = tokenizer.encode(text_1) - >>> indexed_tokens_2 = tokenizer.encode(text_2) - >>> tokens_tensor_1 = torch.tensor([indexed_tokens_1]) - >>> tokens_tensor_2 = torch.tensor([indexed_tokens_2]) + text_1 = "Who was Jim Henson ?" + text_2 = "Jim Henson was a puppeteer" + indexed_tokens_1 = tokenizer.encode(text_1) + indexed_tokens_2 = tokenizer.encode(text_2) + tokens_tensor_1 = torch.tensor([indexed_tokens_1]) + tokens_tensor_2 = torch.tensor([indexed_tokens_2]) # Load xlnetLMHeadModel - >>> model = torch.hub.load('huggingface/pytorch-transformers', 'xlnetLMHeadModel', 'xlm-mlm-en-2048') - >>> model.eval() + model = torch.hub.load('huggingface/pytorch-transformers', 'xlnetLMHeadModel', 'xlm-mlm-en-2048') + model.eval() # Predict hidden states features for each layer - >>> with torch.no_grad(): + with torch.no_grad(): predictions_1, mems = model(tokens_tensor_1) predictions_2, mems = model(tokens_tensor_2, mems=mems) # Get the predicted last token - >>> predicted_index = torch.argmax(predictions_2[0, -1, :]).item() - >>> predicted_token = tokenizer.decode([predicted_index]) - >>> assert predicted_token == ' who' + predicted_index = torch.argmax(predictions_2[0, -1, :]).item() + predicted_token = tokenizer.decode([predicted_index]) + assert predicted_token == ' who' """ model = XLMWithLMHeadModel.from_pretrained(*args, **kwargs) return model @@ -142,25 +142,25 @@ def xlmLMHeadModel(*args, **kwargs): # Example: # # Load the tokenizer -# >>> import torch -# >>> tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'xlnetTokenizer', 'xlm-mlm-en-2048') +# import torch +# tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'xlnetTokenizer', 'xlm-mlm-en-2048') # # Prepare tokenized input -# >>> text1 = "Who was Jim Henson ? Jim Henson was a puppeteer" -# >>> text2 = "Who was Jim Henson ? 
Jim Henson was a mysterious young man" -# >>> tokenized_text1 = tokenizer.tokenize(text1) -# >>> tokenized_text2 = tokenizer.tokenize(text2) -# >>> indexed_tokens1 = tokenizer.convert_tokens_to_ids(tokenized_text1) -# >>> indexed_tokens2 = tokenizer.convert_tokens_to_ids(tokenized_text2) -# >>> tokens_tensor = torch.tensor([[indexed_tokens1, indexed_tokens2]]) -# >>> mc_token_ids = torch.LongTensor([[len(tokenized_text1)-1, len(tokenized_text2)-1]]) +# text1 = "Who was Jim Henson ? Jim Henson was a puppeteer" +# text2 = "Who was Jim Henson ? Jim Henson was a mysterious young man" +# tokenized_text1 = tokenizer.tokenize(text1) +# tokenized_text2 = tokenizer.tokenize(text2) +# indexed_tokens1 = tokenizer.convert_tokens_to_ids(tokenized_text1) +# indexed_tokens2 = tokenizer.convert_tokens_to_ids(tokenized_text2) +# tokens_tensor = torch.tensor([[indexed_tokens1, indexed_tokens2]]) +# mc_token_ids = torch.LongTensor([[len(tokenized_text1)-1, len(tokenized_text2)-1]]) # # Load xlnetForSequenceClassification -# >>> model = torch.hub.load('huggingface/pytorch-transformers', 'xlnetForSequenceClassification', 'xlm-mlm-en-2048') -# >>> model.eval() +# model = torch.hub.load('huggingface/pytorch-transformers', 'xlnetForSequenceClassification', 'xlm-mlm-en-2048') +# model.eval() # # Predict sequence classes logits -# >>> with torch.no_grad(): +# with torch.no_grad(): # lm_logits, mems = model(tokens_tensor) # """ # model = XLNetForSequenceClassification.from_pretrained(*args, **kwargs) diff --git a/hubconfs/xlnet_hubconf.1.py b/hubconfs/xlnet_hubconf.1.py index 4c5105a241..fa7b7ddb9f 100644 --- a/hubconfs/xlnet_hubconf.1.py +++ b/hubconfs/xlnet_hubconf.1.py @@ -53,11 +53,11 @@ def xlnetTokenizer(*args, **kwargs): Default: None Example: - >>> import torch - >>> tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'xlnetTokenizer', 'xlnet-large-cased') + import torch + tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'xlnetTokenizer', 'xlnet-large-cased') 
- >>> text = "Who was Jim Henson ?" - >>> indexed_tokens = tokenizer.encode(tokenized_text) + text = "Who was Jim Henson ?" + indexed_tokens = tokenizer.encode(text) """ tokenizer = XLNetTokenizer.from_pretrained(*args, **kwargs) return tokenizer @@ -72,23 +72,23 @@ def xlnetModel(*args, **kwargs): Example: # Load the tokenizer - >>> import torch - >>> tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'xlnetTokenizer', 'xlnet-large-cased') + import torch + tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'xlnetTokenizer', 'xlnet-large-cased') # Prepare tokenized input - >>> text_1 = "Who was Jim Henson ?" - >>> text_2 = "Jim Henson was a puppeteer" - >>> indexed_tokens_1 = tokenizer.encode(text_1) - >>> indexed_tokens_2 = tokenizer.encode(text_2) - >>> tokens_tensor_1 = torch.tensor([indexed_tokens_1]) - >>> tokens_tensor_2 = torch.tensor([indexed_tokens_2]) + text_1 = "Who was Jim Henson ?" + text_2 = "Jim Henson was a puppeteer" + indexed_tokens_1 = tokenizer.encode(text_1) + indexed_tokens_2 = tokenizer.encode(text_2) + tokens_tensor_1 = torch.tensor([indexed_tokens_1]) + tokens_tensor_2 = torch.tensor([indexed_tokens_2]) # Load xlnetModel - >>> model = torch.hub.load('huggingface/pytorch-transformers', 'xlnetModel', 'xlnet-large-cased') - >>> model.eval() + model = torch.hub.load('huggingface/pytorch-transformers', 'xlnetModel', 'xlnet-large-cased') + model.eval() # Predict hidden states features for each layer - >>> with torch.no_grad(): + with torch.no_grad(): hidden_states_1, mems = model(tokens_tensor_1) hidden_states_2, mems = model(tokens_tensor_2, past=mems) """ @@ -106,30 +106,30 @@ def xlnetLMHeadModel(*args, **kwargs): Example: # Load the tokenizer - >>> import torch - >>> tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'xlnetTokenizer', 'xlnet-large-cased') + import torch + tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'xlnetTokenizer', 'xlnet-large-cased') # Prepare tokenized input 
- >>> text_1 = "Who was Jim Henson ?" - >>> text_2 = "Jim Henson was a puppeteer" - >>> indexed_tokens_1 = tokenizer.encode(text_1) - >>> indexed_tokens_2 = tokenizer.encode(text_2) - >>> tokens_tensor_1 = torch.tensor([indexed_tokens_1]) - >>> tokens_tensor_2 = torch.tensor([indexed_tokens_2]) + text_1 = "Who was Jim Henson ?" + text_2 = "Jim Henson was a puppeteer" + indexed_tokens_1 = tokenizer.encode(text_1) + indexed_tokens_2 = tokenizer.encode(text_2) + tokens_tensor_1 = torch.tensor([indexed_tokens_1]) + tokens_tensor_2 = torch.tensor([indexed_tokens_2]) # Load xlnetLMHeadModel - >>> model = torch.hub.load('huggingface/pytorch-transformers', 'xlnetLMHeadModel', 'xlnet-large-cased') - >>> model.eval() + model = torch.hub.load('huggingface/pytorch-transformers', 'xlnetLMHeadModel', 'xlnet-large-cased') + model.eval() # Predict hidden states features for each layer - >>> with torch.no_grad(): + with torch.no_grad(): predictions_1, mems = model(tokens_tensor_1) predictions_2, mems = model(tokens_tensor_2, mems=mems) # Get the predicted last token - >>> predicted_index = torch.argmax(predictions_2[0, -1, :]).item() - >>> predicted_token = tokenizer.decode([predicted_index]) - >>> assert predicted_token == ' who' + predicted_index = torch.argmax(predictions_2[0, -1, :]).item() + predicted_token = tokenizer.decode([predicted_index]) + assert predicted_token == ' who' """ model = XLNetLMHeadModel.from_pretrained(*args, **kwargs) return model @@ -144,25 +144,25 @@ def xlnetLMHeadModel(*args, **kwargs): # Example: # # Load the tokenizer -# >>> import torch -# >>> tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'xlnetTokenizer', 'xlnet-large-cased') +# import torch +# tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'xlnetTokenizer', 'xlnet-large-cased') # # Prepare tokenized input -# >>> text1 = "Who was Jim Henson ? Jim Henson was a puppeteer" -# >>> text2 = "Who was Jim Henson ? 
Jim Henson was a mysterious young man" -# >>> tokenized_text1 = tokenizer.tokenize(text1) -# >>> tokenized_text2 = tokenizer.tokenize(text2) -# >>> indexed_tokens1 = tokenizer.convert_tokens_to_ids(tokenized_text1) -# >>> indexed_tokens2 = tokenizer.convert_tokens_to_ids(tokenized_text2) -# >>> tokens_tensor = torch.tensor([[indexed_tokens1, indexed_tokens2]]) -# >>> mc_token_ids = torch.LongTensor([[len(tokenized_text1)-1, len(tokenized_text2)-1]]) +# text1 = "Who was Jim Henson ? Jim Henson was a puppeteer" +# text2 = "Who was Jim Henson ? Jim Henson was a mysterious young man" +# tokenized_text1 = tokenizer.tokenize(text1) +# tokenized_text2 = tokenizer.tokenize(text2) +# indexed_tokens1 = tokenizer.convert_tokens_to_ids(tokenized_text1) +# indexed_tokens2 = tokenizer.convert_tokens_to_ids(tokenized_text2) +# tokens_tensor = torch.tensor([[indexed_tokens1, indexed_tokens2]]) +# mc_token_ids = torch.LongTensor([[len(tokenized_text1)-1, len(tokenized_text2)-1]]) # # Load xlnetForSequenceClassification -# >>> model = torch.hub.load('huggingface/pytorch-transformers', 'xlnetForSequenceClassification', 'xlnet-large-cased') -# >>> model.eval() +# model = torch.hub.load('huggingface/pytorch-transformers', 'xlnetForSequenceClassification', 'xlnet-large-cased') +# model.eval() # # Predict sequence classes logits -# >>> with torch.no_grad(): +# with torch.no_grad(): # lm_logits, mems = model(tokens_tensor) # """ # model = XLNetForSequenceClassification.from_pretrained(*args, **kwargs) diff --git a/pytorch_transformers/modeling_auto.py b/pytorch_transformers/modeling_auto.py index aa50b1526d..3e28fbd0a9 100644 --- a/pytorch_transformers/modeling_auto.py +++ b/pytorch_transformers/modeling_auto.py @@ -89,15 +89,15 @@ class AutoConfig(object): Examples:: - >>> config = AutoConfig.from_pretrained('bert-base-uncased') # Download configuration from S3 and cache. - >>> config = AutoConfig.from_pretrained('./test/bert_saved_model/') # E.g. 
config (or model) was saved using `save_pretrained('./test/saved_model/')` - >>> config = AutoConfig.from_pretrained('./test/bert_saved_model/my_configuration.json') - >>> config = AutoConfig.from_pretrained('bert-base-uncased', output_attention=True, foo=False) - >>> assert config.output_attention == True - >>> config, unused_kwargs = AutoConfig.from_pretrained('bert-base-uncased', output_attention=True, - >>> foo=False, return_unused_kwargs=True) - >>> assert config.output_attention == True - >>> assert unused_kwargs == {'foo': False} + config = AutoConfig.from_pretrained('bert-base-uncased') # Download configuration from S3 and cache. + config = AutoConfig.from_pretrained('./test/bert_saved_model/') # E.g. config (or model) was saved using `save_pretrained('./test/saved_model/')` + config = AutoConfig.from_pretrained('./test/bert_saved_model/my_configuration.json') + config = AutoConfig.from_pretrained('bert-base-uncased', output_attention=True, foo=False) + assert config.output_attention == True + config, unused_kwargs = AutoConfig.from_pretrained('bert-base-uncased', output_attention=True, + foo=False, return_unused_kwargs=True) + assert config.output_attention == True + assert unused_kwargs == {'foo': False} """ if 'bert' in pretrained_model_name_or_path: @@ -202,13 +202,13 @@ class AutoModel(object): Examples:: - >>> model = AutoModel.from_pretrained('bert-base-uncased') # Download model and configuration from S3 and cache. - >>> model = AutoModel.from_pretrained('./test/bert_model/') # E.g. 
model was saved using `save_pretrained('./test/saved_model/')` - >>> model = AutoModel.from_pretrained('bert-base-uncased', output_attention=True) # Update configuration during loading - >>> assert model.config.output_attention == True - >>> # Loading from a TF checkpoint file instead of a PyTorch model (slower) - >>> config = AutoConfig.from_json_file('./tf_model/bert_tf_model_config.json') - >>> model = AutoModel.from_pretrained('./tf_model/bert_tf_checkpoint.ckpt.index', from_tf=True, config=config) + model = AutoModel.from_pretrained('bert-base-uncased') # Download model and configuration from S3 and cache. + model = AutoModel.from_pretrained('./test/bert_model/') # E.g. model was saved using `save_pretrained('./test/saved_model/')` + model = AutoModel.from_pretrained('bert-base-uncased', output_attention=True) # Update configuration during loading + assert model.config.output_attention == True + # Loading from a TF checkpoint file instead of a PyTorch model (slower) + config = AutoConfig.from_json_file('./tf_model/bert_tf_model_config.json') + model = AutoModel.from_pretrained('./tf_model/bert_tf_checkpoint.ckpt.index', from_tf=True, config=config) """ if 'bert' in pretrained_model_name_or_path: diff --git a/pytorch_transformers/modeling_bert.py b/pytorch_transformers/modeling_bert.py index b59445513a..3f2e7cbda1 100644 --- a/pytorch_transformers/modeling_bert.py +++ b/pytorch_transformers/modeling_bert.py @@ -643,12 +643,12 @@ class BertModel(BertPreTrainedModel): Examples:: - >>> config = BertConfig.from_pretrained('bert-base-uncased') - >>> tokenizer = BertTokenizer.from_pretrained('bert-base-uncased') - >>> model = BertModel(config) - >>> input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0) # Batch size 1 - >>> outputs = model(input_ids) - >>> last_hidden_states = outputs[0] # The last hidden-state is the first element of the output tuple + config = BertConfig.from_pretrained('bert-base-uncased') + tokenizer = 
BertTokenizer.from_pretrained('bert-base-uncased') + model = BertModel(config) + input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0) # Batch size 1 + outputs = model(input_ids) + last_hidden_states = outputs[0] # The last hidden-state is the first element of the output tuple """ def __init__(self, config): @@ -754,13 +754,13 @@ class BertForPreTraining(BertPreTrainedModel): Examples:: - >>> config = BertConfig.from_pretrained('bert-base-uncased') - >>> tokenizer = BertTokenizer.from_pretrained('bert-base-uncased') - >>> - >>> model = BertForPreTraining(config) - >>> input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0) # Batch size 1 - >>> outputs = model(input_ids) - >>> prediction_scores, seq_relationship_scores = outputs[:2] + config = BertConfig.from_pretrained('bert-base-uncased') + tokenizer = BertTokenizer.from_pretrained('bert-base-uncased') + + model = BertForPreTraining(config) + input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0) # Batch size 1 + outputs = model(input_ids) + prediction_scores, seq_relationship_scores = outputs[:2] """ def __init__(self, config): @@ -824,13 +824,13 @@ class BertForMaskedLM(BertPreTrainedModel): Examples:: - >>> config = BertConfig.from_pretrained('bert-base-uncased') - >>> tokenizer = BertTokenizer.from_pretrained('bert-base-uncased') - >>> - >>> model = BertForMaskedLM(config) - >>> input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0) # Batch size 1 - >>> outputs = model(input_ids, masked_lm_labels=input_ids) - >>> loss, prediction_scores = outputs[:2] + config = BertConfig.from_pretrained('bert-base-uncased') + tokenizer = BertTokenizer.from_pretrained('bert-base-uncased') + + model = BertForMaskedLM(config) + input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0) # Batch size 1 + outputs = model(input_ids, masked_lm_labels=input_ids) + loss, prediction_scores = outputs[:2] """ def 
__init__(self, config): @@ -891,13 +891,13 @@ class BertForNextSentencePrediction(BertPreTrainedModel): Examples:: - >>> config = BertConfig.from_pretrained('bert-base-uncased') - >>> tokenizer = BertTokenizer.from_pretrained('bert-base-uncased') - >>> - >>> model = BertForNextSentencePrediction(config) - >>> input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0) # Batch size 1 - >>> outputs = model(input_ids) - >>> seq_relationship_scores = outputs[0] + config = BertConfig.from_pretrained('bert-base-uncased') + tokenizer = BertTokenizer.from_pretrained('bert-base-uncased') + + model = BertForNextSentencePrediction(config) + input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0) # Batch size 1 + outputs = model(input_ids) + seq_relationship_scores = outputs[0] """ def __init__(self, config): @@ -951,14 +951,14 @@ class BertForSequenceClassification(BertPreTrainedModel): Examples:: - >>> config = BertConfig.from_pretrained('bert-base-uncased') - >>> tokenizer = BertTokenizer.from_pretrained('bert-base-uncased') - >>> - >>> model = BertForSequenceClassification(config) - >>> input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0) # Batch size 1 - >>> labels = torch.tensor([1]).unsqueeze(0) # Batch size 1 - >>> outputs = model(input_ids, labels=labels) - >>> loss, logits = outputs[:2] + config = BertConfig.from_pretrained('bert-base-uncased') + tokenizer = BertTokenizer.from_pretrained('bert-base-uncased') + + model = BertForSequenceClassification(config) + input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0) # Batch size 1 + labels = torch.tensor([1]).unsqueeze(0) # Batch size 1 + outputs = model(input_ids, labels=labels) + loss, logits = outputs[:2] """ def __init__(self, config): @@ -1057,15 +1057,15 @@ class BertForMultipleChoice(BertPreTrainedModel): Examples:: - >>> config = BertConfig.from_pretrained('bert-base-uncased') - >>> tokenizer = 
BertTokenizer.from_pretrained('bert-base-uncased') - >>> - >>> model = BertForMultipleChoice(config) - >>> choices = ["Hello, my dog is cute", "Hello, my cat is amazing"] - >>> input_ids = torch.tensor([tokenizer.encode(s) for s in choices]).unsqueeze(0) # Batch size 1, 2 choices - >>> labels = torch.tensor(1).unsqueeze(0) # Batch size 1 - >>> outputs = model(input_ids, labels=labels) - >>> loss, classification_scores = outputs[:2] + config = BertConfig.from_pretrained('bert-base-uncased') + tokenizer = BertTokenizer.from_pretrained('bert-base-uncased') + + model = BertForMultipleChoice(config) + choices = ["Hello, my dog is cute", "Hello, my cat is amazing"] + input_ids = torch.tensor([tokenizer.encode(s) for s in choices]).unsqueeze(0) # Batch size 1, 2 choices + labels = torch.tensor(1).unsqueeze(0) # Batch size 1 + outputs = model(input_ids, labels=labels) + loss, classification_scores = outputs[:2] """ def __init__(self, config): @@ -1127,14 +1127,14 @@ class BertForTokenClassification(BertPreTrainedModel): Examples:: - >>> config = BertConfig.from_pretrained('bert-base-uncased') - >>> tokenizer = BertTokenizer.from_pretrained('bert-base-uncased') - >>> - >>> model = BertForTokenClassification(config) - >>> input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0) # Batch size 1 - >>> labels = torch.tensor([1] * input_ids.size(1)).unsqueeze(0) # Batch size 1 - >>> outputs = model(input_ids, labels=labels) - >>> loss, scores = outputs[:2] + config = BertConfig.from_pretrained('bert-base-uncased') + tokenizer = BertTokenizer.from_pretrained('bert-base-uncased') + + model = BertForTokenClassification(config) + input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0) # Batch size 1 + labels = torch.tensor([1] * input_ids.size(1)).unsqueeze(0) # Batch size 1 + outputs = model(input_ids, labels=labels) + loss, scores = outputs[:2] """ def __init__(self, config): @@ -1203,15 +1203,15 @@ class 
BertForQuestionAnswering(BertPreTrainedModel): Examples:: - >>> config = BertConfig.from_pretrained('bert-base-uncased') - >>> tokenizer = BertTokenizer.from_pretrained('bert-base-uncased') - >>> - >>> model = BertForQuestionAnswering(config) - >>> input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0) # Batch size 1 - >>> start_positions = torch.tensor([1]) - >>> end_positions = torch.tensor([3]) - >>> outputs = model(input_ids, start_positions=start_positions, end_positions=end_positions) - >>> loss, start_scores, end_scores = outputs[:2] + config = BertConfig.from_pretrained('bert-base-uncased') + tokenizer = BertTokenizer.from_pretrained('bert-base-uncased') + + model = BertForQuestionAnswering(config) + input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0) # Batch size 1 + start_positions = torch.tensor([1]) + end_positions = torch.tensor([3]) + outputs = model(input_ids, start_positions=start_positions, end_positions=end_positions) + loss, start_scores, end_scores = outputs[:3] """ def __init__(self, config): diff --git a/pytorch_transformers/modeling_gpt2.py b/pytorch_transformers/modeling_gpt2.py index b8a459db7d..4341f0d8a1 100644 --- a/pytorch_transformers/modeling_gpt2.py +++ b/pytorch_transformers/modeling_gpt2.py @@ -433,12 +433,12 @@ class GPT2Model(GPT2PreTrainedModel): Examples:: - >>> config = GPT2Config.from_pretrained('gpt2') - >>> tokenizer = GPT2Tokenizer.from_pretrained('gpt2') - >>> model = GPT2Model(config) - >>> input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0) # Batch size 1 - >>> outputs = model(input_ids) - >>> last_hidden_states = outputs[0] # The last hidden-state is the first element of the output tuple + config = GPT2Config.from_pretrained('gpt2') + tokenizer = GPT2Tokenizer.from_pretrained('gpt2') + model = GPT2Model(config) + input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0) # Batch size 1 + outputs = model(input_ids) 
+ last_hidden_states = outputs[0] # The last hidden-state is the first element of the output tuple """ def __init__(self, config): @@ -567,12 +567,12 @@ class GPT2LMHeadModel(GPT2PreTrainedModel): Examples:: - >>> config = GPT2Config.from_pretrained('gpt2') - >>> tokenizer = GPT2Tokenizer.from_pretrained('gpt2') - >>> model = GPT2LMHeadModel(config) - >>> input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0) # Batch size 1 - >>> outputs = model(input_ids, labels=input_ids) - >>> loss, logits = outputs[:2] + config = GPT2Config.from_pretrained('gpt2') + tokenizer = GPT2Tokenizer.from_pretrained('gpt2') + model = GPT2LMHeadModel(config) + input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0) # Batch size 1 + outputs = model(input_ids, labels=input_ids) + loss, logits = outputs[:2] """ def __init__(self, config): @@ -683,14 +683,14 @@ class GPT2DoubleHeadsModel(GPT2PreTrainedModel): Examples:: - >>> config = GPT2Config.from_pretrained('gpt2') - >>> tokenizer = GPT2Tokenizer.from_pretrained('gpt2') - >>> model = GPT2DoubleHeadsModel(config) - >>> choices = ["Hello, my dog is cute [CLS]", "Hello, my cat is cute [CLS]"] # Assume you've added [CLS] to the vocabulary - >>> input_ids = torch.tensor([tokenizer.encode(s) for s in choices]).unsqueeze(0) # Batch size 1, 2 choices - >>> mc_token_ids = torch.tensor([-1, -1]).unsqueeze(0) # Batch size 1 - >>> outputs = model(input_ids, mc_token_ids) - >>> lm_prediction_scores, mc_prediction_scores = outputs[:2] + config = GPT2Config.from_pretrained('gpt2') + tokenizer = GPT2Tokenizer.from_pretrained('gpt2') + model = GPT2DoubleHeadsModel(config) + choices = ["Hello, my dog is cute [CLS]", "Hello, my cat is cute [CLS]"] # Assume you've added [CLS] to the vocabulary + input_ids = torch.tensor([tokenizer.encode(s) for s in choices]).unsqueeze(0) # Batch size 1, 2 choices + mc_token_ids = torch.tensor([-1, -1]).unsqueeze(0) # Batch size 1 + outputs = model(input_ids, mc_token_ids) 
+ lm_prediction_scores, mc_prediction_scores = outputs[:2] """ def __init__(self, config): diff --git a/pytorch_transformers/modeling_openai.py b/pytorch_transformers/modeling_openai.py index 4ea19a965d..a6cb6212ef 100644 --- a/pytorch_transformers/modeling_openai.py +++ b/pytorch_transformers/modeling_openai.py @@ -439,12 +439,12 @@ class OpenAIGPTModel(OpenAIGPTPreTrainedModel): Examples:: - >>> config = OpenAIGPTConfig.from_pretrained('openai-gpt') - >>> tokenizer = OpenAIGPTTokenizer.from_pretrained('openai-gpt') - >>> model = OpenAIGPTModel(config) - >>> input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0) # Batch size 1 - >>> outputs = model(input_ids) - >>> last_hidden_states = outputs[0] # The last hidden-state is the first element of the output tuple + config = OpenAIGPTConfig.from_pretrained('openai-gpt') + tokenizer = OpenAIGPTTokenizer.from_pretrained('openai-gpt') + model = OpenAIGPTModel(config) + input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0) # Batch size 1 + outputs = model(input_ids) + last_hidden_states = outputs[0] # The last hidden-state is the first element of the output tuple """ def __init__(self, config): @@ -558,12 +558,12 @@ class OpenAIGPTLMHeadModel(OpenAIGPTPreTrainedModel): Examples:: - >>> config = OpenAIGPTConfig.from_pretrained('openai-gpt') - >>> tokenizer = OpenAIGPTTokenizer.from_pretrained('openai-gpt') - >>> model = OpenAIGPTLMHeadModel(config) - >>> input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0) # Batch size 1 - >>> outputs = model(input_ids, labels=input_ids) - >>> loss, logits = outputs[:2] + config = OpenAIGPTConfig.from_pretrained('openai-gpt') + tokenizer = OpenAIGPTTokenizer.from_pretrained('openai-gpt') + model = OpenAIGPTLMHeadModel(config) + input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0) # Batch size 1 + outputs = model(input_ids, labels=input_ids) + loss, logits = outputs[:2] """ def 
__init__(self, config): @@ -665,14 +665,14 @@ class OpenAIGPTDoubleHeadsModel(OpenAIGPTPreTrainedModel): Examples:: - >>> config = OpenAIGPTConfig.from_pretrained('openai-gpt') - >>> tokenizer = OpenAIGPTTokenizer.from_pretrained('openai-gpt') - >>> model = OpenAIGPTDoubleHeadsModel(config) - >>> choices = ["Hello, my dog is cute [CLS]", "Hello, my cat is cute [CLS]"] # Assume you've added [CLS] to the vocabulary - >>> input_ids = torch.tensor([tokenizer.encode(s) for s in choices]).unsqueeze(0) # Batch size 1, 2 choices - >>> mc_token_ids = torch.tensor([-1, -1]).unsqueeze(0) # Batch size 1 - >>> outputs = model(input_ids, mc_token_ids) - >>> lm_prediction_scores, mc_prediction_scores = outputs[:2] + config = OpenAIGPTConfig.from_pretrained('openai-gpt') + tokenizer = OpenAIGPTTokenizer.from_pretrained('openai-gpt') + model = OpenAIGPTDoubleHeadsModel(config) + choices = ["Hello, my dog is cute [CLS]", "Hello, my cat is cute [CLS]"] # Assume you've added [CLS] to the vocabulary + input_ids = torch.tensor([tokenizer.encode(s) for s in choices]).unsqueeze(0) # Batch size 1, 2 choices + mc_token_ids = torch.tensor([-1, -1]).unsqueeze(0) # Batch size 1 + outputs = model(input_ids, mc_token_ids) + lm_prediction_scores, mc_prediction_scores = outputs[:2] """ def __init__(self, config): diff --git a/pytorch_transformers/modeling_transfo_xl.py b/pytorch_transformers/modeling_transfo_xl.py index 3280c4558d..7c999edda7 100644 --- a/pytorch_transformers/modeling_transfo_xl.py +++ b/pytorch_transformers/modeling_transfo_xl.py @@ -968,12 +968,12 @@ class TransfoXLModel(TransfoXLPreTrainedModel): Examples:: - >>> config = TransfoXLConfig.from_pretrained('transfo-xl-wt103') - >>> tokenizer = TransfoXLTokenizer.from_pretrained('transfo-xl-wt103') - >>> model = TransfoXLModel(config) - >>> input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0) # Batch size 1 - >>> outputs = model(input_ids) - >>> last_hidden_states, mems = outputs[:2] + config = 
TransfoXLConfig.from_pretrained('transfo-xl-wt103') + tokenizer = TransfoXLTokenizer.from_pretrained('transfo-xl-wt103') + model = TransfoXLModel(config) + input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0) # Batch size 1 + outputs = model(input_ids) + last_hidden_states, mems = outputs[:2] """ def __init__(self, config): @@ -1284,12 +1284,12 @@ class TransfoXLLMHeadModel(TransfoXLPreTrainedModel): Examples:: - >>> config = TransfoXLConfig.from_pretrained('transfo-xl-wt103') - >>> tokenizer = TransfoXLTokenizer.from_pretrained('transfo-xl-wt103') - >>> model = TransfoXLLMHeadModel(config) - >>> input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0) # Batch size 1 - >>> outputs = model(input_ids) - >>> prediction_scores, mems = outputs[:2] + config = TransfoXLConfig.from_pretrained('transfo-xl-wt103') + tokenizer = TransfoXLTokenizer.from_pretrained('transfo-xl-wt103') + model = TransfoXLLMHeadModel(config) + input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0) # Batch size 1 + outputs = model(input_ids) + prediction_scores, mems = outputs[:2] """ def __init__(self, config): diff --git a/pytorch_transformers/modeling_utils.py b/pytorch_transformers/modeling_utils.py index 7ae834f5e5..e458c5ef74 100644 --- a/pytorch_transformers/modeling_utils.py +++ b/pytorch_transformers/modeling_utils.py @@ -105,15 +105,15 @@ class PretrainedConfig(object): Examples:: - >>> config = BertConfig.from_pretrained('bert-base-uncased') # Download configuration from S3 and cache. - >>> config = BertConfig.from_pretrained('./test/saved_model/') # E.g. 
config (or model) was saved using `save_pretrained('./test/saved_model/')` - >>> config = BertConfig.from_pretrained('./test/saved_model/my_configuration.json') - >>> config = BertConfig.from_pretrained('bert-base-uncased', output_attention=True, foo=False) - >>> assert config.output_attention == True - >>> config, unused_kwargs = BertConfig.from_pretrained('bert-base-uncased', output_attention=True, - >>> foo=False, return_unused_kwargs=True) - >>> assert config.output_attention == True - >>> assert unused_kwargs == {'foo': False} + config = BertConfig.from_pretrained('bert-base-uncased') # Download configuration from S3 and cache. + config = BertConfig.from_pretrained('./test/saved_model/') # E.g. config (or model) was saved using `save_pretrained('./test/saved_model/')` + config = BertConfig.from_pretrained('./test/saved_model/my_configuration.json') + config = BertConfig.from_pretrained('bert-base-uncased', output_attentions=True, foo=False) + assert config.output_attentions == True + config, unused_kwargs = BertConfig.from_pretrained('bert-base-uncased', output_attentions=True, + foo=False, return_unused_kwargs=True) + assert config.output_attentions == True + assert unused_kwargs == {'foo': False} """ cache_dir = kwargs.pop('cache_dir', None) @@ -369,13 +369,13 @@ class PreTrainedModel(nn.Module): Examples::dictionary - >>> model = BertModel.from_pretrained('bert-base-uncased') # Download model and configuration from S3 and cache. - >>> model = BertModel.from_pretrained('./test/saved_model/') # E.g. 
model was saved using `save_pretrained('./test/saved_model/')` - >>> model = BertModel.from_pretrained('bert-base-uncased', output_attention=True) # Update configuration during loading - >>> assert model.config.output_attention == True - >>> # Loading from a TF checkpoint file instead of a PyTorch model (slower) - >>> config = BertConfig.from_json_file('./tf_model/my_tf_model_config.json') - >>> model = BertModel.from_pretrained('./tf_model/my_tf_checkpoint.ckpt.index', from_tf=True, config=config) + model = BertModel.from_pretrained('bert-base-uncased') # Download model and configuration from S3 and cache. + model = BertModel.from_pretrained('./test/saved_model/') # E.g. model was saved using `save_pretrained('./test/saved_model/')` + model = BertModel.from_pretrained('bert-base-uncased', output_attentions=True) # Update configuration during loading + assert model.config.output_attentions == True + # Loading from a TF checkpoint file instead of a PyTorch model (slower) + config = BertConfig.from_json_file('./tf_model/my_tf_model_config.json') + model = BertModel.from_pretrained('./tf_model/my_tf_checkpoint.ckpt.index', from_tf=True, config=config) """ config = kwargs.pop('config', None) diff --git a/pytorch_transformers/modeling_xlm.py b/pytorch_transformers/modeling_xlm.py index 3bb864501a..7325ff7875 100644 --- a/pytorch_transformers/modeling_xlm.py +++ b/pytorch_transformers/modeling_xlm.py @@ -472,12 +472,12 @@ class XLMModel(XLMPreTrainedModel): Examples:: - >>> config = XLMConfig.from_pretrained('xlm-mlm-en-2048') - >>> tokenizer = XLMTokenizer.from_pretrained('xlm-mlm-en-2048') - >>> model = XLMModel(config) - >>> input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0) # Batch size 1 - >>> outputs = model(input_ids) - >>> last_hidden_states = outputs[0] # The last hidden-state is the first element of the output tuple + config = XLMConfig.from_pretrained('xlm-mlm-en-2048') + tokenizer = XLMTokenizer.from_pretrained('xlm-mlm-en-2048') + 
model = XLMModel(config) + input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0) # Batch size 1 + outputs = model(input_ids) + last_hidden_states = outputs[0] # The last hidden-state is the first element of the output tuple """ ATTRIBUTES = ['encoder', 'eos_index', 'pad_index', # 'with_output', @@ -745,12 +745,12 @@ class XLMWithLMHeadModel(XLMPreTrainedModel): Examples:: - >>> config = XLMConfig.from_pretrained('xlm-mlm-en-2048') - >>> tokenizer = XLMTokenizer.from_pretrained('xlm-mlm-en-2048') - >>> model = XLMWithLMHeadModel(config) - >>> input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0) # Batch size 1 - >>> outputs = model(input_ids) - >>> last_hidden_states = outputs[0] # The last hidden-state is the first element of the output tuple + config = XLMConfig.from_pretrained('xlm-mlm-en-2048') + tokenizer = XLMTokenizer.from_pretrained('xlm-mlm-en-2048') + model = XLMWithLMHeadModel(config) + input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0) # Batch size 1 + outputs = model(input_ids) + prediction_scores = outputs[0] # The prediction scores are the first element of the output tuple """ def __init__(self, config): @@ -805,14 +805,14 @@ class XLMForSequenceClassification(XLMPreTrainedModel): Examples:: - >>> config = XLMConfig.from_pretrained('xlm-mlm-en-2048') - >>> tokenizer = XLMTokenizer.from_pretrained('xlm-mlm-en-2048') - >>> - >>> model = XLMForSequenceClassification(config) - >>> input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0) # Batch size 1 - >>> labels = torch.tensor([1]).unsqueeze(0) # Batch size 1 - >>> outputs = model(input_ids, labels=labels) - >>> loss, logits = outputs[:2] + config = XLMConfig.from_pretrained('xlm-mlm-en-2048') + tokenizer = XLMTokenizer.from_pretrained('xlm-mlm-en-2048') + + model = XLMForSequenceClassification(config) + input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0) # Batch size 
1 + labels = torch.tensor([1]).unsqueeze(0) # Batch size 1 + outputs = model(input_ids, labels=labels) + loss, logits = outputs[:2] """ def __init__(self, config): @@ -885,15 +885,15 @@ class XLMForQuestionAnswering(XLMPreTrainedModel): Examples:: - >>> config = XLMConfig.from_pretrained('xlm-mlm-en-2048') - >>> tokenizer = XLMTokenizer.from_pretrained('xlm-mlm-en-2048') - >>> - >>> model = XLMForQuestionAnswering(config) - >>> input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0) # Batch size 1 - >>> start_positions = torch.tensor([1]) - >>> end_positions = torch.tensor([3]) - >>> outputs = model(input_ids, start_positions=start_positions, end_positions=end_positions) - >>> loss, start_scores, end_scores = outputs[:2] + config = XLMConfig.from_pretrained('xlm-mlm-en-2048') + tokenizer = XLMTokenizer.from_pretrained('xlm-mlm-en-2048') + + model = XLMForQuestionAnswering(config) + input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0) # Batch size 1 + start_positions = torch.tensor([1]) + end_positions = torch.tensor([3]) + outputs = model(input_ids, start_positions=start_positions, end_positions=end_positions) + loss = outputs[0] """ def __init__(self, config): diff --git a/pytorch_transformers/modeling_xlnet.py b/pytorch_transformers/modeling_xlnet.py index 515decdb3e..9c1752eb74 100644 --- a/pytorch_transformers/modeling_xlnet.py +++ b/pytorch_transformers/modeling_xlnet.py @@ -712,12 +712,12 @@ class XLNetModel(XLNetPreTrainedModel): Examples:: - >>> config = XLNetConfig.from_pretrained('xlnet-large-cased') - >>> tokenizer = XLNetTokenizer.from_pretrained('xlnet-large-cased') - >>> model = XLNetModel(config) - >>> input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0) # Batch size 1 - >>> outputs = model(input_ids) - >>> last_hidden_states = outputs[0] # The last hidden-state is the first element of the output tuple + config = 
XLNetConfig.from_pretrained('xlnet-large-cased') + tokenizer = XLNetTokenizer.from_pretrained('xlnet-large-cased') + model = XLNetModel(config) + input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0) # Batch size 1 + outputs = model(input_ids) + last_hidden_states = outputs[0] # The last hidden-state is the first element of the output tuple """ def __init__(self, config): @@ -1019,17 +1019,17 @@ class XLNetLMHeadModel(XLNetPreTrainedModel): Examples:: - >>> config = XLNetConfig.from_pretrained('xlnet-large-cased') - >>> tokenizer = XLNetTokenizer.from_pretrained('xlnet-large-cased') - >>> model = XLNetLMHeadModel(config) - >>> # We show how to setup inputs to predict a next token using a bi-directional context. - >>> input_ids = torch.tensor(tokenizer.encode("Hello, my dog is very ")).unsqueeze(0) # We will predict the masked token - >>> perm_mask = torch.zeros((1, input_ids.shape[1], input_ids.shape[1]), dtype=torch.float) - >>> perm_mask[:, :, -1] = 1.0 # Previous tokens don't see last token - >>> target_mapping = torch.zeros((1, 1, input_ids.shape[1]), dtype=torch.float) # Shape [1, 1, seq_length] => let's predict one token - >>> target_mapping[0, 0, -1] = 1.0 # Our first (and only) prediction will be the last token of the sequence (the masked token) - >>> outputs = model(input_ids, perm_mask=perm_mask, target_mapping=target_mapping) - >>> next_token_logits = outputs[0] # Output has shape [target_mapping.size(0), target_mapping.size(1), config.vocab_size] + config = XLNetConfig.from_pretrained('xlnet-large-cased') + tokenizer = XLNetTokenizer.from_pretrained('xlnet-large-cased') + model = XLNetLMHeadModel(config) + # We show how to setup inputs to predict a next token using a bi-directional context. 
+ input_ids = torch.tensor(tokenizer.encode("Hello, my dog is very ")).unsqueeze(0) # We will predict the masked token + perm_mask = torch.zeros((1, input_ids.shape[1], input_ids.shape[1]), dtype=torch.float) + perm_mask[:, :, -1] = 1.0 # Previous tokens don't see last token + target_mapping = torch.zeros((1, 1, input_ids.shape[1]), dtype=torch.float) # Shape [1, 1, seq_length] => let's predict one token + target_mapping[0, 0, -1] = 1.0 # Our first (and only) prediction will be the last token of the sequence (the masked token) + outputs = model(input_ids, perm_mask=perm_mask, target_mapping=target_mapping) + next_token_logits = outputs[0] # Output has shape [target_mapping.size(0), target_mapping.size(1), config.vocab_size] """ def __init__(self, config): @@ -1100,14 +1100,14 @@ class XLNetForSequenceClassification(XLNetPreTrainedModel): Examples:: - >>> config = XLNetConfig.from_pretrained('xlnet-large-cased') - >>> tokenizer = XLNetTokenizer.from_pretrained('xlnet-large-cased') - >>> - >>> model = XLNetForSequenceClassification(config) - >>> input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0) # Batch size 1 - >>> labels = torch.tensor([1]).unsqueeze(0) # Batch size 1 - >>> outputs = model(input_ids, labels=labels) - >>> loss, logits = outputs[:2] + config = XLNetConfig.from_pretrained('xlnet-large-cased') + tokenizer = XLNetTokenizer.from_pretrained('xlnet-large-cased') + + model = XLNetForSequenceClassification(config) + input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0) # Batch size 1 + labels = torch.tensor([1]).unsqueeze(0) # Batch size 1 + outputs = model(input_ids, labels=labels) + loss, logits = outputs[:2] """ def __init__(self, config): @@ -1200,15 +1200,15 @@ class XLNetForQuestionAnswering(XLNetPreTrainedModel): Examples:: - >>> config = XLMConfig.from_pretrained('xlm-mlm-en-2048') - >>> tokenizer = XLMTokenizer.from_pretrained('xlm-mlm-en-2048') - >>> - >>> model = 
XLMForQuestionAnswering(config) - >>> input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0) # Batch size 1 - >>> start_positions = torch.tensor([1]) - >>> end_positions = torch.tensor([3]) - >>> outputs = model(input_ids, start_positions=start_positions, end_positions=end_positions) - >>> loss, start_scores, end_scores = outputs[:2] + config = XLNetConfig.from_pretrained('xlnet-large-cased') + tokenizer = XLNetTokenizer.from_pretrained('xlnet-large-cased') + + model = XLNetForQuestionAnswering(config) + input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0) # Batch size 1 + start_positions = torch.tensor([1]) + end_positions = torch.tensor([3]) + outputs = model(input_ids, start_positions=start_positions, end_positions=end_positions) + loss = outputs[0] """ def __init__(self, config): diff --git a/pytorch_transformers/tokenization_auto.py b/pytorch_transformers/tokenization_auto.py index 66d0ce51ba..acbe1cebc6 100644 --- a/pytorch_transformers/tokenization_auto.py +++ b/pytorch_transformers/tokenization_auto.py @@ -78,8 +78,8 @@ class AutoTokenizer(object): Examples:: - >>> config = AutoTokenizer.from_pretrained('bert-base-uncased') # Download vocabulary from S3 and cache. - >>> config = AutoTokenizer.from_pretrained('./test/bert_saved_model/') # E.g. tokenizer was saved using `save_pretrained('./test/saved_model/')` + tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased') # Download vocabulary from S3 and cache. + tokenizer = AutoTokenizer.from_pretrained('./test/bert_saved_model/') # E.g. tokenizer was saved using `save_pretrained('./test/bert_saved_model/')` """ if 'bert' in pretrained_model_name_or_path: