From 256086bc6908448fc6aff9b1e19d95c4f6019bee Mon Sep 17 00:00:00 2001 From: thomwolf Date: Fri, 30 Aug 2019 22:34:23 +0200 Subject: [PATCH] clean up and simplify hubconf --- hubconf.py | 118 ++++++++- hubconfs/automodels_hubconf.py | 110 -------- hubconfs/bert_hubconf.py | 360 -------------------------- hubconfs/gpt2_hubconf.py | 168 ------------ hubconfs/gpt_hubconf.py | 186 ------------- hubconfs/transformer_xl_hubconf.py | 130 ---------- hubconfs/xlm_hubconf.py | 167 ------------ hubconfs/xlnet_hubconf.1.py | 169 ------------ pytorch_transformers/modeling_auto.py | 5 - 9 files changed, 110 insertions(+), 1303 deletions(-) delete mode 100644 hubconfs/automodels_hubconf.py delete mode 100644 hubconfs/bert_hubconf.py delete mode 100644 hubconfs/gpt2_hubconf.py delete mode 100644 hubconfs/gpt_hubconf.py delete mode 100644 hubconfs/transformer_xl_hubconf.py delete mode 100644 hubconfs/xlm_hubconf.py delete mode 100644 hubconfs/xlnet_hubconf.1.py diff --git a/hubconf.py b/hubconf.py index 05afd63a46..35e7f1eea8 100644 --- a/hubconf.py +++ b/hubconf.py @@ -1,10 +1,112 @@ +from pytorch_transformers import ( + AutoTokenizer, AutoConfig, AutoModel, AutoModelWithLMHead, AutoModelForSequenceClassification, AutoModelForQuestionAnswering +) +from pytorch_transformers.modeling_utils import add_start_docstrings + dependencies = ['torch', 'tqdm', 'boto3', 'requests', 'regex', 'sentencepiece', 'sacremoses'] -from hubconfs.automodels_hubconf import ( - config, - model, - modelForQuestionAnswering, - modelForSequenceClassification, - modelWithLMHead, - tokenizer, -) +@add_start_docstrings(AutoConfig.__doc__) +def config(*args, **kwargs): + r""" + # Using torch.hub ! + import torch + + config = torch.hub.load('huggingface/pytorch-transformers', 'config', 'bert-base-uncased') # Download configuration from S3 and cache. + config = torch.hub.load('huggingface/pytorch-transformers', 'config', './test/bert_saved_model/') # E.g. config (or model) was saved using `save_pretrained('./test/saved_model/')` + config = torch.hub.load('huggingface/pytorch-transformers', 'config', './test/bert_saved_model/my_configuration.json') + config = torch.hub.load('huggingface/pytorch-transformers', 'config', 'bert-base-uncased', output_attention=True, foo=False) + assert config.output_attention == True + config, unused_kwargs = torch.hub.load('huggingface/pytorch-transformers', 'config', 'bert-base-uncased', output_attention=True, foo=False, return_unused_kwargs=True) + assert config.output_attention == True + assert unused_kwargs == {'foo': False} + + """ + + return AutoConfig.from_pretrained(*args, **kwargs) + + +@add_start_docstrings(AutoTokenizer.__doc__) +def tokenizer(*args, **kwargs): + r""" + # Using torch.hub ! + import torch + + tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'tokenizer', 'bert-base-uncased') # Download vocabulary from S3 and cache. + tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'tokenizer', './test/bert_saved_model/') # E.g. tokenizer was saved using `save_pretrained('./test/saved_model/')` + + """ + + return AutoTokenizer.from_pretrained(*args, **kwargs) + + +@add_start_docstrings(AutoModel.__doc__) +def model(*args, **kwargs): + r""" + # Using torch.hub ! + import torch + + model = torch.hub.load('huggingface/pytorch-transformers', 'model', 'bert-base-uncased') # Download model and configuration from S3 and cache. + model = torch.hub.load('huggingface/pytorch-transformers', 'model', './test/bert_model/') # E.g. model was saved using `save_pretrained('./test/saved_model/')` + model = torch.hub.load('huggingface/pytorch-transformers', 'model', 'bert-base-uncased', output_attention=True) # Update configuration during loading + assert model.config.output_attention == True + # Loading from a TF checkpoint file instead of a PyTorch model (slower) + config = AutoConfig.from_json_file('./tf_model/bert_tf_model_config.json') + model = torch.hub.load('huggingface/pytorch-transformers', 'model', './tf_model/bert_tf_checkpoint.ckpt.index', from_tf=True, config=config) + + """ + + return AutoModel.from_pretrained(*args, **kwargs) + +@add_start_docstrings(AutoModelWithLMHead.__doc__) +def modelWithLMHead(*args, **kwargs): + r""" + # Using torch.hub ! + import torch + + model = torch.hub.load('huggingface/pytorch-transformers', 'modelWithLMHead', 'bert-base-uncased') # Download model and configuration from S3 and cache. + model = torch.hub.load('huggingface/pytorch-transformers', 'modelWithLMHead', './test/bert_model/') # E.g. model was saved using `save_pretrained('./test/saved_model/')` + model = torch.hub.load('huggingface/pytorch-transformers', 'modelWithLMHead', 'bert-base-uncased', output_attention=True) # Update configuration during loading + assert model.config.output_attention == True + # Loading from a TF checkpoint file instead of a PyTorch model (slower) + config = AutoConfig.from_json_file('./tf_model/bert_tf_model_config.json') + model = torch.hub.load('huggingface/pytorch-transformers', 'modelWithLMHead', './tf_model/bert_tf_checkpoint.ckpt.index', from_tf=True, config=config) + + """ + return AutoModelWithLMHead.from_pretrained(*args, **kwargs) + + +@add_start_docstrings(AutoModelForSequenceClassification.__doc__) +def modelForSequenceClassification(*args, **kwargs): + r""" + # Using torch.hub ! + import torch + + model = torch.hub.load('huggingface/pytorch-transformers', 'modelForSequenceClassification', 'bert-base-uncased') # Download model and configuration from S3 and cache. + model = torch.hub.load('huggingface/pytorch-transformers', 'modelForSequenceClassification', './test/bert_model/') # E.g. model was saved using `save_pretrained('./test/saved_model/')` + model = torch.hub.load('huggingface/pytorch-transformers', 'modelForSequenceClassification', 'bert-base-uncased', output_attention=True) # Update configuration during loading + assert model.config.output_attention == True + # Loading from a TF checkpoint file instead of a PyTorch model (slower) + config = AutoConfig.from_json_file('./tf_model/bert_tf_model_config.json') + model = torch.hub.load('huggingface/pytorch-transformers', 'modelForSequenceClassification', './tf_model/bert_tf_checkpoint.ckpt.index', from_tf=True, config=config) + + """ + + return AutoModelForSequenceClassification.from_pretrained(*args, **kwargs) + + +@add_start_docstrings(AutoModelForQuestionAnswering.__doc__) +def modelForQuestionAnswering(*args, **kwargs): + r""" + # Using torch.hub ! + import torch + + model = torch.hub.load('huggingface/pytorch-transformers', 'modelForQuestionAnswering', 'bert-base-uncased') # Download model and configuration from S3 and cache. + model = torch.hub.load('huggingface/pytorch-transformers', 'modelForQuestionAnswering', './test/bert_model/') # E.g. model was saved using `save_pretrained('./test/saved_model/')` + model = torch.hub.load('huggingface/pytorch-transformers', 'modelForQuestionAnswering', 'bert-base-uncased', output_attention=True) # Update configuration during loading + assert model.config.output_attention == True + # Loading from a TF checkpoint file instead of a PyTorch model (slower) + config = AutoConfig.from_json_file('./tf_model/bert_tf_model_config.json') + model = torch.hub.load('huggingface/pytorch-transformers', 'modelForQuestionAnswering', './tf_model/bert_tf_checkpoint.ckpt.index', from_tf=True, config=config) + + """ + return AutoModelForQuestionAnswering.from_pretrained(*args, **kwargs) diff --git a/hubconfs/automodels_hubconf.py b/hubconfs/automodels_hubconf.py deleted file mode 100644 index 5c1ab5ebc6..0000000000 --- a/hubconfs/automodels_hubconf.py +++ /dev/null @@ -1,110 +0,0 @@ -from pytorch_transformers import ( - AutoTokenizer, AutoConfig, AutoModel, AutoModelWithLMHead, AutoModelForSequenceClassification, AutoModelForQuestionAnswering -) -from pytorch_transformers.modeling_utils import add_start_docstrings - -@add_start_docstrings(AutoConfig.__doc__) -def config(*args, **kwargs): - r""" - # Using torch.hub ! - import torch - - config = torch.hub.load('huggingface/pytorch-transformers', 'config', 'bert-base-uncased') # Download configuration from S3 and cache. - config = torch.hub.load('huggingface/pytorch-transformers', 'config', './test/bert_saved_model/') # E.g. config (or model) was saved using `save_pretrained('./test/saved_model/')` - config = torch.hub.load('huggingface/pytorch-transformers', 'config', './test/bert_saved_model/my_configuration.json') - config = torch.hub.load('huggingface/pytorch-transformers', 'config', 'bert-base-uncased', output_attention=True, foo=False) - assert config.output_attention == True - config, unused_kwargs = torch.hub.load('huggingface/pytorch-transformers', 'config', 'bert-base-uncased', output_attention=True, foo=False, return_unused_kwargs=True) - assert config.output_attention == True - assert unused_kwargs == {'foo': False} - - """ - - return AutoConfig.from_pretrained(*args, **kwargs) - - -@add_start_docstrings(AutoTokenizer.__doc__) -def tokenizer(*args, **kwargs): - r""" - # Using torch.hub ! - import torch - - tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'tokenizer', 'bert-base-uncased') # Download vocabulary from S3 and cache. - tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'tokenizer', './test/bert_saved_model/') # E.g. tokenizer was saved using `save_pretrained('./test/saved_model/')` - - """ - - return AutoTokenizer.from_pretrained(*args, **kwargs) - - -@add_start_docstrings(AutoModel.__doc__) -def model(*args, **kwargs): - r""" - # Using torch.hub ! - import torch - - model = torch.hub.load('huggingface/pytorch-transformers', 'model', 'bert-base-uncased') # Download model and configuration from S3 and cache. - model = torch.hub.load('huggingface/pytorch-transformers', 'model', './test/bert_model/') # E.g. model was saved using `save_pretrained('./test/saved_model/')` - model = torch.hub.load('huggingface/pytorch-transformers', 'model', 'bert-base-uncased', output_attention=True) # Update configuration during loading - assert model.config.output_attention == True - # Loading from a TF checkpoint file instead of a PyTorch model (slower) - config = AutoConfig.from_json_file('./tf_model/bert_tf_model_config.json') - model = torch.hub.load('huggingface/pytorch-transformers', 'model', './tf_model/bert_tf_checkpoint.ckpt.index', from_tf=True, config=config) - - """ - - return AutoModel.from_pretrained(*args, **kwargs) - -@add_start_docstrings(AutoModelWithLMHead.__doc__) -def modelWithLMHead(*args, **kwargs): - r""" - # Using torch.hub ! - import torch - - model = torch.hub.load('huggingface/pytorch-transformers', 'modelWithLMHead', 'bert-base-uncased') # Download model and configuration from S3 and cache. - model = torch.hub.load('huggingface/pytorch-transformers', 'modelWithLMHead', './test/bert_model/') # E.g. model was saved using `save_pretrained('./test/saved_model/')` - model = torch.hub.load('huggingface/pytorch-transformers', 'modelWithLMHead', 'bert-base-uncased', output_attention=True) # Update configuration during loading - assert model.config.output_attention == True - # Loading from a TF checkpoint file instead of a PyTorch model (slower) - config = AutoConfig.from_json_file('./tf_model/bert_tf_model_config.json') - model = torch.hub.load('huggingface/pytorch-transformers', 'modelWithLMHead', './tf_model/bert_tf_checkpoint.ckpt.index', from_tf=True, config=config) - - """ - return AutoModelWithLMHead.from_pretrained(*args, **kwargs) - - -@add_start_docstrings(AutoModelForSequenceClassification.__doc__) -def modelForSequenceClassification(*args, **kwargs): - r""" - # Using torch.hub ! - import torch - - model = torch.hub.load('huggingface/pytorch-transformers', 'modelForSequenceClassification', 'bert-base-uncased') # Download model and configuration from S3 and cache. - model = torch.hub.load('huggingface/pytorch-transformers', 'modelForSequenceClassification', './test/bert_model/') # E.g. model was saved using `save_pretrained('./test/saved_model/')` - model = torch.hub.load('huggingface/pytorch-transformers', 'modelForSequenceClassification', 'bert-base-uncased', output_attention=True) # Update configuration during loading - assert model.config.output_attention == True - # Loading from a TF checkpoint file instead of a PyTorch model (slower) - config = AutoConfig.from_json_file('./tf_model/bert_tf_model_config.json') - model = torch.hub.load('huggingface/pytorch-transformers', 'modelForSequenceClassification', './tf_model/bert_tf_checkpoint.ckpt.index', from_tf=True, config=config) - - """ - - return AutoModelForSequenceClassification.from_pretrained(*args, **kwargs) - - -@add_start_docstrings(AutoModelForQuestionAnswering.__doc__) -def modelForQuestionAnswering(*args, **kwargs): - r""" - # Using torch.hub ! - import torch - - model = torch.hub.load('huggingface/pytorch-transformers', 'modelForQuestionAnswering', 'bert-base-uncased') # Download model and configuration from S3 and cache. - model = torch.hub.load('huggingface/pytorch-transformers', 'modelForQuestionAnswering', './test/bert_model/') # E.g. model was saved using `save_pretrained('./test/saved_model/')` - model = torch.hub.load('huggingface/pytorch-transformers', 'modelForQuestionAnswering', 'bert-base-uncased', output_attention=True) # Update configuration during loading - assert model.config.output_attention == True - # Loading from a TF checkpoint file instead of a PyTorch model (slower) - config = AutoConfig.from_json_file('./tf_model/bert_tf_model_config.json') - model = torch.hub.load('huggingface/pytorch-transformers', 'modelForQuestionAnswering', './tf_model/bert_tf_checkpoint.ckpt.index', from_tf=True, config=config) - - """ - return AutoModelForQuestionAnswering.from_pretrained(*args, **kwargs) diff --git a/hubconfs/bert_hubconf.py b/hubconfs/bert_hubconf.py deleted file mode 100644 index 6e2830617f..0000000000 --- a/hubconfs/bert_hubconf.py +++ /dev/null @@ -1,360 +0,0 @@ -from pytorch_transformers.tokenization_bert import BertTokenizer -from pytorch_transformers.modeling_bert import ( - BertModel, - BertForNextSentencePrediction, - BertForMaskedLM, - BertForMultipleChoice, - BertForPreTraining, - BertForQuestionAnswering, - BertForSequenceClassification, - BertForTokenClassification, - ) - -# A lot of models share the same param doc. Use a decorator -# to save typing -bert_docstring = """ - Params: - pretrained_model_name_or_path: either: - - a str with the name of a pre-trained model to load - . `bert-base-uncased` - . `bert-large-uncased` - . `bert-base-cased` - . `bert-large-cased` - . `bert-base-multilingual-uncased` - . `bert-base-multilingual-cased` - . `bert-base-chinese` - . `bert-base-german-cased` - . `bert-large-uncased-whole-word-masking` - . `bert-large-cased-whole-word-masking` - - a path or url to a pretrained model archive containing: - . `bert_config.json` a configuration file for the model - . `pytorch_model.bin` a PyTorch dump of a BertForPreTraining - instance - - a path or url to a pretrained model archive containing: - . `bert_config.json` a configuration file for the model - . `model.chkpt` a TensorFlow checkpoint - from_tf: should we load the weights from a locally saved TensorFlow - checkpoint - cache_dir: an optional path to a folder in which the pre-trained models - will be cached. - state_dict: an optional state dictionary - (collections.OrderedDict object) to use instead of Google - pre-trained models - *inputs, **kwargs: additional input for the specific Bert class - (ex: num_labels for BertForSequenceClassification) -""" - - -def _append_from_pretrained_docstring(docstr): - def docstring_decorator(fn): - fn.__doc__ = fn.__doc__ + docstr - return fn - return docstring_decorator - - -def bertTokenizer(*args, **kwargs): - """ - Instantiate a BertTokenizer from a pre-trained/customized vocab file - Args: - pretrained_model_name_or_path: Path to pretrained model archive - or one of pre-trained vocab configs below. - * bert-base-uncased - * bert-large-uncased - * bert-base-cased - * bert-large-cased - * bert-base-multilingual-uncased - * bert-base-multilingual-cased - * bert-base-chinese - Keyword args: - cache_dir: an optional path to a specific directory to download and cache - the pre-trained model weights. - Default: None - do_lower_case: Whether to lower case the input. - Only has an effect when do_wordpiece_only=False - Default: True - do_basic_tokenize: Whether to do basic tokenization before wordpiece. - Default: True - max_len: An artificial maximum length to truncate tokenized sequences to; - Effective maximum length is always the minimum of this - value (if specified) and the underlying BERT model's - sequence length. - Default: None - never_split: List of tokens which will never be split during tokenization. - Only has an effect when do_wordpiece_only=False - Default: ["[UNK]", "[SEP]", "[PAD]", "[CLS]", "[MASK]"] - - Example: - import torch - sentence = 'Hello, World!' - tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'bertTokenizer', 'bert-base-cased', do_basic_tokenize=False) - toks = tokenizer.tokenize(sentence) - ['Hello', '##,', 'World', '##!'] - ids = tokenizer.convert_tokens_to_ids(toks) - [8667, 28136, 1291, 28125] - """ - tokenizer = BertTokenizer.from_pretrained(*args, **kwargs) - return tokenizer - - -@_append_from_pretrained_docstring(bert_docstring) -def bertModel(*args, **kwargs): - """ - BertModel is the basic BERT Transformer model with a layer of summed token, - position and sequence embeddings followed by a series of identical - self-attention blocks (12 for BERT-base, 24 for BERT-large). - - Example: - # Load the tokenizer - import torch - tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'bertTokenizer', 'bert-base-cased', do_basic_tokenize=False) - # Prepare tokenized input - text = "[CLS] Who was Jim Henson ? [SEP] Jim Henson was a puppeteer [SEP]" - tokenized_text = tokenizer.tokenize(text) - indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text) - segments_ids = [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] - tokens_tensor = torch.tensor([indexed_tokens]) - segments_tensors = torch.tensor([segments_ids]) - # Load bertModel - model = torch.hub.load('huggingface/pytorch-transformers', 'bertModel', 'bert-base-cased') - model.eval() - # Predict hidden states features for each layer - with torch.no_grad(): - encoded_layers, _ = model(tokens_tensor, segments_tensors) - """ - model = BertModel.from_pretrained(*args, **kwargs) - return model - - -@_append_from_pretrained_docstring(bert_docstring) -def bertForNextSentencePrediction(*args, **kwargs): - """ - BERT model with next sentence prediction head. - This module comprises the BERT model followed by the next sentence - classification head. - - Example: - # Load the tokenizer - import torch - tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'bertTokenizer', 'bert-base-cased', do_basic_tokenize=False) - # Prepare tokenized input - text = "[CLS] Who was Jim Henson ? [SEP] Jim Henson was a puppeteer [SEP]" - tokenized_text = tokenizer.tokenize(text) - indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text) - segments_ids = [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] - tokens_tensor = torch.tensor([indexed_tokens]) - segments_tensors = torch.tensor([segments_ids]) - # Load bertForNextSentencePrediction - model = torch.hub.load('huggingface/pytorch-transformers', 'bertForNextSentencePrediction', 'bert-base-cased') - model.eval() - # Predict the next sentence classification logits - with torch.no_grad(): - next_sent_classif_logits = model(tokens_tensor, segments_tensors) - """ - model = BertForNextSentencePrediction.from_pretrained(*args, **kwargs) - return model - - -@_append_from_pretrained_docstring(bert_docstring) -def bertForPreTraining(*args, **kwargs): - """ - BERT model with pre-training heads. - This module comprises the BERT model followed by the two pre-training heads - - the masked language modeling head, and - - the next sentence classification head. - - Example: - # Load the tokenizer - import torch - tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'bertTokenizer', 'bert-base-cased', do_basic_tokenize=False) - # Prepare tokenized input - text = "[CLS] Who was Jim Henson ? [SEP] Jim Henson was a puppeteer [SEP]" - tokenized_text = tokenizer.tokenize(text) - segments_ids = [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] - tokens_tensor = torch.tensor([indexed_tokens]) - segments_tensors = torch.tensor([segments_ids]) - # Load bertForPreTraining - model = torch.hub.load('huggingface/pytorch-transformers', 'bertForPreTraining', 'bert-base-cased') - masked_lm_logits_scores, seq_relationship_logits = model(tokens_tensor, segments_tensors) - """ - model = BertForPreTraining.from_pretrained(*args, **kwargs) - return model - - -@_append_from_pretrained_docstring(bert_docstring) -def bertForMaskedLM(*args, **kwargs): - """ - BertForMaskedLM includes the BertModel Transformer followed by the - (possibly) pre-trained masked language modeling head. - - Example: - # Load the tokenizer - import torch - tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'bertTokenizer', 'bert-base-cased', do_basic_tokenize=False) - # Prepare tokenized input - text = "[CLS] Who was Jim Henson ? [SEP] Jim Henson was a puppeteer [SEP]" - tokenized_text = tokenizer.tokenize(text) - masked_index = 8 - tokenized_text[masked_index] = '[MASK]' - indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text) - segments_ids = [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] - tokens_tensor = torch.tensor([indexed_tokens]) - segments_tensors = torch.tensor([segments_ids]) - # Load bertForMaskedLM - model = torch.hub.load('huggingface/pytorch-transformers', 'bertForMaskedLM', 'bert-base-cased') - model.eval() - # Predict all tokens - with torch.no_grad(): - predictions = model(tokens_tensor, segments_tensors) - predicted_index = torch.argmax(predictions[0, masked_index]).item() - predicted_token = tokenizer.convert_ids_to_tokens([predicted_index])[0] - 'henson' - """ - model = BertForMaskedLM.from_pretrained(*args, **kwargs) - return model - - -@_append_from_pretrained_docstring(bert_docstring) -def bertForSequenceClassification(*args, **kwargs): - """ - BertForSequenceClassification is a fine-tuning model that includes - BertModel and a sequence-level (sequence or pair of sequences) classifier - on top of the BertModel. Note that the classification head is only initialized - and has to be trained. - - The sequence-level classifier is a linear layer that takes as input the - last hidden state of the first character in the input sequence - (see Figures 3a and 3b in the BERT paper). - - Args: - num_labels: the number (>=2) of classes for the classifier. - - Example: - # Load the tokenizer - import torch - tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'bertTokenizer', 'bert-base-cased', do_basic_tokenize=False) - # Prepare tokenized input - text = "[CLS] Who was Jim Henson ? [SEP] Jim Henson was a puppeteer [SEP]" - tokenized_text = tokenizer.tokenize(text) - indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text) - segments_ids = [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] - tokens_tensor = torch.tensor([indexed_tokens]) - segments_tensors = torch.tensor([segments_ids]) - # Load bertForSequenceClassification - model = torch.hub.load('huggingface/pytorch-transformers', 'bertForSequenceClassification', 'bert-base-cased', num_labels=2) - model.eval() - # Predict the sequence classification logits - with torch.no_grad(): - seq_classif_logits = model(tokens_tensor, segments_tensors) - # Or get the sequence classification loss - labels = torch.tensor([1]) - seq_classif_loss = model(tokens_tensor, segments_tensors, labels=labels) # set model.train() before if training this loss - """ - model = BertForSequenceClassification.from_pretrained(*args, **kwargs) - return model - - -@_append_from_pretrained_docstring(bert_docstring) -def bertForMultipleChoice(*args, **kwargs): - """ - BertForMultipleChoice is a fine-tuning model that includes BertModel and a - linear layer on top of the BertModel. Note that the multiple choice head is - only initialized and has to be trained. - - Args: - num_choices: the number (>=2) of classes for the classifier. - - Example: - # Load the tokenizer - import torch - tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'bertTokenizer', 'bert-base-cased', do_basic_tokenize=False) - # Prepare tokenized input - text = "[CLS] Who was Jim Henson ? [SEP] Jim Henson was a puppeteer [SEP]" - tokenized_text = tokenizer.tokenize(text) - indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text) - segments_ids = [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] - tokens_tensor = torch.tensor([indexed_tokens, indexed_tokens]).unsqueeze(0) - segments_tensors = torch.tensor([segments_ids, segments_ids]).unsqueeze(0) - # Load bertForMultipleChoice - model = torch.hub.load('huggingface/pytorch-transformers', 'bertForMultipleChoice', 'bert-base-cased', num_choices=2) - model.eval() - # Predict the multiple choice logits - with torch.no_grad(): - multiple_choice_logits = model(tokens_tensor, segments_tensors) - # Or get the multiple choice loss - labels = torch.tensor([1]) - multiple_choice_loss = model(tokens_tensor, segments_tensors, labels=labels) # set model.train() before if training this loss - """ - model = BertForMultipleChoice.from_pretrained(*args, **kwargs) - return model - - -@_append_from_pretrained_docstring(bert_docstring) -def bertForQuestionAnswering(*args, **kwargs): - """ - BertForQuestionAnswering is a fine-tuning model that includes BertModel - with a token-level classifiers on top of the full sequence of last hidden - states. Note that the classification head is only initialized - and has to be trained. - - Example: - # Load the tokenizer - import torch - tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'bertTokenizer', 'bert-base-cased', do_basic_tokenize=False) - # Prepare tokenized input - text = "[CLS] Who was Jim Henson ? [SEP] Jim Henson was a puppeteer [SEP]" - tokenized_text = tokenizer.tokenize(text) - indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text) - segments_ids = [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] - tokens_tensor = torch.tensor([indexed_tokens]) - segments_tensors = torch.tensor([segments_ids]) - # Load bertForQuestionAnswering - model = torch.hub.load('huggingface/pytorch-transformers', 'bertForQuestionAnswering', 'bert-base-cased') - model.eval() - # Predict the start and end positions logits - with torch.no_grad(): - start_logits, end_logits = model(tokens_tensor, segments_tensors) - # Or get the total loss which is the sum of the CrossEntropy loss for the start and end token positions - start_positions, end_positions = torch.tensor([12]), torch.tensor([14]) - # set model.train() before if training this loss - multiple_choice_loss = model(tokens_tensor, segments_tensors, start_positions=start_positions, end_positions=end_positions) - """ - model = BertForQuestionAnswering.from_pretrained(*args, **kwargs) - return model - - -@_append_from_pretrained_docstring(bert_docstring) -def bertForTokenClassification(*args, **kwargs): - """ - BertForTokenClassification is a fine-tuning model that includes BertModel - and a token-level classifier on top of the BertModel. Note that the classification - head is only initialized and has to be trained. - - The token-level classifier is a linear layer that takes as input the last - hidden state of the sequence. - - Args: - num_labels: the number (>=2) of classes for the classifier. - - Example: - # Load the tokenizer - import torch - tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'bertTokenizer', 'bert-base-cased', do_basic_tokenize=False) - # Prepare tokenized input - text = "[CLS] Who was Jim Henson ? [SEP] Jim Henson was a puppeteer [SEP]" - tokenized_text = tokenizer.tokenize(text) - indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text) - segments_ids = [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] - tokens_tensor = torch.tensor([indexed_tokens]) - segments_tensors = torch.tensor([segments_ids]) - # Load bertForTokenClassification - model = torch.hub.load('huggingface/pytorch-transformers', 'bertForTokenClassification', 'bert-base-cased', num_labels=2) - model.eval() - # Predict the token classification logits - with torch.no_grad(): - classif_logits = model(tokens_tensor, segments_tensors) - # Or get the token classification loss - labels = torch.tensor([[0, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 0]]) - classif_loss = model(tokens_tensor, segments_tensors, labels=labels) # set model.train() before if training this loss - """ - model = BertForTokenClassification.from_pretrained(*args, **kwargs) - return model diff --git a/hubconfs/gpt2_hubconf.py b/hubconfs/gpt2_hubconf.py deleted file mode 100644 index 18afad3913..0000000000 --- a/hubconfs/gpt2_hubconf.py +++ /dev/null @@ -1,168 +0,0 @@ -from pytorch_transformers.tokenization_gpt2 import GPT2Tokenizer -from pytorch_transformers.modeling_gpt2 import ( - GPT2Model, - GPT2LMHeadModel, - GPT2DoubleHeadsModel -) - -# A lot of models share the same param doc. Use a decorator -# to save typing -gpt2_docstring = """ - Params: - pretrained_model_name_or_path: either: - - a str with the name of a pre-trained model to load selected in the list of: - . `gpt2`, `gpt2-medium` - - a path or url to a pretrained model archive containing: - . `gpt2_config.json` a configuration file for the model - . `pytorch_model.bin` a PyTorch dump of a GPT2Model instance - - a path or url to a pretrained model archive containing: - . `gpt2_config.json` a configuration file for the model - . a TensorFlow checkpoint with trained weights - from_tf: should we load the weights from a locally saved TensorFlow checkpoint - cache_dir: an optional path to a folder in which the pre-trained models will be cached. - state_dict: an optional state dictionary (collections.OrderedDict object) to use instead of pre-trained models - *inputs, **kwargs: additional input for the specific GPT-2 class -""" - - -def _append_from_pretrained_docstring(docstr): - def docstring_decorator(fn): - fn.__doc__ = fn.__doc__ + docstr - return fn - return docstring_decorator - - -def gpt2Tokenizer(*args, **kwargs): - """ - Instantiate a GPT-2 BPE tokenizer for OpenAI GPT-2 from a pre-trained/customized vocab file. - Peculiarities: - - Byte-level BPE - - Args: - pretrained_model_name_or_path: Path to pretrained model archive - or one of pre-trained vocab configs below. - * gpt2 - Keyword args: - special_tokens: Special tokens in vocabulary that are not pretrained ([SEP], [CLS]...) - Default: None - max_len: An artificial maximum length to truncate tokenized sequences to; - Effective maximum length is always the minimum of this - value (if specified) and the underlying BERT model's - sequence length. - Default: None - - Example: - import torch - tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'gpt2Tokenizer', 'gpt2') - - text = "Who was Jim Henson ?" - indexed_tokens = tokenizer.encode(tokenized_text) - """ - tokenizer = GPT2Tokenizer.from_pretrained(*args, **kwargs) - return tokenizer - - -@_append_from_pretrained_docstring(gpt2_docstring) -def gpt2Model(*args, **kwargs): - """ - gpt2Model is the basic OpenAI GPT-2 Transformer model based on - identical stacked masked self-attention blocks and pre-trained - on large scale dataset using language modeling signal. - - Example: - # Load the tokenizer - import torch - tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'gpt2Tokenizer', 'gpt2') - - # Prepare tokenized input - text_1 = "Who was Jim Henson ?" - text_2 = "Jim Henson was a puppeteer" - indexed_tokens_1 = tokenizer.encode(text_1) - indexed_tokens_2 = tokenizer.encode(text_2) - tokens_tensor_1 = torch.tensor([indexed_tokens_1]) - tokens_tensor_2 = torch.tensor([indexed_tokens_2]) - - # Load gpt2Model - model = torch.hub.load('huggingface/pytorch-transformers', 'gpt2Model', 'gpt2') - model.eval() - - # Predict hidden states features for each layer - # past can be used to reuse precomputed hidden state in a subsequent predictions - with torch.no_grad(): - hidden_states_1, past = model(tokens_tensor_1) - hidden_states_2, past = model(tokens_tensor_2, past=past) - """ - model = GPT2Model.from_pretrained(*args, **kwargs) - return model - - -@_append_from_pretrained_docstring(gpt2_docstring) -def gpt2LMHeadModel(*args, **kwargs): - """ - gpt2LMHeadModel is the OpenAI GPT-2 Transformer model with the - tied (pre-trained) language modeling head on top. - - Example: - # Load the tokenizer - import torch - tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'gpt2Tokenizer', 'gpt2') - - # Prepare tokenized input - text_1 = "Who was Jim Henson ?" - text_2 = "Jim Henson was a puppeteer" - indexed_tokens_1 = tokenizer.encode(text_1) - indexed_tokens_2 = tokenizer.encode(text_2) - tokens_tensor_1 = torch.tensor([indexed_tokens_1]) - tokens_tensor_2 = torch.tensor([indexed_tokens_2]) - - # Load gpt2LMHeadModel - model = torch.hub.load('huggingface/pytorch-transformers', 'gpt2LMHeadModel', 'gpt2') - model.eval() - - # Predict hidden states features for each layer - # past can be used to reuse precomputed hidden state in a subsequent predictions - with torch.no_grad(): - predictions_1, past = model(tokens_tensor_1) - predictions_2, past = model(tokens_tensor_2, past=past) - - # Get the predicted last token - predicted_index = torch.argmax(predictions_2[0, -1, :]).item() - predicted_token = tokenizer.decode([predicted_index]) - assert predicted_token == ' who' - """ - model = GPT2LMHeadModel.from_pretrained(*args, **kwargs) - return model - - -@_append_from_pretrained_docstring(gpt2_docstring) -def gpt2DoubleHeadsModel(*args, **kwargs): - """ - gpt2DoubleHeadsModel is the OpenAI GPT-2 Transformer model with the - tied (pre-trained) language modeling head and a multiple choice - classification head (only initialized, not pre-trained). - - Example: - # Load the tokenizer - import torch - tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'gpt2Tokenizer', 'gpt2') - - # Prepare tokenized input - text1 = "Who was Jim Henson ? Jim Henson was a puppeteer" - text2 = "Who was Jim Henson ? Jim Henson was a mysterious young man" - tokenized_text1 = tokenizer.tokenize(text1) - tokenized_text2 = tokenizer.tokenize(text2) - indexed_tokens1 = tokenizer.convert_tokens_to_ids(tokenized_text1) - indexed_tokens2 = tokenizer.convert_tokens_to_ids(tokenized_text2) - tokens_tensor = torch.tensor([[indexed_tokens1, indexed_tokens2]]) - mc_token_ids = torch.LongTensor([[len(tokenized_text1)-1, len(tokenized_text2)-1]]) - - # Load gpt2DoubleHeadsModel - model = torch.hub.load('huggingface/pytorch-transformers', 'gpt2DoubleHeadsModel', 'gpt2') - model.eval() - - # Predict hidden states features for each layer - with torch.no_grad(): - lm_logits, multiple_choice_logits, presents = model(tokens_tensor, mc_token_ids) - """ - model = GPT2DoubleHeadsModel.from_pretrained(*args, **kwargs) - return model diff --git a/hubconfs/gpt_hubconf.py b/hubconfs/gpt_hubconf.py deleted file mode 100644 index 649075980c..0000000000 --- a/hubconfs/gpt_hubconf.py +++ /dev/null @@ -1,186 +0,0 @@ -from pytorch_transformers.tokenization_openai import OpenAIGPTTokenizer -from pytorch_transformers.modeling_openai import ( - OpenAIGPTModel, - OpenAIGPTLMHeadModel, - OpenAIGPTDoubleHeadsModel -) - -# Dependecies that are not specified in global hubconf.py -specific_dependencies = ['spacy', 'ftfy'] - -# A lot of models share the same param doc. Use a decorator -# to save typing -gpt_docstring = """ - OpenAI GPT use a single embedding matrix to store the word and special embeddings. - Special tokens embeddings are additional tokens that are not pre-trained: [SEP], [CLS]... - Special tokens need to be trained during the fine-tuning if you use them. - The number of special embeddings can be controled using the `set_num_special_tokens(num_special_tokens)` function. - - The embeddings are ordered as follow in the token embeddings matrice: - [0, ---------------------- - ... -> word embeddings - config.vocab_size - 1, ______________________ - config.vocab_size, - ... -> special embeddings - config.vocab_size + config.n_special - 1] ______________________ - - where total_tokens_embeddings can be obtained as config.total_tokens_embeddings and is: - total_tokens_embeddings = config.vocab_size + config.n_special - You should use the associate indices to index the embeddings. - - Params: - pretrained_model_name_or_path: either: - - a str with the name of a pre-trained model to load selected in the list of: - . `openai-gpt` - - a path or url to a pretrained model archive containing: - . `openai_gpt_config.json` a configuration file for the model - . `pytorch_model.bin` a PyTorch dump of a OpenAIGPTModel instance - - a path or url to a pretrained model archive containing: - . `openai-gpt-config.json` a configuration file for the model - . a series of NumPy files containing OpenAI TensorFlow trained weights - from_tf: should we load the weights from a locally saved TensorFlow checkpoint - cache_dir: an optional path to a folder in which the pre-trained models will be cached. - state_dict: an optional state dictionary (collections.OrderedDict object) - to use instead of pre-trained models - *inputs, **kwargs: additional input for the specific OpenAI-GPT class -""" - - -def _append_from_pretrained_docstring(docstr): - def docstring_decorator(fn): - fn.__doc__ = fn.__doc__ + docstr - return fn - return docstring_decorator - - -def openAIGPTTokenizer(*args, **kwargs): - """ - Instantiate a BPE tokenizer for OpenAI GPT from a pre-trained/customized vocab file. - Peculiarities: - - lower case all inputs - - uses SpaCy tokenizer ('en' model) and ftfy for pre-BPE tokenization if they are installed, fallback to BERT's BasicTokenizer if not. - - argument special_tokens and function set_special_tokens: - can be used to add additional symbols (ex: "__classify__") to a vocabulary. - - Args: - pretrained_model_name_or_path: Path to pretrained model archive - or one of pre-trained vocab configs below. - * openai-gpt - Keyword args: - special_tokens: Special tokens in vocabulary that are not pretrained ([SEP], [CLS]...) - Default: None - max_len: An artificial maximum length to truncate tokenized sequences to; - Effective maximum length is always the minimum of this - value (if specified) and the underlying BERT model's - sequence length. - Default: None - - Example: - import torch - tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'openAIGPTTokenizer', 'openai-gpt') - - text = "Who was Jim Henson ? Jim Henson was a puppeteer" - tokenized_text = tokenizer.tokenize(text) - indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text) - [763, 509, 4265, 2298, 945, 257, 4265, 2298, 945, 509, 246, 10148, 39041, 483] - """ - tokenizer = OpenAIGPTTokenizer.from_pretrained(*args, **kwargs) - return tokenizer - - -@_append_from_pretrained_docstring(gpt_docstring) -def openAIGPTModel(*args, **kwargs): - """ - OpenAIGPTModel is the basic OpenAI GPT Transformer model based on - identical stacked masked self-attention blocks and pre-trained - on large scale dataset using language modeling signal. - - Example: - # Load the tokenizer - import torch - tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'openAIGPTTokenizer', 'openai-gpt') - - # Prepare tokenized input - text = "Who was Jim Henson ? Jim Henson was a puppeteer" - tokenized_text = tokenizer.tokenize(text) - indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text) - tokens_tensor = torch.tensor([indexed_tokens]) - - # Load openAIGPTModel - model = torch.hub.load('huggingface/pytorch-transformers', 'openAIGPTModel', 'openai-gpt') - model.eval() - - # Predict hidden states features for each layer - with torch.no_grad(): - hidden_states = model(tokens_tensor) - """ - model = OpenAIGPTModel.from_pretrained(*args, **kwargs) - return model - - -@_append_from_pretrained_docstring(gpt_docstring) -def openAIGPTLMHeadModel(*args, **kwargs): - """ - OpenAIGPTLMHeadModel is the OpenAI GPT Transformer model with the - tied (pre-trained) language modeling head on top. - - Example: - # Load the tokenizer - import torch - tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'openAIGPTTokenizer', 'openai-gpt') - - # Prepare tokenized input - text = "Who was Jim Henson ? Jim Henson was a puppeteer" - tokenized_text = tokenizer.tokenize(text) - indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text) - tokens_tensor = torch.tensor([indexed_tokens]) - - # Load openAIGPTLMHeadModel - model = torch.hub.load('huggingface/pytorch-transformers', 'openAIGPTLMHeadModel', 'openai-gpt') - model.eval() - - # Predict hidden states features for each layer - with torch.no_grad(): - predictions = model(tokens_tensor) - - # Get the predicted last token - predicted_index = torch.argmax(predictions[0, -1, :]).item() - predicted_token = tokenizer.convert_ids_to_tokens([predicted_index])[0] - '.' - """ - model = OpenAIGPTLMHeadModel.from_pretrained(*args, **kwargs) - return model - - -@_append_from_pretrained_docstring(gpt_docstring) -def openAIGPTDoubleHeadsModel(*args, **kwargs): - """ - OpenAIGPTDoubleHeadsModel is the OpenAI GPT Transformer model with the - tied (pre-trained) language modeling head and a multiple choice - classification head (only initialized, not pre-trained). - - Example: - # Load the tokenizer - import torch - tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'openAIGPTTokenizer', 'openai-gpt') - - # Prepare tokenized input - text1 = "Who was Jim Henson ? Jim Henson was a puppeteer" - text2 = "Who was Jim Henson ? Jim Henson was a mysterious young man" - tokenized_text1 = tokenizer.tokenize(text1) - tokenized_text2 = tokenizer.tokenize(text2) - indexed_tokens1 = tokenizer.convert_tokens_to_ids(tokenized_text1) - indexed_tokens2 = tokenizer.convert_tokens_to_ids(tokenized_text2) - tokens_tensor = torch.tensor([[indexed_tokens1, indexed_tokens2]]) - mc_token_ids = torch.LongTensor([[len(tokenized_text1)-1, len(tokenized_text2)-1]]) - - # Load openAIGPTDoubleHeadsModel - model = torch.hub.load('huggingface/pytorch-transformers', 'openAIGPTDoubleHeadsModel', 'openai-gpt') - model.eval() - - # Predict hidden states features for each layer - with torch.no_grad(): - lm_logits, multiple_choice_logits = model(tokens_tensor, mc_token_ids) - """ - model = OpenAIGPTDoubleHeadsModel.from_pretrained(*args, **kwargs) - return model diff --git a/hubconfs/transformer_xl_hubconf.py b/hubconfs/transformer_xl_hubconf.py deleted file mode 100644 index 548d407581..0000000000 --- a/hubconfs/transformer_xl_hubconf.py +++ /dev/null @@ -1,130 +0,0 @@ -from pytorch_transformers.tokenization_transfo_xl import TransfoXLTokenizer -from pytorch_transformers.modeling_transfo_xl import ( - TransfoXLModel, - TransfoXLLMHeadModel -) - -# A lot of models share the same param doc. Use a decorator -# to save typing -transformer_xl_docstring = """ - Transformer XL use a relative positioning (with sinusiodal patterns) and adaptive softmax inputs which means that: - - you don't need to specify positioning embeddings indices - - the tokens in the vocabulary have to be sorted to decreasing frequency. - - Params: - pretrained_model_name_or_path: either: - - a str with the name of a pre-trained model to load selected in the list of: - . `transfo-xl-wt103` - - a path or url to a pretrained model archive containing: - . `transfo_xl_config.json` a configuration file for the model - . `pytorch_model.bin` a PyTorch dump of a TransfoXLModel instance - - a path or url to a pretrained model archive containing: - . `transfo_xl_config.json` a configuration file for the model - . `model.chkpt` a TensorFlow checkpoint - from_tf: should we load the weights from a locally saved TensorFlow checkpoint - cache_dir: an optional path to a folder in which the pre-trained models will be cached. - state_dict: an optional state dictionary (collections.OrderedDict object) to use instead of pre-trained models - *inputs, **kwargs: additional input for the specific TransformerXL class -""" - - -def _append_from_pretrained_docstring(docstr): - def docstring_decorator(fn): - fn.__doc__ = fn.__doc__ + docstr - return fn - return docstring_decorator - - -def transformerXLTokenizer(*args, **kwargs): - """ - Instantiate a Transformer-XL tokenizer adapted from Vocab class in https://github.com/kimiyoung/transformer-xl - - Args: - pretrained_model_name_or_path: Path to pretrained model archive - or one of pre-trained vocab configs below. - * transfo-xl-wt103 - - Example: - import torch - tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'transformerXLTokenizer', 'transfo-xl-wt103') - - text = "Who was Jim Henson ?" - tokenized_text = tokenizer.tokenize(tokenized_text) - indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text) - """ - tokenizer = TransfoXLTokenizer.from_pretrained(*args, **kwargs) - return tokenizer - - -@_append_from_pretrained_docstring(transformer_xl_docstring) -def transformerXLModel(*args, **kwargs): - """ - transformerXLModel is the basic Transformer XL model. - - Example: - # Load the tokenizer - import torch - tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'transformerXLTokenizer', 'transfo-xl-wt103') - - # Prepare tokenized input - text_1 = "Who was Jim Henson ?" - text_2 = "Jim Henson was a puppeteer" - tokenized_text_1 = tokenizer.tokenize(text_1) - tokenized_text_2 = tokenizer.tokenize(text_2) - indexed_tokens_1 = tokenizer.convert_tokens_to_ids(tokenized_text_1) - indexed_tokens_2 = tokenizer.convert_tokens_to_ids(tokenized_text_2) - tokens_tensor_1 = torch.tensor([indexed_tokens_1]) - tokens_tensor_2 = torch.tensor([indexed_tokens_2]) - - # Load transformerXLModel - model = torch.hub.load('huggingface/pytorch-transformers', 'transformerXLModel', 'transfo-xl-wt103') - model.eval() - - # Predict hidden states features for each layer - # We can re-use the memory cells in a subsequent call to attend a longer context - with torch.no_grad(): - hidden_states_1, mems_1 = model(tokens_tensor_1) - hidden_states_2, mems_2 = model(tokens_tensor_2, mems=mems_1) - """ - model = TransfoXLModel.from_pretrained(*args, **kwargs) - return model - - -@_append_from_pretrained_docstring(transformer_xl_docstring) -def transformerXLLMHeadModel(*args, **kwargs): - """ - transformerXLModel is the basic Transformer XL model with the - tied (pre-trained) language modeling head on top. - - Example: - # Load the tokenizer - import torch - tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'transformerXLTokenizer', 'transfo-xl-wt103') - - # Prepare tokenized input - text_1 = "Who was Jim Henson ?" - text_2 = "Jim Henson was a puppeteer" - tokenized_text_1 = tokenizer.tokenize(text_1) - tokenized_text_2 = tokenizer.tokenize(text_2) - indexed_tokens_1 = tokenizer.convert_tokens_to_ids(tokenized_text_1) - indexed_tokens_2 = tokenizer.convert_tokens_to_ids(tokenized_text_2) - tokens_tensor_1 = torch.tensor([indexed_tokens_1]) - tokens_tensor_2 = torch.tensor([indexed_tokens_2]) - - # Load transformerXLLMHeadModel - model = torch.hub.load('huggingface/pytorch-transformers', 'transformerXLLMHeadModel', 'transfo-xl-wt103') - model.eval() - - # Predict hidden states features for each layer - # We can re-use the memory cells in a subsequent call to attend a longer context - with torch.no_grad(): - predictions_1, mems_1 = model(tokens_tensor_1) - predictions_2, mems_2 = model(tokens_tensor_2, mems=mems_1) - - # Get the predicted last token - predicted_index = torch.argmax(predictions_2[0, -1, :]).item() - predicted_token = tokenizer.convert_ids_to_tokens([predicted_index])[0] - assert predicted_token == 'who' - """ - model = TransfoXLLMHeadModel.from_pretrained(*args, **kwargs) - return model diff --git a/hubconfs/xlm_hubconf.py b/hubconfs/xlm_hubconf.py deleted file mode 100644 index e96d923944..0000000000 --- a/hubconfs/xlm_hubconf.py +++ /dev/null @@ -1,167 +0,0 @@ -from pytorch_transformers.tokenization_xlm import XLMTokenizer -from pytorch_transformers.modeling_xlm import ( - XLMConfig, - XLMModel, - XLMWithLMHeadModel, - XLMForSequenceClassification, - XLMForQuestionAnswering -) - -# A lot of models share the same param doc. Use a decorator -# to save typing -xlm_start_docstring = """ - Model class adapted from the XLM Transformer model of - "Cross-lingual Language Model Pretraining" by Guillaume Lample, Alexis Conneau - Paper: https://arxiv.org/abs/1901.07291 - Original code: https://github.com/facebookresearch/XLM - - Example: - # Load the tokenizer - import torch - tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'xlmTokenizer', 'xlm-mlm-en-2048') - - # Prepare tokenized input - text_1 = "Who was Jim Henson ?" - text_2 = "Jim Henson was a puppeteer" - indexed_tokens_1 = tokenizer.encode(text_1) - indexed_tokens_2 = tokenizer.encode(text_2) - tokens_tensor_1 = torch.tensor([indexed_tokens_1]) - tokens_tensor_2 = torch.tensor([indexed_tokens_2]) -""" - -# A lot of models share the same param doc. Use a decorator -# to save typing -xlm_end_docstring = """ - Params: - pretrained_model_name_or_path: either: - - a str with the name of a pre-trained model to load selected in the list of: - . `xlm-mlm-en-2048` - - a path or url to a pretrained model archive containing: - . `config.json` a configuration file for the model - . `pytorch_model.bin` a PyTorch dump created using the `convert_xlm_checkpoint_to_pytorch` conversion script - cache_dir: an optional path to a folder in which the pre-trained models will be cached. - state_dict: an optional state dictionary (collections.OrderedDict object) to use instead of pre-trained models - *inputs, **kwargs: additional input for the specific XLM class -""" - - -def _begin_with_docstring(docstr): - def docstring_decorator(fn): - fn.__doc__ = fn.__doc__ + docstr - return fn - return docstring_decorator - -def _end_with_docstring(docstr): - def docstring_decorator(fn): - fn.__doc__ = fn.__doc__ + docstr - return fn - return docstring_decorator - - -def xlmTokenizer(*args, **kwargs): - """ - Instantiate a XLM BPE tokenizer for XLM from a pre-trained vocab file. - - Args: - pretrained_model_name_or_path: Path to pretrained model archive - or one of pre-trained vocab configs below. - * xlm-mlm-en-2048 - Keyword args: - special_tokens: Special tokens in vocabulary that are not pretrained - Default: None - max_len: An artificial maximum length to truncate tokenized sequences to; - Effective maximum length is always the minimum of this - value (if specified) and the underlying model's - sequence length. - Default: None - - Example: - import torch - tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'xlmTokenizer', 'xlm-mlm-en-2048') - - text = "Who was Jim Henson ?" - indexed_tokens = tokenizer.encode(tokenized_text) - """ - tokenizer = XLMTokenizer.from_pretrained(*args, **kwargs) - return tokenizer - - -@_begin_with_docstring(xlm_start_docstring) -@_end_with_docstring(xlm_end_docstring) -def xlmModel(*args, **kwargs): - """ - # Load xlmModel - model = torch.hub.load('huggingface/pytorch-transformers', 'xlmModel', 'xlm-mlm-en-2048') - model.eval() - - # Predict hidden states features for each layer - with torch.no_grad(): - hidden_states_1, mems = model(tokens_tensor_1) - hidden_states_2, mems = model(tokens_tensor_2, past=mems) - """ - model = XLMModel.from_pretrained(*args, **kwargs) - return model - - -@_begin_with_docstring(xlm_start_docstring) -@_end_with_docstring(xlm_end_docstring) -def xlmLMHeadModel(*args, **kwargs): - """ - # Prepare tokenized input - text_1 = "Who was Jim Henson ?" - text_2 = "Jim Henson was a puppeteer" - indexed_tokens_1 = tokenizer.encode(text_1) - indexed_tokens_2 = tokenizer.encode(text_2) - tokens_tensor_1 = torch.tensor([indexed_tokens_1]) - tokens_tensor_2 = torch.tensor([indexed_tokens_2]) - - # Load xlnetLMHeadModel - model = torch.hub.load('huggingface/pytorch-transformers', 'xlnetLMHeadModel', 'xlm-mlm-en-2048') - model.eval() - - # Predict hidden states features for each layer - with torch.no_grad(): - predictions_1, mems = model(tokens_tensor_1) - predictions_2, mems = model(tokens_tensor_2, mems=mems) - - # Get the predicted last token - predicted_index = torch.argmax(predictions_2[0, -1, :]).item() - predicted_token = tokenizer.decode([predicted_index]) - assert predicted_token == ' who' - """ - model = XLMWithLMHeadModel.from_pretrained(*args, **kwargs) - return model - - -# @_end_with_docstring(xlnet_docstring) -# def xlnetForSequenceClassification(*args, **kwargs): -# """ -# xlnetModel is the basic XLNet Transformer model from -# "XLNet: Generalized Autoregressive Pretraining for Language Understanding" -# by Zhilin Yang, Zihang Dai1, Yiming Yang, Jaime Carbonell, Ruslan Salakhutdinov, Quoc V. Le - -# Example: -# # Load the tokenizer -# import torch -# tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'xlnetTokenizer', 'xlm-mlm-en-2048') - -# # Prepare tokenized input -# text1 = "Who was Jim Henson ? Jim Henson was a puppeteer" -# text2 = "Who was Jim Henson ? Jim Henson was a mysterious young man" -# tokenized_text1 = tokenizer.tokenize(text1) -# tokenized_text2 = tokenizer.tokenize(text2) -# indexed_tokens1 = tokenizer.convert_tokens_to_ids(tokenized_text1) -# indexed_tokens2 = tokenizer.convert_tokens_to_ids(tokenized_text2) -# tokens_tensor = torch.tensor([[indexed_tokens1, indexed_tokens2]]) -# mc_token_ids = torch.LongTensor([[len(tokenized_text1)-1, len(tokenized_text2)-1]]) - -# # Load xlnetForSequenceClassification -# model = torch.hub.load('huggingface/pytorch-transformers', 'xlnetForSequenceClassification', 'xlm-mlm-en-2048') -# model.eval() - -# # Predict sequence classes logits -# with torch.no_grad(): -# lm_logits, mems = model(tokens_tensor) -# """ -# model = XLNetForSequenceClassification.from_pretrained(*args, **kwargs) -# return model diff --git a/hubconfs/xlnet_hubconf.1.py b/hubconfs/xlnet_hubconf.1.py deleted file mode 100644 index fa7b7ddb9f..0000000000 --- a/hubconfs/xlnet_hubconf.1.py +++ /dev/null @@ -1,169 +0,0 @@ -from pytorch_transformers.tokenization_xlnet import XLNetTokenizer -from pytorch_transformers.modeling_xlnet import ( - XLNetConfig, - XLNetModel, - XLNetLMHeadModel, - # XLNetForSequenceClassification -) - -# A lot of models share the same param doc. Use a decorator -# to save typing -xlnet_docstring = """ - Params: - pretrained_model_name_or_path: either: - - a str with the name of a pre-trained model to load selected in the list of: - . `xlnet-large-cased` - - a path or url to a pretrained model archive containing: - . `config.json` a configuration file for the model - . `pytorch_model.bin` a PyTorch dump of a XLNetForPreTraining instance - - a path or url to a pretrained model archive containing: - . `xlnet_config.json` a configuration file for the model - . `model.chkpt` a TensorFlow checkpoint - from_tf: should we load the weights from a locally saved TensorFlow checkpoint - cache_dir: an optional path to a folder in which the pre-trained models will be cached. - state_dict: an optional state dictionary (collections.OrderedDict object) to use instead of pre-trained models - *inputs, **kwargs: additional input for the specific XLNet class -""" - - -def _append_from_pretrained_docstring(docstr): - def docstring_decorator(fn): - fn.__doc__ = fn.__doc__ + docstr - return fn - return docstring_decorator - - -def xlnetTokenizer(*args, **kwargs): - """ - Instantiate a XLNet sentencepiece tokenizer for XLNet from a pre-trained vocab file. - Peculiarities: - - require Google sentencepiece (https://github.com/google/sentencepiece) - - Args: - pretrained_model_name_or_path: Path to pretrained model archive - or one of pre-trained vocab configs below. - * xlnet-large-cased - Keyword args: - special_tokens: Special tokens in vocabulary that are not pretrained - Default: None - max_len: An artificial maximum length to truncate tokenized sequences to; - Effective maximum length is always the minimum of this - value (if specified) and the underlying model's - sequence length. - Default: None - - Example: - import torch - tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'xlnetTokenizer', 'xlnet-large-cased') - - text = "Who was Jim Henson ?" - indexed_tokens = tokenizer.encode(tokenized_text) - """ - tokenizer = XLNetTokenizer.from_pretrained(*args, **kwargs) - return tokenizer - - -@_append_from_pretrained_docstring(xlnet_docstring) -def xlnetModel(*args, **kwargs): - """ - xlnetModel is the basic XLNet Transformer model from - "XLNet: Generalized Autoregressive Pretraining for Language Understanding" - by Zhilin Yang, Zihang Dai1, Yiming Yang, Jaime Carbonell, Ruslan Salakhutdinov, Quoc V. Le - - Example: - # Load the tokenizer - import torch - tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'xlnetTokenizer', 'xlnet-large-cased') - - # Prepare tokenized input - text_1 = "Who was Jim Henson ?" - text_2 = "Jim Henson was a puppeteer" - indexed_tokens_1 = tokenizer.encode(text_1) - indexed_tokens_2 = tokenizer.encode(text_2) - tokens_tensor_1 = torch.tensor([indexed_tokens_1]) - tokens_tensor_2 = torch.tensor([indexed_tokens_2]) - - # Load xlnetModel - model = torch.hub.load('huggingface/pytorch-transformers', 'xlnetModel', 'xlnet-large-cased') - model.eval() - - # Predict hidden states features for each layer - with torch.no_grad(): - hidden_states_1, mems = model(tokens_tensor_1) - hidden_states_2, mems = model(tokens_tensor_2, past=mems) - """ - model = XLNetModel.from_pretrained(*args, **kwargs) - return model - - -@_append_from_pretrained_docstring(xlnet_docstring) -def xlnetLMHeadModel(*args, **kwargs): - """ - xlnetModel is the basic XLNet Transformer model from - "XLNet: Generalized Autoregressive Pretraining for Language Understanding" - by Zhilin Yang, Zihang Dai1, Yiming Yang, Jaime Carbonell, Ruslan Salakhutdinov, Quoc V. Le - with a tied (pre-trained) language modeling head on top. - - Example: - # Load the tokenizer - import torch - tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'xlnetTokenizer', 'xlnet-large-cased') - - # Prepare tokenized input - text_1 = "Who was Jim Henson ?" - text_2 = "Jim Henson was a puppeteer" - indexed_tokens_1 = tokenizer.encode(text_1) - indexed_tokens_2 = tokenizer.encode(text_2) - tokens_tensor_1 = torch.tensor([indexed_tokens_1]) - tokens_tensor_2 = torch.tensor([indexed_tokens_2]) - - # Load xlnetLMHeadModel - model = torch.hub.load('huggingface/pytorch-transformers', 'xlnetLMHeadModel', 'xlnet-large-cased') - model.eval() - - # Predict hidden states features for each layer - with torch.no_grad(): - predictions_1, mems = model(tokens_tensor_1) - predictions_2, mems = model(tokens_tensor_2, mems=mems) - - # Get the predicted last token - predicted_index = torch.argmax(predictions_2[0, -1, :]).item() - predicted_token = tokenizer.decode([predicted_index]) - assert predicted_token == ' who' - """ - model = XLNetLMHeadModel.from_pretrained(*args, **kwargs) - return model - - -# @_append_from_pretrained_docstring(xlnet_docstring) -# def xlnetForSequenceClassification(*args, **kwargs): -# """ -# xlnetModel is the basic XLNet Transformer model from -# "XLNet: Generalized Autoregressive Pretraining for Language Understanding" -# by Zhilin Yang, Zihang Dai1, Yiming Yang, Jaime Carbonell, Ruslan Salakhutdinov, Quoc V. Le - -# Example: -# # Load the tokenizer -# import torch -# tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'xlnetTokenizer', 'xlnet-large-cased') - -# # Prepare tokenized input -# text1 = "Who was Jim Henson ? Jim Henson was a puppeteer" -# text2 = "Who was Jim Henson ? Jim Henson was a mysterious young man" -# tokenized_text1 = tokenizer.tokenize(text1) -# tokenized_text2 = tokenizer.tokenize(text2) -# indexed_tokens1 = tokenizer.convert_tokens_to_ids(tokenized_text1) -# indexed_tokens2 = tokenizer.convert_tokens_to_ids(tokenized_text2) -# tokens_tensor = torch.tensor([[indexed_tokens1, indexed_tokens2]]) -# mc_token_ids = torch.LongTensor([[len(tokenized_text1)-1, len(tokenized_text2)-1]]) - -# # Load xlnetForSequenceClassification -# model = torch.hub.load('huggingface/pytorch-transformers', 'xlnetForSequenceClassification', 'xlnet-large-cased') -# model.eval() - -# # Predict sequence classes logits -# with torch.no_grad(): -# lm_logits, mems = model(tokens_tensor) -# """ -# model = XLNetForSequenceClassification.from_pretrained(*args, **kwargs) -# return model diff --git a/pytorch_transformers/modeling_auto.py b/pytorch_transformers/modeling_auto.py index 0c328909c2..05ff5e5b33 100644 --- a/pytorch_transformers/modeling_auto.py +++ b/pytorch_transformers/modeling_auto.py @@ -18,11 +18,6 @@ from __future__ import absolute_import, division, print_function, unicode_litera import logging -import torch -import torch.nn as nn -from torch.nn import CrossEntropyLoss, MSELoss -from torch.nn.parameter import Parameter - from .modeling_bert import BertConfig, BertModel, BertForMaskedLM, BertForSequenceClassification, BertForQuestionAnswering from .modeling_openai import OpenAIGPTConfig, OpenAIGPTModel, OpenAIGPTLMHeadModel from .modeling_gpt2 import GPT2Config, GPT2Model, GPT2LMHeadModel