diff --git a/.circleci/config.yml b/.circleci/config.yml index 858ca001d6..ac23723f98 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -1,7 +1,7 @@ version: 2 jobs: build_py3: - working_directory: ~/pytorch-pretrained-BERT + working_directory: ~/pytorch-transformers docker: - image: circleci/python:3.5 steps: @@ -10,11 +10,11 @@ jobs: - run: sudo pip install pytest codecov pytest-cov - run: sudo pip install spacy ftfy==4.4.3 - run: sudo python -m spacy download en - - run: python -m pytest -sv ./pytorch_pretrained_bert/tests/ --cov + - run: python -m pytest -sv ./pytorch_transformers/tests/ --cov - run: codecov parallelism: 4 build_py2: - working_directory: ~/pytorch-pretrained-BERT + working_directory: ~/pytorch-transformers docker: - image: circleci/python:2.7 steps: @@ -23,7 +23,7 @@ jobs: - run: sudo pip install pytest codecov pytest-cov - run: sudo pip install spacy ftfy==4.4.3 - run: sudo python -m spacy download en - - run: python -m pytest -sv ./pytorch_pretrained_bert/tests/ --cov + - run: python -m pytest -sv ./pytorch_transformers/tests/ --cov - run: codecov parallelism: 4 workflows: diff --git a/.coveragerc b/.coveragerc index fe05dda9a8..9b8c40ecf1 100644 --- a/.coveragerc +++ b/.coveragerc @@ -1,5 +1,5 @@ [run] -source=pytorch_pretrained_bert +source=pytorch_transformers [report] exclude_lines = pragma: no cover diff --git a/README.md b/README.md index a5234bd9ba..b1e80edc89 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # PyTorch Pretrained BERT: The Big & Extending Repository of pretrained Transformers -[![CircleCI](https://circleci.com/gh/huggingface/pytorch-pretrained-BERT.svg?style=svg)](https://circleci.com/gh/huggingface/pytorch-pretrained-BERT) +[![CircleCI](https://circleci.com/gh/huggingface/pytorch-transformers.svg?style=svg)](https://circleci.com/gh/huggingface/pytorch-transformers) This repository contains op-for-op PyTorch reimplementations, pre-trained models and fine-tuning examples for: @@ -47,7 +47,7 @@ This 
repo was tested on Python 2.7 and 3.5+ (examples are tested only on python PyTorch pretrained bert can be installed by pip as follows: ```bash -pip install pytorch-pretrained-bert +pip install pytorch-transformers ``` If you want to reproduce the original tokenization process of the `OpenAI GPT` paper, you will need to install `ftfy` (limit to version 4.4.3 if you are using Python 2) and `SpaCy` : @@ -73,7 +73,7 @@ python -m spacy download en Again, if you don't install `ftfy` and `SpaCy`, the `OpenAI GPT` tokenizer will default to tokenize using BERT's `BasicTokenizer` followed by Byte-Pair Encoding (which should be fine for most usage). -A series of tests is included in the [tests folder](https://github.com/huggingface/pytorch-pretrained-BERT/tree/master/tests) and can be run using `pytest` (install pytest if needed: `pip install pytest`). +A series of tests is included in the [tests folder](https://github.com/huggingface/pytorch-transformers/tree/master/tests) and can be run using `pytest` (install pytest if needed: `pip install pytest`). 
You can run the tests with the command: ```bash @@ -84,51 +84,51 @@ python -m pytest -sv tests/ This package comprises the following classes that can be imported in Python and are detailed in the [Doc](#doc) section of this readme: -- Eight **Bert** PyTorch models (`torch.nn.Module`) with pre-trained weights (in the [`modeling.py`](./pytorch_pretrained_bert/modeling.py) file): - - [`BertModel`](./pytorch_pretrained_bert/modeling.py#L639) - raw BERT Transformer model (**fully pre-trained**), - - [`BertForMaskedLM`](./pytorch_pretrained_bert/modeling.py#L793) - BERT Transformer with the pre-trained masked language modeling head on top (**fully pre-trained**), - - [`BertForNextSentencePrediction`](./pytorch_pretrained_bert/modeling.py#L854) - BERT Transformer with the pre-trained next sentence prediction classifier on top (**fully pre-trained**), - - [`BertForPreTraining`](./pytorch_pretrained_bert/modeling.py#L722) - BERT Transformer with masked language modeling head and next sentence prediction classifier on top (**fully pre-trained**), - - [`BertForSequenceClassification`](./pytorch_pretrained_bert/modeling.py#L916) - BERT Transformer with a sequence classification head on top (BERT Transformer is **pre-trained**, the sequence classification head **is only initialized and has to be trained**), - - [`BertForMultipleChoice`](./pytorch_pretrained_bert/modeling.py#L982) - BERT Transformer with a multiple choice head on top (used for task like Swag) (BERT Transformer is **pre-trained**, the multiple choice classification head **is only initialized and has to be trained**), - - [`BertForTokenClassification`](./pytorch_pretrained_bert/modeling.py#L1051) - BERT Transformer with a token classification head on top (BERT Transformer is **pre-trained**, the token classification head **is only initialized and has to be trained**), - - [`BertForQuestionAnswering`](./pytorch_pretrained_bert/modeling.py#L1124) - BERT Transformer with a token classification head on top (BERT 
Transformer is **pre-trained**, the token classification head **is only initialized and has to be trained**). +- Eight **Bert** PyTorch models (`torch.nn.Module`) with pre-trained weights (in the [`modeling.py`](./pytorch_transformers/modeling.py) file): + - [`BertModel`](./pytorch_transformers/modeling.py#L639) - raw BERT Transformer model (**fully pre-trained**), + - [`BertForMaskedLM`](./pytorch_transformers/modeling.py#L793) - BERT Transformer with the pre-trained masked language modeling head on top (**fully pre-trained**), + - [`BertForNextSentencePrediction`](./pytorch_transformers/modeling.py#L854) - BERT Transformer with the pre-trained next sentence prediction classifier on top (**fully pre-trained**), + - [`BertForPreTraining`](./pytorch_transformers/modeling.py#L722) - BERT Transformer with masked language modeling head and next sentence prediction classifier on top (**fully pre-trained**), + - [`BertForSequenceClassification`](./pytorch_transformers/modeling.py#L916) - BERT Transformer with a sequence classification head on top (BERT Transformer is **pre-trained**, the sequence classification head **is only initialized and has to be trained**), + - [`BertForMultipleChoice`](./pytorch_transformers/modeling.py#L982) - BERT Transformer with a multiple choice head on top (used for task like Swag) (BERT Transformer is **pre-trained**, the multiple choice classification head **is only initialized and has to be trained**), + - [`BertForTokenClassification`](./pytorch_transformers/modeling.py#L1051) - BERT Transformer with a token classification head on top (BERT Transformer is **pre-trained**, the token classification head **is only initialized and has to be trained**), + - [`BertForQuestionAnswering`](./pytorch_transformers/modeling.py#L1124) - BERT Transformer with a token classification head on top (BERT Transformer is **pre-trained**, the token classification head **is only initialized and has to be trained**). 
-- Three **OpenAI GPT** PyTorch models (`torch.nn.Module`) with pre-trained weights (in the [`modeling_openai.py`](./pytorch_pretrained_bert/modeling_openai.py) file): - - [`OpenAIGPTModel`](./pytorch_pretrained_bert/modeling_openai.py#L536) - raw OpenAI GPT Transformer model (**fully pre-trained**), - - [`OpenAIGPTLMHeadModel`](./pytorch_pretrained_bert/modeling_openai.py#L643) - OpenAI GPT Transformer with the tied language modeling head on top (**fully pre-trained**), - - [`OpenAIGPTDoubleHeadsModel`](./pytorch_pretrained_bert/modeling_openai.py#L722) - OpenAI GPT Transformer with the tied language modeling head and a multiple choice classification head on top (OpenAI GPT Transformer is **pre-trained**, the multiple choice classification head **is only initialized and has to be trained**), +- Three **OpenAI GPT** PyTorch models (`torch.nn.Module`) with pre-trained weights (in the [`modeling_openai.py`](./pytorch_transformers/modeling_openai.py) file): + - [`OpenAIGPTModel`](./pytorch_transformers/modeling_openai.py#L536) - raw OpenAI GPT Transformer model (**fully pre-trained**), + - [`OpenAIGPTLMHeadModel`](./pytorch_transformers/modeling_openai.py#L643) - OpenAI GPT Transformer with the tied language modeling head on top (**fully pre-trained**), + - [`OpenAIGPTDoubleHeadsModel`](./pytorch_transformers/modeling_openai.py#L722) - OpenAI GPT Transformer with the tied language modeling head and a multiple choice classification head on top (OpenAI GPT Transformer is **pre-trained**, the multiple choice classification head **is only initialized and has to be trained**), -- Two **Transformer-XL** PyTorch models (`torch.nn.Module`) with pre-trained weights (in the [`modeling_transfo_xl.py`](./pytorch_pretrained_bert/modeling_transfo_xl.py) file): - - [`TransfoXLModel`](./pytorch_pretrained_bert/modeling_transfo_xl.py#L983) - Transformer-XL model which outputs the last hidden state and memory cells (**fully pre-trained**), - - 
[`TransfoXLLMHeadModel`](./pytorch_pretrained_bert/modeling_transfo_xl.py#L1260) - Transformer-XL with the tied adaptive softmax head on top for language modeling which outputs the logits/loss and memory cells (**fully pre-trained**), +- Two **Transformer-XL** PyTorch models (`torch.nn.Module`) with pre-trained weights (in the [`modeling_transfo_xl.py`](./pytorch_transformers/modeling_transfo_xl.py) file): + - [`TransfoXLModel`](./pytorch_transformers/modeling_transfo_xl.py#L983) - Transformer-XL model which outputs the last hidden state and memory cells (**fully pre-trained**), + - [`TransfoXLLMHeadModel`](./pytorch_transformers/modeling_transfo_xl.py#L1260) - Transformer-XL with the tied adaptive softmax head on top for language modeling which outputs the logits/loss and memory cells (**fully pre-trained**), -- Three **OpenAI GPT-2** PyTorch models (`torch.nn.Module`) with pre-trained weights (in the [`modeling_gpt2.py`](./pytorch_pretrained_bert/modeling_gpt2.py) file): - - [`GPT2Model`](./pytorch_pretrained_bert/modeling_gpt2.py#L479) - raw OpenAI GPT-2 Transformer model (**fully pre-trained**), - - [`GPT2LMHeadModel`](./pytorch_pretrained_bert/modeling_gpt2.py#L559) - OpenAI GPT-2 Transformer with the tied language modeling head on top (**fully pre-trained**), - - [`GPT2DoubleHeadsModel`](./pytorch_pretrained_bert/modeling_gpt2.py#L624) - OpenAI GPT-2 Transformer with the tied language modeling head and a multiple choice classification head on top (OpenAI GPT-2 Transformer is **pre-trained**, the multiple choice classification head **is only initialized and has to be trained**), +- Three **OpenAI GPT-2** PyTorch models (`torch.nn.Module`) with pre-trained weights (in the [`modeling_gpt2.py`](./pytorch_transformers/modeling_gpt2.py) file): + - [`GPT2Model`](./pytorch_transformers/modeling_gpt2.py#L479) - raw OpenAI GPT-2 Transformer model (**fully pre-trained**), + - [`GPT2LMHeadModel`](./pytorch_transformers/modeling_gpt2.py#L559) - OpenAI GPT-2 Transformer 
with the tied language modeling head on top (**fully pre-trained**), + - [`GPT2DoubleHeadsModel`](./pytorch_transformers/modeling_gpt2.py#L624) - OpenAI GPT-2 Transformer with the tied language modeling head and a multiple choice classification head on top (OpenAI GPT-2 Transformer is **pre-trained**, the multiple choice classification head **is only initialized and has to be trained**), -- Tokenizers for **BERT** (using word-piece) (in the [`tokenization.py`](./pytorch_pretrained_bert/tokenization.py) file): +- Tokenizers for **BERT** (using word-piece) (in the [`tokenization.py`](./pytorch_transformers/tokenization.py) file): - `BasicTokenizer` - basic tokenization (punctuation splitting, lower casing, etc.), - `WordpieceTokenizer` - WordPiece tokenization, - `BertTokenizer` - perform end-to-end tokenization, i.e. basic tokenization followed by WordPiece tokenization. -- Tokenizer for **OpenAI GPT** (using Byte-Pair-Encoding) (in the [`tokenization_openai.py`](./pytorch_pretrained_bert/tokenization_openai.py) file): +- Tokenizer for **OpenAI GPT** (using Byte-Pair-Encoding) (in the [`tokenization_openai.py`](./pytorch_transformers/tokenization_openai.py) file): - `OpenAIGPTTokenizer` - perform Byte-Pair-Encoding (BPE) tokenization. -- Tokenizer for **Transformer-XL** (word tokens ordered by frequency for adaptive softmax) (in the [`tokenization_transfo_xl.py`](./pytorch_pretrained_bert/tokenization_transfo_xl.py) file): +- Tokenizer for **Transformer-XL** (word tokens ordered by frequency for adaptive softmax) (in the [`tokenization_transfo_xl.py`](./pytorch_transformers/tokenization_transfo_xl.py) file): - `OpenAIGPTTokenizer` - perform word tokenization and can order words by frequency in a corpus for use in an adaptive softmax. 
-- Tokenizer for **OpenAI GPT-2** (using byte-level Byte-Pair-Encoding) (in the [`tokenization_gpt2.py`](./pytorch_pretrained_bert/tokenization_gpt2.py) file): +- Tokenizer for **OpenAI GPT-2** (using byte-level Byte-Pair-Encoding) (in the [`tokenization_gpt2.py`](./pytorch_transformers/tokenization_gpt2.py) file): - `GPT2Tokenizer` - perform byte-level Byte-Pair-Encoding (BPE) tokenization. -- Optimizer for **BERT** (in the [`optimization.py`](./pytorch_pretrained_bert/optimization.py) file): +- Optimizer for **BERT** (in the [`optimization.py`](./pytorch_transformers/optimization.py) file): - `BertAdam` - Bert version of Adam algorithm with weight decay fix, warmup and linear decay of the learning rate. -- Optimizer for **OpenAI GPT** (in the [`optimization_openai.py`](./pytorch_pretrained_bert/optimization_openai.py) file): +- Optimizer for **OpenAI GPT** (in the [`optimization_openai.py`](./pytorch_transformers/optimization_openai.py) file): - `OpenAIAdam` - OpenAI GPT version of Adam algorithm with weight decay fix, warmup and linear decay of the learning rate. -- Configuration classes for BERT, OpenAI GPT and Transformer-XL (in the respective [`modeling.py`](./pytorch_pretrained_bert/modeling.py), [`modeling_openai.py`](./pytorch_pretrained_bert/modeling_openai.py), [`modeling_transfo_xl.py`](./pytorch_pretrained_bert/modeling_transfo_xl.py) files): +- Configuration classes for BERT, OpenAI GPT and Transformer-XL (in the respective [`modeling.py`](./pytorch_transformers/modeling.py), [`modeling_openai.py`](./pytorch_transformers/modeling_openai.py), [`modeling_transfo_xl.py`](./pytorch_transformers/modeling_transfo_xl.py) files): - `BertConfig` - Configuration class to store the configuration of a `BertModel` with utilities to read and write from JSON configuration files. - `OpenAIGPTConfig` - Configuration class to store the configuration of a `OpenAIGPTModel` with utilities to read and write from JSON configuration files. 
- `GPT2Config` - Configuration class to store the configuration of a `GPT2Model` with utilities to read and write from JSON configuration files. @@ -175,7 +175,7 @@ First let's prepare a tokenized input with `BertTokenizer` ```python import torch -from pytorch_pretrained_bert import BertTokenizer, BertModel, BertForMaskedLM +from pytorch_transformers import BertTokenizer, BertModel, BertForMaskedLM # OPTIONAL: if you want to have more information on what's happening, activate the logger as follows import logging @@ -252,7 +252,7 @@ First let's prepare a tokenized input with `OpenAIGPTTokenizer` ```python import torch -from pytorch_pretrained_bert import OpenAIGPTTokenizer, OpenAIGPTModel, OpenAIGPTLMHeadModel +from pytorch_transformers import OpenAIGPTTokenizer, OpenAIGPTModel, OpenAIGPTLMHeadModel # OPTIONAL: if you want to have more information on what's happening, activate the logger as follows import logging @@ -339,7 +339,7 @@ First let's prepare a tokenized input with `TransfoXLTokenizer` ```python import torch -from pytorch_pretrained_bert import TransfoXLTokenizer, TransfoXLModel, TransfoXLLMHeadModel +from pytorch_transformers import TransfoXLTokenizer, TransfoXLModel, TransfoXLLMHeadModel # OPTIONAL: if you want to have more information on what's happening, activate the logger as follows import logging @@ -414,7 +414,7 @@ First let's prepare a tokenized input with `GPT2Tokenizer` ```python import torch -from pytorch_pretrained_bert import GPT2Tokenizer, GPT2Model, GPT2LMHeadModel +from pytorch_transformers import GPT2Tokenizer, GPT2Model, GPT2LMHeadModel # OPTIONAL: if you want to have more information on what's happening, activate the logger as follows import logging @@ -552,7 +552,7 @@ where - `bert_config.json` or `openai_gpt_config.json` a configuration file for the model, and - `pytorch_model.bin` a PyTorch dump of a pre-trained instance of `BertForPreTraining`, `OpenAIGPTModel`, `TransfoXLModel`, `GPT2LMHeadModel` (saved with the usual 
`torch.save()`) - If `PRE_TRAINED_MODEL_NAME_OR_PATH` is a shortcut name, the pre-trained weights will be downloaded from AWS S3 (see the links [here](pytorch_pretrained_bert/modeling.py)) and stored in a cache folder to avoid future download (the cache folder can be found at `~/.pytorch_pretrained_bert/`). + If `PRE_TRAINED_MODEL_NAME_OR_PATH` is a shortcut name, the pre-trained weights will be downloaded from AWS S3 (see the links [here](pytorch_transformers/modeling.py)) and stored in a cache folder to avoid future download (the cache folder can be found at `~/.pytorch_transformers/`). - `cache_dir` can be an optional path to a specific directory to download and cache the pre-trained model weights. This option is useful in particular when you are using distributed training: to avoid concurrent access to the same weights you can set for example `cache_dir='./pretrained_model_{}'.format(args.local_rank)` (see the section on distributed training for more information). - `from_tf`: should we load the weights from a locally saved TensorFlow checkpoint @@ -586,19 +586,19 @@ model = GPT2Model.from_pretrained('gpt2') #### Cache directory -`pytorch_pretrained_bert` save the pretrained weights in a cache directory which is located at (in this order of priority): +`pytorch_transformers` saves the pretrained weights in a cache directory which is located at (in this order of priority): - `cache_dir` optional arguments to the `from_pretrained()` method (see above), - shell environment variable `PYTORCH_PRETRAINED_BERT_CACHE`, -- PyTorch cache home + `/pytorch_pretrained_bert/` +- PyTorch cache home + `/pytorch_transformers/` where PyTorch cache home is defined by (in this order): - shell environment variable `ENV_TORCH_HOME` - shell environment variable `ENV_XDG_CACHE_HOME` + `/torch/`) - default: `~/.cache/torch/` -Usually, if you don't set any specific environment variable, `pytorch_pretrained_bert` cache will be at `~/.cache/torch/pytorch_pretrained_bert/`.
+Usually, if you don't set any specific environment variable, `pytorch_transformers` cache will be at `~/.cache/torch/pytorch_transformers/`. -You can alsways safely delete `pytorch_pretrained_bert` cache but the pretrained model weights and vocabulary files wil have to be re-downloaded from our S3. +You can always safely delete `pytorch_transformers` cache but the pretrained model weights and vocabulary files will have to be re-downloaded from our S3. ### Serialization best-practices @@ -621,7 +621,7 @@ The *default filenames* of these files are as follow: Here is the recommended way of saving the model, configuration and vocabulary to an `output_dir` directory and reloading the model and tokenizer afterwards: ```python -from pytorch_pretrained_bert import WEIGHTS_NAME, CONFIG_NAME +from pytorch_transformers import WEIGHTS_NAME, CONFIG_NAME output_dir = "./models/" @@ -719,7 +719,7 @@ The model can be instantiated with the following arguments: The inputs and output are **identical to the TensorFlow model inputs and outputs**. We detail them here. This model takes as *inputs*: -[`modeling.py`](./pytorch_pretrained_bert/modeling.py) +[`modeling.py`](./pytorch_transformers/modeling.py) - `input_ids`: a torch.LongTensor of shape [batch_size, sequence_length] with the word token indices in the vocabulary (see the tokens preprocessing logic in the scripts [`run_bert_extract_features.py`](./examples/run_bert_extract_features.py), [`run_bert_classifier.py`](./examples/run_bert_classifier.py) and [`run_bert_squad.py`](./examples/run_bert_squad.py)), and - `token_type_ids`: an optional torch.LongTensor of shape [batch_size, sequence_length] with the token types indices selected in [0, 1]. Type 0 corresponds to a `sentence A` and type 1 corresponds to a `sentence B` token (see BERT paper for more details). - `attention_mask`: an optional torch.LongTensor of shape [batch_size, sequence_length] with indices selected in [0, 1].
It's a mask to be used if some input sequence lengths are smaller than the max input sequence length of the current batch. It's the mask that we typically use for attention when a batch has varying length sentences. @@ -852,7 +852,7 @@ The model can be instantiated with the following arguments: The inputs and output are **identical to the TensorFlow model inputs and outputs**. We detail them here. This model takes as *inputs*: -[`modeling_openai.py`](./pytorch_pretrained_bert/modeling_openai.py) +[`modeling_openai.py`](./pytorch_transformers/modeling_openai.py) - `input_ids`: a torch.LongTensor of shape [batch_size, sequence_length] (or more generally [d_1, ..., d_n, sequence_length] were d_1 ... d_n are arbitrary dimensions) with the word BPE token indices selected in the range [0, total_tokens_embeddings[ - `position_ids`: an optional torch.LongTensor with the same shape as input_ids with the position indices (selected in the range [0, config.n_positions - 1[. @@ -905,7 +905,7 @@ Transformer XL use a relative positioning with sinusiodal patterns and adaptive - the tokens in the vocabulary have to be sorted to decreasing frequency. This model takes as *inputs*: -[`modeling_transfo_xl.py`](./pytorch_pretrained_bert/modeling_transfo_xl.py) +[`modeling_transfo_xl.py`](./pytorch_transformers/modeling_transfo_xl.py) - `input_ids`: a torch.LongTensor of shape [batch_size, sequence_length] with the token indices selected in the range [0, self.config.n_token[ - `mems`: an optional memory of hidden states from previous forward passes as a list (num layers) of hidden states at the entry of each layer. Each hidden states has shape [self.config.mem_len, bsz, self.config.d_model]. Note that the first two dimensions are transposed in `mems` with regards to `input_ids`. @@ -952,7 +952,7 @@ The model can be instantiated with the following arguments: The inputs and output are **identical to the TensorFlow model inputs and outputs**. We detail them here. 
This model takes as *inputs*: -[`modeling_gpt2.py`](./pytorch_pretrained_bert/modeling_gpt2.py) +[`modeling_gpt2.py`](./pytorch_transformers/modeling_gpt2.py) - `input_ids`: a torch.LongTensor of shape [batch_size, sequence_length] (or more generally [d_1, ..., d_n, sequence_length] were d_1 ... d_n are arbitrary dimensions) with the word BPE token indices selected in the range [0, vocab_size[ - `position_ids`: an optional torch.LongTensor with the same shape as input_ids with the position indices (selected in the range [0, config.n_positions - 1[. @@ -1020,7 +1020,7 @@ and three methods: - `convert_ids_to_tokens(tokens)`: convert a list of `int` indices in a list of `str` tokens in the vocabulary. - `save_vocabulary(directory_path)`: save the vocabulary file to `directory_path`. Return the path to the saved vocabulary file: `vocab_file_path`. The vocabulary can be reloaded with `BertTokenizer.from_pretrained('vocab_file_path')` or `BertTokenizer.from_pretrained('directory_path')`. -Please refer to the doc strings and code in [`tokenization.py`](./pytorch_pretrained_bert/tokenization.py) for the details of the `BasicTokenizer` and `WordpieceTokenizer` classes. In general it is recommended to use `BertTokenizer` unless you know what you are doing. +Please refer to the doc strings and code in [`tokenization.py`](./pytorch_transformers/tokenization.py) for the details of the `BasicTokenizer` and `WordpieceTokenizer` classes. In general it is recommended to use `BertTokenizer` unless you know what you are doing. #### `OpenAIGPTTokenizer` @@ -1043,7 +1043,7 @@ and five methods: - `decode(ids, skip_special_tokens=False, clean_up_tokenization_spaces=False)`: decode a list of `int` indices in a string and do some post-processing if needed: (i) remove special tokens from the output and (ii) clean up tokenization spaces. - `save_vocabulary(directory_path)`: save the vocabulary, merge and special tokens files to `directory_path`. 
Return the path to the three files: `vocab_file_path`, `merge_file_path`, `special_tokens_file_path`. The vocabulary can be reloaded with `OpenAIGPTTokenizer.from_pretrained('directory_path')`. -Please refer to the doc strings and code in [`tokenization_openai.py`](./pytorch_pretrained_bert/tokenization_openai.py) for the details of the `OpenAIGPTTokenizer`. +Please refer to the doc strings and code in [`tokenization_openai.py`](./pytorch_transformers/tokenization_openai.py) for the details of the `OpenAIGPTTokenizer`. #### `TransfoXLTokenizer` @@ -1051,7 +1051,7 @@ Please refer to the doc strings and code in [`tokenization_openai.py`](./pytorch The API is similar to the API of `BertTokenizer` (see above). -Please refer to the doc strings and code in [`tokenization_transfo_xl.py`](./pytorch_pretrained_bert/tokenization_transfo_xl.py) for the details of these additional methods in `TransfoXLTokenizer`. +Please refer to the doc strings and code in [`tokenization_transfo_xl.py`](./pytorch_transformers/tokenization_transfo_xl.py) for the details of these additional methods in `TransfoXLTokenizer`. #### `GPT2Tokenizer` @@ -1073,7 +1073,7 @@ and two methods: - `decode(tokens)`: convert back a list of `int` tokens in a `str`. - `save_vocabulary(directory_path)`: save the vocabulary, merge and special tokens files to `directory_path`. Return the path to the three files: `vocab_file_path`, `merge_file_path`, `special_tokens_file_path`. The vocabulary can be reloaded with `OpenAIGPTTokenizer.from_pretrained('directory_path')`. -Please refer to [`tokenization_gpt2.py`](./pytorch_pretrained_bert/tokenization_gpt2.py) for more details on the `GPT2Tokenizer`. +Please refer to [`tokenization_gpt2.py`](./pytorch_transformers/tokenization_gpt2.py) for more details on the `GPT2Tokenizer`. 
### Optimizers @@ -1155,7 +1155,7 @@ Here is how to use these techniques in our scripts: - **Distributed training**: Distributed training can be activated by supplying an integer greater or equal to 0 to the `--local_rank` argument (see below). - **16-bits training**: 16-bits training, also called mixed-precision training, can reduce the memory requirement of your model on the GPU by using half-precision training, basically allowing to double the batch size. If you have a recent GPU (starting from NVIDIA Volta architecture) you should see no decrease in speed. A good introduction to Mixed precision training can be found [here](https://devblogs.nvidia.com/mixed-precision-training-deep-neural-networks/) and a full documentation is [here](https://docs.nvidia.com/deeplearning/sdk/mixed-precision-training/index.html). In our scripts, this option can be activated by setting the `--fp16` flag and you can play with loss scaling using the `--loss_scale` flag (see the previously linked documentation for details on loss scaling). The loss scale can be zero in which case the scale is dynamically adjusted or a positive power of two in which case the scaling is static. -To use 16-bits training and distributed training, you need to install NVIDIA's apex extension [as detailed here](https://github.com/nvidia/apex). You will find more information regarding the internals of `apex` and how to use `apex` in [the doc and the associated repository](https://github.com/nvidia/apex). The results of the tests performed on pytorch-BERT by the NVIDIA team (and my trials at reproducing them) can be consulted in [the relevant PR of the present repository](https://github.com/huggingface/pytorch-pretrained-BERT/pull/116). +To use 16-bits training and distributed training, you need to install NVIDIA's apex extension [as detailed here](https://github.com/nvidia/apex). 
You will find more information regarding the internals of `apex` and how to use `apex` in [the doc and the associated repository](https://github.com/nvidia/apex). The results of the tests performed on pytorch-BERT by the NVIDIA team (and my trials at reproducing them) can be consulted in [the relevant PR of the present repository](https://github.com/huggingface/pytorch-transformers/pull/116). Note: To use *Distributed Training*, you will need to run one training script on each of your machines. This can be done for example by running the following command on each server (see [the above mentioned blog post]((https://medium.com/huggingface/training-larger-batches-practical-tips-on-1-gpu-multi-gpu-distributed-setups-ec88c3e51255)) for more details): ```bash @@ -1660,7 +1660,7 @@ To help you understand and use these features, we have added a specific example ## Notebooks -We include [three Jupyter Notebooks](https://github.com/huggingface/pytorch-pretrained-BERT/tree/master/notebooks) that can be used to check that the predictions of the PyTorch model are identical to the predictions of the original TensorFlow model. +We include [three Jupyter Notebooks](https://github.com/huggingface/pytorch-transformers/tree/master/notebooks) that can be used to check that the predictions of the PyTorch model are identical to the predictions of the original TensorFlow model. - The first NoteBook ([Comparing-TF-and-PT-models.ipynb](./notebooks/Comparing-TF-and-PT-models.ipynb)) extracts the hidden states of a full sequence on each layers of the TensorFlow and the PyTorch models and computes the standard deviation between them. In the given example, we get a standard deviation of 1.5e-7 to 9e-7 on the various hidden state of the models. 
@@ -1676,7 +1676,7 @@ A command-line interface is provided to convert a TensorFlow checkpoint in a PyT ### BERT -You can convert any TensorFlow checkpoint for BERT (in particular [the pre-trained models released by Google](https://github.com/google-research/bert#pre-trained-models)) in a PyTorch save file by using the [`convert_tf_checkpoint_to_pytorch.py`](./pytorch_pretrained_bert/convert_tf_checkpoint_to_pytorch.py ) script. +You can convert any TensorFlow checkpoint for BERT (in particular [the pre-trained models released by Google](https://github.com/google-research/bert#pre-trained-models)) in a PyTorch save file by using the [`convert_tf_checkpoint_to_pytorch.py`](./pytorch_transformers/convert_tf_checkpoint_to_pytorch.py ) script. This CLI takes as input a TensorFlow checkpoint (three files starting with `bert_model.ckpt`) and the associated configuration file (`bert_config.json`), and creates a PyTorch model for this configuration, loads the weights from the TensorFlow checkpoint in the PyTorch model and saves the resulting model in a standard PyTorch save file that can be imported using `torch.load()` (see examples in [`run_bert_extract_features.py`](./examples/run_bert_extract_features.py), [`run_bert_classifier.py`](./examples/run_bert_classifier.py) and [`run_bert_squad.py`](./examples/run_bert_squad.py)). 
@@ -1689,7 +1689,7 @@ Here is an example of the conversion process for a pre-trained `BERT-Base Uncase ```shell export BERT_BASE_DIR=/path/to/bert/uncased_L-12_H-768_A-12 -pytorch_pretrained_bert bert \ +pytorch_transformers bert \ $BERT_BASE_DIR/bert_model.ckpt \ $BERT_BASE_DIR/bert_config.json \ $BERT_BASE_DIR/pytorch_model.bin @@ -1704,7 +1704,7 @@ Here is an example of the conversion process for a pre-trained OpenAI GPT model, ```shell export OPENAI_GPT_CHECKPOINT_FOLDER_PATH=/path/to/openai/pretrained/numpy/weights -pytorch_pretrained_bert gpt \ +pytorch_transformers gpt \ $OPENAI_GPT_CHECKPOINT_FOLDER_PATH \ $PYTORCH_DUMP_OUTPUT \ [OPENAI_GPT_CONFIG] @@ -1717,7 +1717,7 @@ Here is an example of the conversion process for a pre-trained Transformer-XL mo ```shell export TRANSFO_XL_CHECKPOINT_FOLDER_PATH=/path/to/transfo/xl/checkpoint -pytorch_pretrained_bert transfo_xl \ +pytorch_transformers transfo_xl \ $TRANSFO_XL_CHECKPOINT_FOLDER_PATH \ $PYTORCH_DUMP_OUTPUT \ [TRANSFO_XL_CONFIG] @@ -1730,7 +1730,7 @@ Here is an example of the conversion process for a pre-trained OpenAI's GPT-2 mo ```shell export GPT2_DIR=/path/to/gpt2/checkpoint -pytorch_pretrained_bert gpt2 \ +pytorch_transformers gpt2 \ $GPT2_DIR/model.ckpt \ $PYTORCH_DUMP_OUTPUT \ [GPT2_CONFIG] @@ -1744,7 +1744,7 @@ Here is an example of the conversion process for a pre-trained XLNet model, fine export TRANSFO_XL_CHECKPOINT_PATH=/path/to/xlnet/checkpoint export TRANSFO_XL_CONFIG_PATH=/path/to/xlnet/config -pytorch_pretrained_bert xlnet \ +pytorch_transformers xlnet \ $TRANSFO_XL_CHECKPOINT_PATH \ $TRANSFO_XL_CONFIG_PATH \ $PYTORCH_DUMP_OUTPUT \ diff --git a/docker/Dockerfile b/docker/Dockerfile index e47eb548f9..1a6c6f06f9 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -2,6 +2,6 @@ FROM pytorch/pytorch:latest RUN git clone https://github.com/NVIDIA/apex.git && cd apex && python setup.py install --cuda_ext --cpp_ext -RUN pip install pytorch-pretrained-bert +RUN pip install pytorch_transformers 
WORKDIR /workspace \ No newline at end of file diff --git a/examples/bertology.py b/examples/bertology.py index 6f7f7c9592..096b1b44fc 100644 --- a/examples/bertology.py +++ b/examples/bertology.py @@ -12,7 +12,7 @@ from torch.utils.data import DataLoader, SequentialSampler, TensorDataset, Subse from torch.utils.data.distributed import DistributedSampler from torch.nn import CrossEntropyLoss, MSELoss -from pytorch_pretrained_bert import BertForSequenceClassification, BertTokenizer +from pytorch_transformers import BertForSequenceClassification, BertTokenizer from utils_glue import processors, output_modes, convert_examples_to_features, compute_metrics diff --git a/examples/generation_xlnet.py b/examples/generation_xlnet.py index e54f6a365f..fe3610cfd1 100644 --- a/examples/generation_xlnet.py +++ b/examples/generation_xlnet.py @@ -1,6 +1,6 @@ import torch from torch.nn import functional as F -from pytorch_pretrained_bert import XLNetModel, XLNetLMHeadModel, XLNetTokenizer +from pytorch_transformers import XLNetModel, XLNetLMHeadModel, XLNetTokenizer import logging logging.basicConfig(level=logging.INFO) diff --git a/examples/lm_finetuning/finetune_on_pregenerated.py b/examples/lm_finetuning/finetune_on_pregenerated.py index 8eda2aa5c5..505cd466f6 100644 --- a/examples/lm_finetuning/finetune_on_pregenerated.py +++ b/examples/lm_finetuning/finetune_on_pregenerated.py @@ -13,10 +13,10 @@ from torch.utils.data import DataLoader, Dataset, RandomSampler from torch.utils.data.distributed import DistributedSampler from tqdm import tqdm -from pytorch_pretrained_bert import WEIGHTS_NAME, CONFIG_NAME -from pytorch_pretrained_bert.modeling_bert import BertForPreTraining -from pytorch_pretrained_bert.tokenization_bert import BertTokenizer -from pytorch_pretrained_bert.optimization import BertAdam, WarmupLinearSchedule +from pytorch_transformers import WEIGHTS_NAME, CONFIG_NAME +from pytorch_transformers.modeling_bert import BertForPreTraining +from 
pytorch_transformers.tokenization_bert import BertTokenizer +from pytorch_transformers.optimization import BertAdam, WarmupLinearSchedule InputFeatures = namedtuple("InputFeatures", "input_ids input_mask segment_ids lm_label_ids is_next") diff --git a/examples/lm_finetuning/pregenerate_training_data.py b/examples/lm_finetuning/pregenerate_training_data.py index c2211c88e6..b79257fd4b 100644 --- a/examples/lm_finetuning/pregenerate_training_data.py +++ b/examples/lm_finetuning/pregenerate_training_data.py @@ -5,7 +5,7 @@ from tempfile import TemporaryDirectory import shelve from random import random, randrange, randint, shuffle, choice -from pytorch_pretrained_bert.tokenization_bert import BertTokenizer +from pytorch_transformers.tokenization_bert import BertTokenizer import numpy as np import json import collections diff --git a/examples/lm_finetuning/simple_lm_finetuning.py b/examples/lm_finetuning/simple_lm_finetuning.py index bcfd138442..3008787cd1 100644 --- a/examples/lm_finetuning/simple_lm_finetuning.py +++ b/examples/lm_finetuning/simple_lm_finetuning.py @@ -29,10 +29,10 @@ from torch.utils.data import DataLoader, Dataset, RandomSampler from torch.utils.data.distributed import DistributedSampler from tqdm import tqdm, trange -from pytorch_pretrained_bert import WEIGHTS_NAME, CONFIG_NAME -from pytorch_pretrained_bert.modeling_bert import BertForPreTraining -from pytorch_pretrained_bert.tokenization_bert import BertTokenizer -from pytorch_pretrained_bert.optimization import BertAdam, WarmupLinearSchedule +from pytorch_transformers import WEIGHTS_NAME, CONFIG_NAME +from pytorch_transformers.modeling_bert import BertForPreTraining +from pytorch_transformers.tokenization_bert import BertTokenizer +from pytorch_transformers.optimization import BertAdam, WarmupLinearSchedule logging.basicConfig(format='%(asctime)s - %(levelname)s - %(name)s - %(message)s', datefmt='%m/%d/%Y %H:%M:%S', diff --git a/examples/run_bert_classifier.py b/examples/run_bert_classifier.py 
index 233a7ee5d1..506aecc5b1 100644 --- a/examples/run_bert_classifier.py +++ b/examples/run_bert_classifier.py @@ -34,10 +34,10 @@ from torch.nn import CrossEntropyLoss, MSELoss from tensorboardX import SummaryWriter -from pytorch_pretrained_bert import WEIGHTS_NAME, CONFIG_NAME -from pytorch_pretrained_bert.modeling_bert import BertForSequenceClassification -from pytorch_pretrained_bert.tokenization_bert import BertTokenizer -from pytorch_pretrained_bert.optimization import BertAdam, WarmupLinearSchedule +from pytorch_transformers import WEIGHTS_NAME, CONFIG_NAME +from pytorch_transformers.modeling_bert import BertForSequenceClassification +from pytorch_transformers.tokenization_bert import BertTokenizer +from pytorch_transformers.optimization import BertAdam, WarmupLinearSchedule from utils_glue import processors, output_modes, convert_examples_to_features, compute_metrics diff --git a/examples/run_bert_extract_features.py b/examples/run_bert_extract_features.py index 2a550c431a..cc7dedd6af 100644 --- a/examples/run_bert_extract_features.py +++ b/examples/run_bert_extract_features.py @@ -28,8 +28,8 @@ import torch from torch.utils.data import TensorDataset, DataLoader, SequentialSampler from torch.utils.data.distributed import DistributedSampler -from pytorch_pretrained_bert.tokenization_bert import BertTokenizer -from pytorch_pretrained_bert.modeling_bert import BertModel +from pytorch_transformers.tokenization_bert import BertTokenizer +from pytorch_transformers.modeling_bert import BertModel logging.basicConfig(format = '%(asctime)s - %(levelname)s - %(name)s - %(message)s', datefmt = '%m/%d/%Y %H:%M:%S', diff --git a/examples/run_bert_squad.py b/examples/run_bert_squad.py index f8eee9c8eb..c3fdb06316 100644 --- a/examples/run_bert_squad.py +++ b/examples/run_bert_squad.py @@ -33,10 +33,10 @@ from tqdm import tqdm, trange from tensorboardX import SummaryWriter -from pytorch_pretrained_bert import WEIGHTS_NAME, CONFIG_NAME -from 
pytorch_pretrained_bert.modeling_bert import BertForQuestionAnswering -from pytorch_pretrained_bert.optimization import BertAdam, WarmupLinearSchedule -from pytorch_pretrained_bert.tokenization_bert import BertTokenizer +from pytorch_transformers import WEIGHTS_NAME, CONFIG_NAME +from pytorch_transformers.modeling_bert import BertForQuestionAnswering +from pytorch_transformers.optimization import BertAdam, WarmupLinearSchedule +from pytorch_transformers.tokenization_bert import BertTokenizer from utils_squad import read_squad_examples, convert_examples_to_features, RawResult, write_predictions diff --git a/examples/run_bert_swag.py b/examples/run_bert_swag.py index 3e45225891..00cd3a7840 100644 --- a/examples/run_bert_swag.py +++ b/examples/run_bert_swag.py @@ -32,10 +32,10 @@ from torch.utils.data import (DataLoader, RandomSampler, SequentialSampler, from torch.utils.data.distributed import DistributedSampler from tqdm import tqdm, trange -from pytorch_pretrained_bert.file_utils import PYTORCH_PRETRAINED_BERT_CACHE, WEIGHTS_NAME, CONFIG_NAME -from pytorch_pretrained_bert.modeling_bert import BertForMultipleChoice, BertConfig -from pytorch_pretrained_bert.optimization import BertAdam, WarmupLinearSchedule -from pytorch_pretrained_bert.tokenization_bert import BertTokenizer +from pytorch_transformers.file_utils import PYTORCH_PRETRAINED_BERT_CACHE, WEIGHTS_NAME, CONFIG_NAME +from pytorch_transformers.modeling_bert import BertForMultipleChoice, BertConfig +from pytorch_transformers.optimization import BertAdam, WarmupLinearSchedule +from pytorch_transformers.tokenization_bert import BertTokenizer logging.basicConfig(format = '%(asctime)s - %(levelname)s - %(name)s - %(message)s', datefmt = '%m/%d/%Y %H:%M:%S', diff --git a/examples/run_gpt2.py b/examples/run_gpt2.py index 8f8208bbcd..a759e449f9 100644 --- a/examples/run_gpt2.py +++ b/examples/run_gpt2.py @@ -8,7 +8,7 @@ import torch import torch.nn.functional as F import numpy as np -from pytorch_pretrained_bert 
import GPT2LMHeadModel, GPT2Tokenizer +from pytorch_transformers import GPT2LMHeadModel, GPT2Tokenizer logging.basicConfig(format = '%(asctime)s - %(levelname)s - %(name)s - %(message)s', datefmt = '%m/%d/%Y %H:%M:%S', diff --git a/examples/run_openai_gpt.py b/examples/run_openai_gpt.py index ac5c474491..02b86b3a22 100644 --- a/examples/run_openai_gpt.py +++ b/examples/run_openai_gpt.py @@ -39,7 +39,7 @@ import torch from torch.utils.data import (DataLoader, RandomSampler, SequentialSampler, TensorDataset) -from pytorch_pretrained_bert import (OpenAIGPTDoubleHeadsModel, OpenAIGPTTokenizer, +from pytorch_transformers import (OpenAIGPTDoubleHeadsModel, OpenAIGPTTokenizer, OpenAIAdam, cached_path, WEIGHTS_NAME, CONFIG_NAME) ROCSTORIES_URL = "https://s3.amazonaws.com/datasets.huggingface.co/ROCStories.tar.gz" diff --git a/examples/run_transfo_xl.py b/examples/run_transfo_xl.py index 0ea7b32053..fda0d8dc28 100644 --- a/examples/run_transfo_xl.py +++ b/examples/run_transfo_xl.py @@ -28,7 +28,7 @@ import math import torch -from pytorch_pretrained_bert import TransfoXLLMHeadModel, TransfoXLCorpus, TransfoXLTokenizer +from pytorch_transformers import TransfoXLLMHeadModel, TransfoXLCorpus, TransfoXLTokenizer logging.basicConfig(format = '%(asctime)s - %(levelname)s - %(name)s - %(message)s', datefmt = '%m/%d/%Y %H:%M:%S', diff --git a/examples/run_xlnet_classifier.py b/examples/run_xlnet_classifier.py index e30cad773b..7cf8a8d877 100644 --- a/examples/run_xlnet_classifier.py +++ b/examples/run_xlnet_classifier.py @@ -34,10 +34,10 @@ from torch.nn import CrossEntropyLoss, MSELoss from tensorboardX import SummaryWriter -from pytorch_pretrained_bert import WEIGHTS_NAME, CONFIG_NAME -from pytorch_pretrained_bert.modeling_xlnet import XLNetForSequenceClassification -from pytorch_pretrained_bert.tokenization_xlnet import XLNetTokenizer -from pytorch_pretrained_bert.optimization import BertAdam, WarmupLinearSchedule +from pytorch_transformers import WEIGHTS_NAME, CONFIG_NAME +from 
pytorch_transformers.modeling_xlnet import XLNetForSequenceClassification +from pytorch_transformers.tokenization_xlnet import XLNetTokenizer +from pytorch_transformers.optimization import BertAdam, WarmupLinearSchedule from utils_glue import processors, output_modes, convert_examples_to_features, compute_metrics diff --git a/examples/run_xlnet_squad.py b/examples/run_xlnet_squad.py index c299358b79..393fa98abd 100644 --- a/examples/run_xlnet_squad.py +++ b/examples/run_xlnet_squad.py @@ -33,10 +33,10 @@ from tqdm import tqdm, trange from tensorboardX import SummaryWriter -from pytorch_pretrained_bert import WEIGHTS_NAME, CONFIG_NAME -from pytorch_pretrained_bert.modeling_xlnet import BertForQuestionAnswering -from pytorch_pretrained_bert.tokenization_xlnet import XLNetTokenizer -from pytorch_pretrained_bert.optimization import BertAdam, WarmupLinearSchedule +from pytorch_transformers import WEIGHTS_NAME, CONFIG_NAME +from pytorch_transformers.modeling_xlnet import BertForQuestionAnswering +from pytorch_transformers.tokenization_xlnet import XLNetTokenizer +from pytorch_transformers.optimization import BertAdam, WarmupLinearSchedule from utils_squad import read_squad_examples, convert_examples_to_features, RawResult, write_predictions diff --git a/examples/tests/examples_tests.py b/examples/tests/examples_tests.py new file mode 100644 index 0000000000..120df35f82 --- /dev/null +++ b/examples/tests/examples_tests.py @@ -0,0 +1,50 @@ +# coding=utf-8 +# Copyright 2018 HuggingFace Inc.. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import os +import unittest +import json +import random +import shutil +import pytest + +import torch + +from pytorch_transformers import PretrainedConfig, PreTrainedModel +from pytorch_transformers.modeling_bert import BertModel, BertConfig, PRETRAINED_MODEL_ARCHIVE_MAP, PRETRAINED_CONFIG_ARCHIVE_MAP + + +class ModelUtilsTest(unittest.TestCase): + def test_model_from_pretrained(self): + for model_name in list(PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]: + config = BertConfig.from_pretrained(model_name) + self.assertIsNotNone(config) + self.assertIsInstance(config, PretrainedConfig) + + model = BertModel.from_pretrained(model_name) + self.assertIsNotNone(model) + self.assertIsInstance(model, PreTrainedModel) + + config = BertConfig.from_pretrained(model_name, output_attentions=True, output_hidden_states=True) + model = BertModel.from_pretrained(model_name, output_attentions=True, output_hidden_states=True) + self.assertEqual(model.config.output_attentions, True) + self.assertEqual(model.config.output_hidden_states, True) + self.assertEqual(model.config, config) + +if __name__ == "__main__": + unittest.main() diff --git a/examples/utils_squad.py b/examples/utils_squad.py index 0dfecd202c..c858776183 100644 --- a/examples/utils_squad.py +++ b/examples/utils_squad.py @@ -24,7 +24,7 @@ import math import collections from io import open -from pytorch_pretrained_bert.tokenization_bert import BasicTokenizer, whitespace_tokenize +from pytorch_transformers.tokenization_bert import BasicTokenizer, whitespace_tokenize logger = logging.getLogger(__name__) diff --git a/hubconfs/bert_hubconf.py b/hubconfs/bert_hubconf.py index 94c7a18a30..0ee0df6697 100644 --- a/hubconfs/bert_hubconf.py +++ b/hubconfs/bert_hubconf.py @@ -1,5 +1,5 @@ -from 
pytorch_pretrained_bert.tokenization_bert import BertTokenizer -from pytorch_pretrained_bert.modeling_bert import ( +from pytorch_transformers.tokenization_bert import BertTokenizer +from pytorch_transformers.modeling_bert import ( BertModel, BertForNextSentencePrediction, BertForMaskedLM, @@ -86,7 +86,7 @@ def bertTokenizer(*args, **kwargs): Example: >>> import torch >>> sentence = 'Hello, World!' - >>> tokenizer = torch.hub.load('huggingface/pytorch-pretrained-BERT', 'bertTokenizer', 'bert-base-cased', do_basic_tokenize=False) + >>> tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'bertTokenizer', 'bert-base-cased', do_basic_tokenize=False) >>> toks = tokenizer.tokenize(sentence) ['Hello', '##,', 'World', '##!'] >>> ids = tokenizer.convert_tokens_to_ids(toks) @@ -106,7 +106,7 @@ def bertModel(*args, **kwargs): Example: # Load the tokenizer >>> import torch - >>> tokenizer = torch.hub.load('huggingface/pytorch-pretrained-BERT', 'bertTokenizer', 'bert-base-cased', do_basic_tokenize=False) + >>> tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'bertTokenizer', 'bert-base-cased', do_basic_tokenize=False) # Prepare tokenized input >>> text = "[CLS] Who was Jim Henson ? 
[SEP] Jim Henson was a puppeteer [SEP]" >>> tokenized_text = tokenizer.tokenize(text) @@ -115,7 +115,7 @@ def bertModel(*args, **kwargs): >>> tokens_tensor = torch.tensor([indexed_tokens]) >>> segments_tensors = torch.tensor([segments_ids]) # Load bertModel - >>> model = torch.hub.load('huggingface/pytorch-pretrained-BERT', 'bertModel', 'bert-base-cased') + >>> model = torch.hub.load('huggingface/pytorch-transformers', 'bertModel', 'bert-base-cased') >>> model.eval() # Predict hidden states features for each layer >>> with torch.no_grad(): @@ -135,7 +135,7 @@ def bertForNextSentencePrediction(*args, **kwargs): Example: # Load the tokenizer >>> import torch - >>> tokenizer = torch.hub.load('huggingface/pytorch-pretrained-BERT', 'bertTokenizer', 'bert-base-cased', do_basic_tokenize=False) + >>> tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'bertTokenizer', 'bert-base-cased', do_basic_tokenize=False) # Prepare tokenized input >>> text = "[CLS] Who was Jim Henson ? [SEP] Jim Henson was a puppeteer [SEP]" >>> tokenized_text = tokenizer.tokenize(text) @@ -144,7 +144,7 @@ def bertForNextSentencePrediction(*args, **kwargs): >>> tokens_tensor = torch.tensor([indexed_tokens]) >>> segments_tensors = torch.tensor([segments_ids]) # Load bertForNextSentencePrediction - >>> model = torch.hub.load('huggingface/pytorch-pretrained-BERT', 'bertForNextSentencePrediction', 'bert-base-cased') + >>> model = torch.hub.load('huggingface/pytorch-transformers', 'bertForNextSentencePrediction', 'bert-base-cased') >>> model.eval() # Predict the next sentence classification logits >>> with torch.no_grad(): @@ -165,7 +165,7 @@ def bertForPreTraining(*args, **kwargs): Example: # Load the tokenizer >>> import torch - >>> tokenizer = torch.hub.load('huggingface/pytorch-pretrained-BERT', 'bertTokenizer', 'bert-base-cased', do_basic_tokenize=False) + >>> tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'bertTokenizer', 'bert-base-cased', do_basic_tokenize=False) # 
Prepare tokenized input >>> text = "[CLS] Who was Jim Henson ? [SEP] Jim Henson was a puppeteer [SEP]" >>> tokenized_text = tokenizer.tokenize(text) @@ -173,7 +173,7 @@ def bertForPreTraining(*args, **kwargs): >>> tokens_tensor = torch.tensor([indexed_tokens]) >>> segments_tensors = torch.tensor([segments_ids]) # Load bertForPreTraining - >>> model = torch.hub.load('huggingface/pytorch-pretrained-BERT', 'bertForPreTraining', 'bert-base-cased') + >>> model = torch.hub.load('huggingface/pytorch-transformers', 'bertForPreTraining', 'bert-base-cased') >>> masked_lm_logits_scores, seq_relationship_logits = model(tokens_tensor, segments_tensors) """ model = BertForPreTraining.from_pretrained(*args, **kwargs) @@ -189,7 +189,7 @@ def bertForMaskedLM(*args, **kwargs): Example: # Load the tokenizer >>> import torch - >>> tokenizer = torch.hub.load('huggingface/pytorch-pretrained-BERT', 'bertTokenizer', 'bert-base-cased', do_basic_tokenize=False) + >>> tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'bertTokenizer', 'bert-base-cased', do_basic_tokenize=False) # Prepare tokenized input >>> text = "[CLS] Who was Jim Henson ? 
[SEP] Jim Henson was a puppeteer [SEP]" >>> tokenized_text = tokenizer.tokenize(text) @@ -200,7 +200,7 @@ def bertForMaskedLM(*args, **kwargs): >>> tokens_tensor = torch.tensor([indexed_tokens]) >>> segments_tensors = torch.tensor([segments_ids]) # Load bertForMaskedLM - >>> model = torch.hub.load('huggingface/pytorch-pretrained-BERT', 'bertForMaskedLM', 'bert-base-cased') + >>> model = torch.hub.load('huggingface/pytorch-transformers', 'bertForMaskedLM', 'bert-base-cased') >>> model.eval() # Predict all tokens >>> with torch.no_grad(): @@ -231,7 +231,7 @@ def bertForSequenceClassification(*args, **kwargs): Example: # Load the tokenizer >>> import torch - >>> tokenizer = torch.hub.load('huggingface/pytorch-pretrained-BERT', 'bertTokenizer', 'bert-base-cased', do_basic_tokenize=False) + >>> tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'bertTokenizer', 'bert-base-cased', do_basic_tokenize=False) # Prepare tokenized input >>> text = "[CLS] Who was Jim Henson ? [SEP] Jim Henson was a puppeteer [SEP]" >>> tokenized_text = tokenizer.tokenize(text) @@ -240,7 +240,7 @@ def bertForSequenceClassification(*args, **kwargs): >>> tokens_tensor = torch.tensor([indexed_tokens]) >>> segments_tensors = torch.tensor([segments_ids]) # Load bertForSequenceClassification - >>> model = torch.hub.load('huggingface/pytorch-pretrained-BERT', 'bertForSequenceClassification', 'bert-base-cased', num_labels=2) + >>> model = torch.hub.load('huggingface/pytorch-transformers', 'bertForSequenceClassification', 'bert-base-cased', num_labels=2) >>> model.eval() # Predict the sequence classification logits >>> with torch.no_grad(): @@ -266,7 +266,7 @@ def bertForMultipleChoice(*args, **kwargs): Example: # Load the tokenizer >>> import torch - >>> tokenizer = torch.hub.load('huggingface/pytorch-pretrained-BERT', 'bertTokenizer', 'bert-base-cased', do_basic_tokenize=False) + >>> tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'bertTokenizer', 'bert-base-cased', 
do_basic_tokenize=False) # Prepare tokenized input >>> text = "[CLS] Who was Jim Henson ? [SEP] Jim Henson was a puppeteer [SEP]" >>> tokenized_text = tokenizer.tokenize(text) @@ -275,7 +275,7 @@ def bertForMultipleChoice(*args, **kwargs): >>> tokens_tensor = torch.tensor([indexed_tokens, indexed_tokens]).unsqueeze(0) >>> segments_tensors = torch.tensor([segments_ids, segments_ids]).unsqueeze(0) # Load bertForMultipleChoice - >>> model = torch.hub.load('huggingface/pytorch-pretrained-BERT', 'bertForMultipleChoice', 'bert-base-cased', num_choices=2) + >>> model = torch.hub.load('huggingface/pytorch-transformers', 'bertForMultipleChoice', 'bert-base-cased', num_choices=2) >>> model.eval() # Predict the multiple choice logits >>> with torch.no_grad(): @@ -299,7 +299,7 @@ def bertForQuestionAnswering(*args, **kwargs): Example: # Load the tokenizer >>> import torch - >>> tokenizer = torch.hub.load('huggingface/pytorch-pretrained-BERT', 'bertTokenizer', 'bert-base-cased', do_basic_tokenize=False) + >>> tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'bertTokenizer', 'bert-base-cased', do_basic_tokenize=False) # Prepare tokenized input >>> text = "[CLS] Who was Jim Henson ? 
[SEP] Jim Henson was a puppeteer [SEP]" >>> tokenized_text = tokenizer.tokenize(text) @@ -308,7 +308,7 @@ def bertForQuestionAnswering(*args, **kwargs): >>> tokens_tensor = torch.tensor([indexed_tokens]) >>> segments_tensors = torch.tensor([segments_ids]) # Load bertForQuestionAnswering - >>> model = torch.hub.load('huggingface/pytorch-pretrained-BERT', 'bertForQuestionAnswering', 'bert-base-cased') + >>> model = torch.hub.load('huggingface/pytorch-transformers', 'bertForQuestionAnswering', 'bert-base-cased') >>> model.eval() # Predict the start and end positions logits >>> with torch.no_grad(): @@ -338,7 +338,7 @@ def bertForTokenClassification(*args, **kwargs): Example: # Load the tokenizer >>> import torch - >>> tokenizer = torch.hub.load('huggingface/pytorch-pretrained-BERT', 'bertTokenizer', 'bert-base-cased', do_basic_tokenize=False) + >>> tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'bertTokenizer', 'bert-base-cased', do_basic_tokenize=False) # Prepare tokenized input >>> text = "[CLS] Who was Jim Henson ? 
[SEP] Jim Henson was a puppeteer [SEP]" >>> tokenized_text = tokenizer.tokenize(text) @@ -347,7 +347,7 @@ def bertForTokenClassification(*args, **kwargs): >>> tokens_tensor = torch.tensor([indexed_tokens]) >>> segments_tensors = torch.tensor([segments_ids]) # Load bertForTokenClassification - >>> model = torch.hub.load('huggingface/pytorch-pretrained-BERT', 'bertForTokenClassification', 'bert-base-cased', num_labels=2) + >>> model = torch.hub.load('huggingface/pytorch-transformers', 'bertForTokenClassification', 'bert-base-cased', num_labels=2) >>> model.eval() # Predict the token classification logits >>> with torch.no_grad(): diff --git a/hubconfs/gpt2_hubconf.py b/hubconfs/gpt2_hubconf.py index 3ac8bc72ab..dbaa2cd612 100644 --- a/hubconfs/gpt2_hubconf.py +++ b/hubconfs/gpt2_hubconf.py @@ -1,5 +1,5 @@ -from pytorch_pretrained_bert.tokenization_gpt2 import GPT2Tokenizer -from pytorch_pretrained_bert.modeling_gpt2 import ( +from pytorch_transformers.tokenization_gpt2 import GPT2Tokenizer +from pytorch_transformers.modeling_gpt2 import ( GPT2Model, GPT2LMHeadModel, GPT2DoubleHeadsModel @@ -53,7 +53,7 @@ def gpt2Tokenizer(*args, **kwargs): Example: >>> import torch - >>> tokenizer = torch.hub.load('huggingface/pytorch-pretrained-BERT', 'gpt2Tokenizer', 'gpt2') + >>> tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'gpt2Tokenizer', 'gpt2') >>> text = "Who was Jim Henson ?" >>> indexed_tokens = tokenizer.encode(tokenized_text) @@ -72,7 +72,7 @@ def gpt2Model(*args, **kwargs): Example: # Load the tokenizer >>> import torch - >>> tokenizer = torch.hub.load('huggingface/pytorch-pretrained-BERT', 'gpt2Tokenizer', 'gpt2') + >>> tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'gpt2Tokenizer', 'gpt2') # Prepare tokenized input >>> text_1 = "Who was Jim Henson ?" 
@@ -83,7 +83,7 @@ def gpt2Model(*args, **kwargs): >>> tokens_tensor_2 = torch.tensor([indexed_tokens_2]) # Load gpt2Model - >>> model = torch.hub.load('huggingface/pytorch-pretrained-BERT', 'gpt2Model', 'gpt2') + >>> model = torch.hub.load('huggingface/pytorch-transformers', 'gpt2Model', 'gpt2') >>> model.eval() # Predict hidden states features for each layer @@ -105,7 +105,7 @@ def gpt2LMHeadModel(*args, **kwargs): Example: # Load the tokenizer >>> import torch - >>> tokenizer = torch.hub.load('huggingface/pytorch-pretrained-BERT', 'gpt2Tokenizer', 'gpt2') + >>> tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'gpt2Tokenizer', 'gpt2') # Prepare tokenized input >>> text_1 = "Who was Jim Henson ?" @@ -116,7 +116,7 @@ def gpt2LMHeadModel(*args, **kwargs): >>> tokens_tensor_2 = torch.tensor([indexed_tokens_2]) # Load gpt2LMHeadModel - >>> model = torch.hub.load('huggingface/pytorch-pretrained-BERT', 'gpt2LMHeadModel', 'gpt2') + >>> model = torch.hub.load('huggingface/pytorch-transformers', 'gpt2LMHeadModel', 'gpt2') >>> model.eval() # Predict hidden states features for each layer @@ -144,7 +144,7 @@ def gpt2DoubleHeadsModel(*args, **kwargs): Example: # Load the tokenizer >>> import torch - >>> tokenizer = torch.hub.load('huggingface/pytorch-pretrained-BERT', 'gpt2Tokenizer', 'gpt2') + >>> tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'gpt2Tokenizer', 'gpt2') # Prepare tokenized input >>> text1 = "Who was Jim Henson ? 
Jim Henson was a puppeteer" @@ -157,7 +157,7 @@ def gpt2DoubleHeadsModel(*args, **kwargs): >>> mc_token_ids = torch.LongTensor([[len(tokenized_text1)-1, len(tokenized_text2)-1]]) # Load gpt2DoubleHeadsModel - >>> model = torch.hub.load('huggingface/pytorch-pretrained-BERT', 'gpt2DoubleHeadsModel', 'gpt2') + >>> model = torch.hub.load('huggingface/pytorch-transformers', 'gpt2DoubleHeadsModel', 'gpt2') >>> model.eval() # Predict hidden states features for each layer diff --git a/hubconfs/gpt_hubconf.py b/hubconfs/gpt_hubconf.py index f3d03888ae..1683c881fa 100644 --- a/hubconfs/gpt_hubconf.py +++ b/hubconfs/gpt_hubconf.py @@ -1,5 +1,5 @@ -from pytorch_pretrained_bert.tokenization_openai import OpenAIGPTTokenizer -from pytorch_pretrained_bert.modeling_openai import ( +from pytorch_transformers.tokenization_openai import OpenAIGPTTokenizer +from pytorch_transformers.modeling_openai import ( OpenAIGPTModel, OpenAIGPTLMHeadModel, OpenAIGPTDoubleHeadsModel @@ -77,7 +77,7 @@ def openAIGPTTokenizer(*args, **kwargs): Example: >>> import torch - >>> tokenizer = torch.hub.load('huggingface/pytorch-pretrained-BERT', 'openAIGPTTokenizer', 'openai-gpt') + >>> tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'openAIGPTTokenizer', 'openai-gpt') >>> text = "Who was Jim Henson ? Jim Henson was a puppeteer" >>> tokenized_text = tokenizer.tokenize(text) @@ -98,7 +98,7 @@ def openAIGPTModel(*args, **kwargs): Example: # Load the tokenizer >>> import torch - >>> tokenizer = torch.hub.load('huggingface/pytorch-pretrained-BERT', 'openAIGPTTokenizer', 'openai-gpt') + >>> tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'openAIGPTTokenizer', 'openai-gpt') # Prepare tokenized input >>> text = "Who was Jim Henson ? 
Jim Henson was a puppeteer" @@ -107,7 +107,7 @@ def openAIGPTModel(*args, **kwargs): >>> tokens_tensor = torch.tensor([indexed_tokens]) # Load openAIGPTModel - >>> model = torch.hub.load('huggingface/pytorch-pretrained-BERT', 'openAIGPTModel', 'openai-gpt') + >>> model = torch.hub.load('huggingface/pytorch-transformers', 'openAIGPTModel', 'openai-gpt') >>> model.eval() # Predict hidden states features for each layer @@ -127,7 +127,7 @@ def openAIGPTLMHeadModel(*args, **kwargs): Example: # Load the tokenizer >>> import torch - >>> tokenizer = torch.hub.load('huggingface/pytorch-pretrained-BERT', 'openAIGPTTokenizer', 'openai-gpt') + >>> tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'openAIGPTTokenizer', 'openai-gpt') # Prepare tokenized input >>> text = "Who was Jim Henson ? Jim Henson was a puppeteer" @@ -136,7 +136,7 @@ def openAIGPTLMHeadModel(*args, **kwargs): >>> tokens_tensor = torch.tensor([indexed_tokens]) # Load openAIGPTLMHeadModel - >>> model = torch.hub.load('huggingface/pytorch-pretrained-BERT', 'openAIGPTLMHeadModel', 'openai-gpt') + >>> model = torch.hub.load('huggingface/pytorch-transformers', 'openAIGPTLMHeadModel', 'openai-gpt') >>> model.eval() # Predict hidden states features for each layer @@ -162,7 +162,7 @@ def openAIGPTDoubleHeadsModel(*args, **kwargs): Example: # Load the tokenizer >>> import torch - >>> tokenizer = torch.hub.load('huggingface/pytorch-pretrained-BERT', 'openAIGPTTokenizer', 'openai-gpt') + >>> tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'openAIGPTTokenizer', 'openai-gpt') # Prepare tokenized input >>> text1 = "Who was Jim Henson ? 
Jim Henson was a puppeteer" @@ -175,7 +175,7 @@ def openAIGPTDoubleHeadsModel(*args, **kwargs): >>> mc_token_ids = torch.LongTensor([[len(tokenized_text1)-1, len(tokenized_text2)-1]]) # Load openAIGPTDoubleHeadsModel - >>> model = torch.hub.load('huggingface/pytorch-pretrained-BERT', 'openAIGPTDoubleHeadsModel', 'openai-gpt') + >>> model = torch.hub.load('huggingface/pytorch-transformers', 'openAIGPTDoubleHeadsModel', 'openai-gpt') >>> model.eval() # Predict hidden states features for each layer diff --git a/hubconfs/transformer_xl_hubconf.py b/hubconfs/transformer_xl_hubconf.py index d5c697547e..d89db894ad 100644 --- a/hubconfs/transformer_xl_hubconf.py +++ b/hubconfs/transformer_xl_hubconf.py @@ -1,5 +1,5 @@ -from pytorch_pretrained_bert.tokenization_transfo_xl import TransfoXLTokenizer -from pytorch_pretrained_bert.modeling_transfo_xl import ( +from pytorch_transformers.tokenization_transfo_xl import TransfoXLTokenizer +from pytorch_transformers.modeling_transfo_xl import ( TransfoXLModel, TransfoXLLMHeadModel ) @@ -46,7 +46,7 @@ def transformerXLTokenizer(*args, **kwargs): Example: >>> import torch - >>> tokenizer = torch.hub.load('huggingface/pytorch-pretrained-BERT', 'transformerXLTokenizer', 'transfo-xl-wt103') + >>> tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'transformerXLTokenizer', 'transfo-xl-wt103') >>> text = "Who was Jim Henson ?" >>> tokenized_text = tokenizer.tokenize(tokenized_text) @@ -64,7 +64,7 @@ def transformerXLModel(*args, **kwargs): Example: # Load the tokenizer >>> import torch - >>> tokenizer = torch.hub.load('huggingface/pytorch-pretrained-BERT', 'transformerXLTokenizer', 'transfo-xl-wt103') + >>> tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'transformerXLTokenizer', 'transfo-xl-wt103') # Prepare tokenized input >>> text_1 = "Who was Jim Henson ?" 
@@ -77,7 +77,7 @@ def transformerXLModel(*args, **kwargs): >>> tokens_tensor_2 = torch.tensor([indexed_tokens_2]) # Load transformerXLModel - >>> model = torch.hub.load('huggingface/pytorch-pretrained-BERT', 'transformerXLModel', 'transfo-xl-wt103') + >>> model = torch.hub.load('huggingface/pytorch-transformers', 'transformerXLModel', 'transfo-xl-wt103') >>> model.eval() # Predict hidden states features for each layer @@ -99,7 +99,7 @@ def transformerXLLMHeadModel(*args, **kwargs): Example: # Load the tokenizer >>> import torch - >>> tokenizer = torch.hub.load('huggingface/pytorch-pretrained-BERT', 'transformerXLTokenizer', 'transfo-xl-wt103') + >>> tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'transformerXLTokenizer', 'transfo-xl-wt103') # Prepare tokenized input >>> text_1 = "Who was Jim Henson ?" @@ -112,7 +112,7 @@ def transformerXLLMHeadModel(*args, **kwargs): >>> tokens_tensor_2 = torch.tensor([indexed_tokens_2]) # Load transformerXLLMHeadModel - >>> model = torch.hub.load('huggingface/pytorch-pretrained-BERT', 'transformerXLLMHeadModel', 'transfo-xl-wt103') + >>> model = torch.hub.load('huggingface/pytorch-transformers', 'transformerXLLMHeadModel', 'transfo-xl-wt103') >>> model.eval() # Predict hidden states features for each layer diff --git a/hubconfs/xlm_hubconf.py b/hubconfs/xlm_hubconf.py index 154f875bfb..4f6fd93c24 100644 --- a/hubconfs/xlm_hubconf.py +++ b/hubconfs/xlm_hubconf.py @@ -1,5 +1,5 @@ -from pytorch_pretrained_bert.tokenization_xlm import XLMTokenizer -from pytorch_pretrained_bert.modeling_xlm import ( +from pytorch_transformers.tokenization_xlm import XLMTokenizer +from pytorch_transformers.modeling_xlm import ( XLMConfig, XLMModel, XLMWithLMHeadModel, @@ -18,7 +18,7 @@ xlm_start_docstring = """ Example: # Load the tokenizer >>> import torch - >>> tokenizer = torch.hub.load('huggingface/pytorch-pretrained-BERT', 'xlmTokenizer', 'xlm-mlm-en-2048') + >>> tokenizer = torch.hub.load('huggingface/pytorch-transformers', 
'xlmTokenizer', 'xlm-mlm-en-2048') # Prepare tokenized input >>> text_1 = "Who was Jim Henson ?" @@ -77,7 +77,7 @@ def xlmTokenizer(*args, **kwargs): Example: >>> import torch - >>> tokenizer = torch.hub.load('huggingface/pytorch-pretrained-BERT', 'xlmTokenizer', 'xlm-mlm-en-2048') + >>> tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'xlmTokenizer', 'xlm-mlm-en-2048') >>> text = "Who was Jim Henson ?" >>> indexed_tokens = tokenizer.encode(tokenized_text) @@ -91,7 +91,7 @@ def xlmTokenizer(*args, **kwargs): def xlmModel(*args, **kwargs): """ # Load xlmModel - >>> model = torch.hub.load('huggingface/pytorch-pretrained-BERT', 'xlmModel', 'xlm-mlm-en-2048') + >>> model = torch.hub.load('huggingface/pytorch-transformers', 'xlmModel', 'xlm-mlm-en-2048') >>> model.eval() # Predict hidden states features for each layer @@ -116,7 +116,7 @@ def xlmLMHeadModel(*args, **kwargs): >>> tokens_tensor_2 = torch.tensor([indexed_tokens_2]) # Load xlnetLMHeadModel - >>> model = torch.hub.load('huggingface/pytorch-pretrained-BERT', 'xlnetLMHeadModel', 'xlm-mlm-en-2048') + >>> model = torch.hub.load('huggingface/pytorch-transformers', 'xlnetLMHeadModel', 'xlm-mlm-en-2048') >>> model.eval() # Predict hidden states features for each layer @@ -143,7 +143,7 @@ def xlmLMHeadModel(*args, **kwargs): # Example: # # Load the tokenizer # >>> import torch -# >>> tokenizer = torch.hub.load('huggingface/pytorch-pretrained-BERT', 'xlnetTokenizer', 'xlm-mlm-en-2048') +# >>> tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'xlnetTokenizer', 'xlm-mlm-en-2048') # # Prepare tokenized input # >>> text1 = "Who was Jim Henson ? 
Jim Henson was a puppeteer" @@ -156,7 +156,7 @@ def xlmLMHeadModel(*args, **kwargs): # >>> mc_token_ids = torch.LongTensor([[len(tokenized_text1)-1, len(tokenized_text2)-1]]) # # Load xlnetForSequenceClassification -# >>> model = torch.hub.load('huggingface/pytorch-pretrained-BERT', 'xlnetForSequenceClassification', 'xlm-mlm-en-2048') +# >>> model = torch.hub.load('huggingface/pytorch-transformers', 'xlnetForSequenceClassification', 'xlm-mlm-en-2048') # >>> model.eval() # # Predict sequence classes logits diff --git a/hubconfs/xlnet_hubconf.1.py b/hubconfs/xlnet_hubconf.1.py index d3766d04e0..4c5105a241 100644 --- a/hubconfs/xlnet_hubconf.1.py +++ b/hubconfs/xlnet_hubconf.1.py @@ -1,5 +1,5 @@ -from pytorch_pretrained_bert.tokenization_xlnet import XLNetTokenizer -from pytorch_pretrained_bert.modeling_xlnet import ( +from pytorch_transformers.tokenization_xlnet import XLNetTokenizer +from pytorch_transformers.modeling_xlnet import ( XLNetConfig, XLNetModel, XLNetLMHeadModel, @@ -54,7 +54,7 @@ def xlnetTokenizer(*args, **kwargs): Example: >>> import torch - >>> tokenizer = torch.hub.load('huggingface/pytorch-pretrained-BERT', 'xlnetTokenizer', 'xlnet-large-cased') + >>> tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'xlnetTokenizer', 'xlnet-large-cased') >>> text = "Who was Jim Henson ?" >>> indexed_tokens = tokenizer.encode(tokenized_text) @@ -73,7 +73,7 @@ def xlnetModel(*args, **kwargs): Example: # Load the tokenizer >>> import torch - >>> tokenizer = torch.hub.load('huggingface/pytorch-pretrained-BERT', 'xlnetTokenizer', 'xlnet-large-cased') + >>> tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'xlnetTokenizer', 'xlnet-large-cased') # Prepare tokenized input >>> text_1 = "Who was Jim Henson ?" 
@@ -84,7 +84,7 @@ def xlnetModel(*args, **kwargs): >>> tokens_tensor_2 = torch.tensor([indexed_tokens_2]) # Load xlnetModel - >>> model = torch.hub.load('huggingface/pytorch-pretrained-BERT', 'xlnetModel', 'xlnet-large-cased') + >>> model = torch.hub.load('huggingface/pytorch-transformers', 'xlnetModel', 'xlnet-large-cased') >>> model.eval() # Predict hidden states features for each layer @@ -107,7 +107,7 @@ def xlnetLMHeadModel(*args, **kwargs): Example: # Load the tokenizer >>> import torch - >>> tokenizer = torch.hub.load('huggingface/pytorch-pretrained-BERT', 'xlnetTokenizer', 'xlnet-large-cased') + >>> tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'xlnetTokenizer', 'xlnet-large-cased') # Prepare tokenized input >>> text_1 = "Who was Jim Henson ?" @@ -118,7 +118,7 @@ def xlnetLMHeadModel(*args, **kwargs): >>> tokens_tensor_2 = torch.tensor([indexed_tokens_2]) # Load xlnetLMHeadModel - >>> model = torch.hub.load('huggingface/pytorch-pretrained-BERT', 'xlnetLMHeadModel', 'xlnet-large-cased') + >>> model = torch.hub.load('huggingface/pytorch-transformers', 'xlnetLMHeadModel', 'xlnet-large-cased') >>> model.eval() # Predict hidden states features for each layer @@ -145,7 +145,7 @@ def xlnetLMHeadModel(*args, **kwargs): # Example: # # Load the tokenizer # >>> import torch -# >>> tokenizer = torch.hub.load('huggingface/pytorch-pretrained-BERT', 'xlnetTokenizer', 'xlnet-large-cased') +# >>> tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'xlnetTokenizer', 'xlnet-large-cased') # # Prepare tokenized input # >>> text1 = "Who was Jim Henson ? 
Jim Henson was a puppeteer" @@ -158,7 +158,7 @@ def xlnetLMHeadModel(*args, **kwargs): # >>> mc_token_ids = torch.LongTensor([[len(tokenized_text1)-1, len(tokenized_text2)-1]]) # # Load xlnetForSequenceClassification -# >>> model = torch.hub.load('huggingface/pytorch-pretrained-BERT', 'xlnetForSequenceClassification', 'xlnet-large-cased') +# >>> model = torch.hub.load('huggingface/pytorch-transformers', 'xlnetForSequenceClassification', 'xlnet-large-cased') # >>> model.eval() # # Predict sequence classes logits diff --git a/notebooks/Comparing-TF-and-PT-models-MLM-NSP.ipynb b/notebooks/Comparing-TF-and-PT-models-MLM-NSP.ipynb index ea7271df96..809f6ea6e0 100644 --- a/notebooks/Comparing-TF-and-PT-models-MLM-NSP.ipynb +++ b/notebooks/Comparing-TF-and-PT-models-MLM-NSP.ipynb @@ -78,7 +78,7 @@ "import importlib.util\n", "import sys\n", "import tensorflow as tf\n", - "import pytorch_pretrained_bert as ppb\n", + "import pytorch_transformers as ppb\n", "\n", "def del_all_flags(FLAGS):\n", " flags_dict = FLAGS._flags() \n", @@ -3997,9 +3997,9 @@ "name": "stderr", "output_type": "stream", "text": [ - "11/16/2018 11:03:05 - INFO - pytorch_pretrained_bert.modeling_bert - loading archive file https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased.tar.gz from cache at /Users/thomaswolf/.pytorch_pretrained_bert/9c41111e2de84547a463fd39217199738d1e3deb72d4fec4399e6e241983c6f0.ae3cef932725ca7a30cdcb93fc6e09150a55e2a130ec7af63975a16c153ae2ba\n", - "11/16/2018 11:03:05 - INFO - pytorch_pretrained_bert.modeling_bert - extracting archive file /Users/thomaswolf/.pytorch_pretrained_bert/9c41111e2de84547a463fd39217199738d1e3deb72d4fec4399e6e241983c6f0.ae3cef932725ca7a30cdcb93fc6e09150a55e2a130ec7af63975a16c153ae2ba to temp dir /var/folders/yx/cw8n_njx3js5jksyw_qlp8p00000gn/T/tmpaqgsm566\n", - "11/16/2018 11:03:08 - INFO - pytorch_pretrained_bert.modeling_bert - Model config {\n", + "11/16/2018 11:03:05 - INFO - pytorch_transformers.modeling_bert - loading archive file 
https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased.tar.gz from cache at /Users/thomaswolf/.pytorch_transformers/9c41111e2de84547a463fd39217199738d1e3deb72d4fec4399e6e241983c6f0.ae3cef932725ca7a30cdcb93fc6e09150a55e2a130ec7af63975a16c153ae2ba\n", + "11/16/2018 11:03:05 - INFO - pytorch_transformers.modeling_bert - extracting archive file /Users/thomaswolf/.pytorch_transformers/9c41111e2de84547a463fd39217199738d1e3deb72d4fec4399e6e241983c6f0.ae3cef932725ca7a30cdcb93fc6e09150a55e2a130ec7af63975a16c153ae2ba to temp dir /var/folders/yx/cw8n_njx3js5jksyw_qlp8p00000gn/T/tmpaqgsm566\n", + "11/16/2018 11:03:08 - INFO - pytorch_transformers.modeling_bert - Model config {\n", " \"attention_probs_dropout_prob\": 0.1,\n", " \"hidden_act\": \"gelu\",\n", " \"hidden_dropout_prob\": 0.1,\n", diff --git a/notebooks/Comparing-TF-and-PT-models.ipynb b/notebooks/Comparing-TF-and-PT-models.ipynb index 3e438e2f55..b7382e4652 100644 --- a/notebooks/Comparing-TF-and-PT-models.ipynb +++ b/notebooks/Comparing-TF-and-PT-models.ipynb @@ -342,7 +342,7 @@ "outputs": [], "source": [ "import extract_features\n", - "import pytorch_pretrained_bert as ppb\n", + "import pytorch_transformers as ppb\n", "from extract_features import *" ] }, @@ -375,8 +375,8 @@ "name": "stderr", "output_type": "stream", "text": [ - "11/15/2018 16:21:18 - INFO - pytorch_pretrained_bert.modeling_bert - loading archive file ../../google_models/uncased_L-12_H-768_A-12/\n", - "11/15/2018 16:21:18 - INFO - pytorch_pretrained_bert.modeling_bert - Model config {\n", + "11/15/2018 16:21:18 - INFO - pytorch_transformers.modeling_bert - loading archive file ../../google_models/uncased_L-12_H-768_A-12/\n", + "11/15/2018 16:21:18 - INFO - pytorch_transformers.modeling_bert - Model config {\n", " \"attention_probs_dropout_prob\": 0.1,\n", " \"hidden_act\": \"gelu\",\n", " \"hidden_dropout_prob\": 0.1,\n", diff --git a/pytorch_pretrained_bert/__init__.py b/pytorch_transformers/__init__.py similarity index 98% rename 
from pytorch_pretrained_bert/__init__.py rename to pytorch_transformers/__init__.py index 23346967ba..cbd007f872 100644 --- a/pytorch_pretrained_bert/__init__.py +++ b/pytorch_transformers/__init__.py @@ -1,4 +1,4 @@ -__version__ = "0.6.2" +__version__ = "0.7.0" from .tokenization_bert import BertTokenizer, BasicTokenizer, WordpieceTokenizer from .tokenization_openai import OpenAIGPTTokenizer from .tokenization_transfo_xl import (TransfoXLTokenizer, TransfoXLCorpus) diff --git a/pytorch_pretrained_bert/__main__.py b/pytorch_transformers/__main__.py similarity index 72% rename from pytorch_pretrained_bert/__main__.py rename to pytorch_transformers/__main__.py index bb9534a830..95504c1493 100644 --- a/pytorch_pretrained_bert/__main__.py +++ b/pytorch_transformers/__main__.py @@ -4,24 +4,24 @@ def main(): if (len(sys.argv) < 4 or len(sys.argv) > 6) or sys.argv[1] not in ["bert", "gpt", "transfo_xl", "gpt2", "xlnet"]: print( "Should be used as one of: \n" - ">> `pytorch_pretrained_bert bert TF_CHECKPOINT TF_CONFIG PYTORCH_DUMP_OUTPUT`, \n" - ">> `pytorch_pretrained_bert gpt OPENAI_GPT_CHECKPOINT_FOLDER_PATH PYTORCH_DUMP_OUTPUT [OPENAI_GPT_CONFIG]`, \n" - ">> `pytorch_pretrained_bert transfo_xl TF_CHECKPOINT_OR_DATASET PYTORCH_DUMP_OUTPUT [TF_CONFIG]` or \n" - ">> `pytorch_pretrained_bert gpt2 TF_CHECKPOINT PYTORCH_DUMP_OUTPUT [GPT2_CONFIG]` or \n" - ">> `pytorch_pretrained_bert xlnet TF_CHECKPOINT TF_CONFIG PYTORCH_DUMP_OUTPUT [FINETUNING_TASK_NAME]`") + ">> `pytorch_transformers bert TF_CHECKPOINT TF_CONFIG PYTORCH_DUMP_OUTPUT`, \n" + ">> `pytorch_transformers gpt OPENAI_GPT_CHECKPOINT_FOLDER_PATH PYTORCH_DUMP_OUTPUT [OPENAI_GPT_CONFIG]`, \n" + ">> `pytorch_transformers transfo_xl TF_CHECKPOINT_OR_DATASET PYTORCH_DUMP_OUTPUT [TF_CONFIG]` or \n" + ">> `pytorch_transformers gpt2 TF_CHECKPOINT PYTORCH_DUMP_OUTPUT [GPT2_CONFIG]` or \n" + ">> `pytorch_transformers xlnet TF_CHECKPOINT TF_CONFIG PYTORCH_DUMP_OUTPUT [FINETUNING_TASK_NAME]`") else: if sys.argv[1] == "bert": 
try: from .convert_tf_checkpoint_to_pytorch import convert_tf_checkpoint_to_pytorch except ImportError: - print("pytorch_pretrained_bert can only be used from the commandline to convert TensorFlow models in PyTorch, " + print("pytorch_transformers can only be used from the commandline to convert TensorFlow models in PyTorch, " "In that case, it requires TensorFlow to be installed. Please see " "https://www.tensorflow.org/install/ for installation instructions.") raise if len(sys.argv) != 5: # pylint: disable=line-too-long - print("Should be used as `pytorch_pretrained_bert bert TF_CHECKPOINT TF_CONFIG PYTORCH_DUMP_OUTPUT`") + print("Should be used as `pytorch_transformers bert TF_CHECKPOINT TF_CONFIG PYTORCH_DUMP_OUTPUT`") else: PYTORCH_DUMP_OUTPUT = sys.argv.pop() TF_CONFIG = sys.argv.pop() @@ -31,7 +31,7 @@ def main(): from .convert_openai_checkpoint_to_pytorch import convert_openai_checkpoint_to_pytorch if len(sys.argv) < 4 or len(sys.argv) > 5: # pylint: disable=line-too-long - print("Should be used as `pytorch_pretrained_bert gpt OPENAI_GPT_CHECKPOINT_FOLDER_PATH PYTORCH_DUMP_OUTPUT [OPENAI_GPT_CONFIG]`") + print("Should be used as `pytorch_transformers gpt OPENAI_GPT_CHECKPOINT_FOLDER_PATH PYTORCH_DUMP_OUTPUT [OPENAI_GPT_CONFIG]`") else: OPENAI_GPT_CHECKPOINT_FOLDER_PATH = sys.argv[2] PYTORCH_DUMP_OUTPUT = sys.argv[3] @@ -46,13 +46,13 @@ def main(): try: from .convert_transfo_xl_checkpoint_to_pytorch import convert_transfo_xl_checkpoint_to_pytorch except ImportError: - print("pytorch_pretrained_bert can only be used from the commandline to convert TensorFlow models in PyTorch, " + print("pytorch_transformers can only be used from the commandline to convert TensorFlow models in PyTorch, " "In that case, it requires TensorFlow to be installed. 
Please see " "https://www.tensorflow.org/install/ for installation instructions.") raise if len(sys.argv) < 4 or len(sys.argv) > 5: # pylint: disable=line-too-long - print("Should be used as `pytorch_pretrained_bert transfo_xl TF_CHECKPOINT/TF_DATASET_FILE PYTORCH_DUMP_OUTPUT [TF_CONFIG]`") + print("Should be used as `pytorch_transformers transfo_xl TF_CHECKPOINT/TF_DATASET_FILE PYTORCH_DUMP_OUTPUT [TF_CONFIG]`") else: if 'ckpt' in sys.argv[2].lower(): TF_CHECKPOINT = sys.argv[2] @@ -70,14 +70,14 @@ def main(): try: from .convert_gpt2_checkpoint_to_pytorch import convert_gpt2_checkpoint_to_pytorch except ImportError: - print("pytorch_pretrained_bert can only be used from the commandline to convert TensorFlow models in PyTorch, " + print("pytorch_transformers can only be used from the commandline to convert TensorFlow models in PyTorch, " "In that case, it requires TensorFlow to be installed. Please see " "https://www.tensorflow.org/install/ for installation instructions.") raise if len(sys.argv) < 4 or len(sys.argv) > 5: # pylint: disable=line-too-long - print("Should be used as `pytorch_pretrained_bert gpt2 TF_CHECKPOINT PYTORCH_DUMP_OUTPUT [TF_CONFIG]`") + print("Should be used as `pytorch_transformers gpt2 TF_CHECKPOINT PYTORCH_DUMP_OUTPUT [TF_CONFIG]`") else: TF_CHECKPOINT = sys.argv[2] PYTORCH_DUMP_OUTPUT = sys.argv[3] @@ -90,14 +90,14 @@ def main(): try: from .convert_xlnet_checkpoint_to_pytorch import convert_xlnet_checkpoint_to_pytorch except ImportError: - print("pytorch_pretrained_bert can only be used from the commandline to convert TensorFlow models in PyTorch, " + print("pytorch_transformers can only be used from the commandline to convert TensorFlow models in PyTorch, " "In that case, it requires TensorFlow to be installed. 
Please see " "https://www.tensorflow.org/install/ for installation instructions.") raise if len(sys.argv) < 5 or len(sys.argv) > 6: # pylint: disable=line-too-long - print("Should be used as `pytorch_pretrained_bert xlnet TF_CHECKPOINT TF_CONFIG PYTORCH_DUMP_OUTPUT [FINETUNING_TASK_NAME]`") + print("Should be used as `pytorch_transformers xlnet TF_CHECKPOINT TF_CONFIG PYTORCH_DUMP_OUTPUT [FINETUNING_TASK_NAME]`") else: TF_CHECKPOINT = sys.argv[2] TF_CONFIG = sys.argv[3] diff --git a/pytorch_pretrained_bert/convert_gpt2_checkpoint_to_pytorch.py b/pytorch_transformers/convert_gpt2_checkpoint_to_pytorch.py similarity index 97% rename from pytorch_pretrained_bert/convert_gpt2_checkpoint_to_pytorch.py rename to pytorch_transformers/convert_gpt2_checkpoint_to_pytorch.py index 51d52a6694..86c8264cb5 100755 --- a/pytorch_pretrained_bert/convert_gpt2_checkpoint_to_pytorch.py +++ b/pytorch_transformers/convert_gpt2_checkpoint_to_pytorch.py @@ -21,7 +21,7 @@ from io import open import torch -from pytorch_pretrained_bert.modeling_gpt2 import (CONFIG_NAME, WEIGHTS_NAME, +from pytorch_transformers.modeling_gpt2 import (CONFIG_NAME, WEIGHTS_NAME, GPT2Config, GPT2Model, load_tf_weights_in_gpt2) diff --git a/pytorch_pretrained_bert/convert_openai_checkpoint_to_pytorch.py b/pytorch_transformers/convert_openai_checkpoint_to_pytorch.py similarity index 97% rename from pytorch_pretrained_bert/convert_openai_checkpoint_to_pytorch.py rename to pytorch_transformers/convert_openai_checkpoint_to_pytorch.py index 566008aaa0..68e9dea624 100755 --- a/pytorch_pretrained_bert/convert_openai_checkpoint_to_pytorch.py +++ b/pytorch_transformers/convert_openai_checkpoint_to_pytorch.py @@ -21,7 +21,7 @@ from io import open import torch -from pytorch_pretrained_bert.modeling_openai import (CONFIG_NAME, WEIGHTS_NAME, +from pytorch_transformers.modeling_openai import (CONFIG_NAME, WEIGHTS_NAME, OpenAIGPTConfig, OpenAIGPTModel, load_tf_weights_in_openai_gpt) diff --git 
a/pytorch_pretrained_bert/convert_tf_checkpoint_to_pytorch.py b/pytorch_transformers/convert_tf_checkpoint_to_pytorch.py similarity index 95% rename from pytorch_pretrained_bert/convert_tf_checkpoint_to_pytorch.py rename to pytorch_transformers/convert_tf_checkpoint_to_pytorch.py index 42f7380969..7530d7e12d 100755 --- a/pytorch_pretrained_bert/convert_tf_checkpoint_to_pytorch.py +++ b/pytorch_transformers/convert_tf_checkpoint_to_pytorch.py @@ -25,7 +25,7 @@ import tensorflow as tf import torch import numpy as np -from pytorch_pretrained_bert.modeling_bert import BertConfig, BertForPreTraining, load_tf_weights_in_bert +from pytorch_transformers.modeling_bert import BertConfig, BertForPreTraining, load_tf_weights_in_bert def convert_tf_checkpoint_to_pytorch(tf_checkpoint_path, bert_config_file, pytorch_dump_path): # Initialise PyTorch model diff --git a/pytorch_pretrained_bert/convert_transfo_xl_checkpoint_to_pytorch.py b/pytorch_transformers/convert_transfo_xl_checkpoint_to_pytorch.py similarity index 96% rename from pytorch_pretrained_bert/convert_transfo_xl_checkpoint_to_pytorch.py rename to pytorch_transformers/convert_transfo_xl_checkpoint_to_pytorch.py index 8d6b9651c7..2d666a1f03 100755 --- a/pytorch_pretrained_bert/convert_transfo_xl_checkpoint_to_pytorch.py +++ b/pytorch_transformers/convert_transfo_xl_checkpoint_to_pytorch.py @@ -23,13 +23,13 @@ from io import open import torch -import pytorch_pretrained_bert.tokenization_transfo_xl as data_utils -from pytorch_pretrained_bert.modeling_transfo_xl import (CONFIG_NAME, +import pytorch_transformers.tokenization_transfo_xl as data_utils +from pytorch_transformers.modeling_transfo_xl import (CONFIG_NAME, WEIGHTS_NAME, TransfoXLConfig, TransfoXLLMHeadModel, load_tf_weights_in_transfo_xl) -from pytorch_pretrained_bert.tokenization_transfo_xl import (CORPUS_NAME, +from pytorch_transformers.tokenization_transfo_xl import (CORPUS_NAME, VOCAB_NAME) if sys.version_info[0] == 2: diff --git 
a/pytorch_pretrained_bert/convert_xlm_checkpoint_to_pytorch.py b/pytorch_transformers/convert_xlm_checkpoint_to_pytorch.py similarity index 93% rename from pytorch_pretrained_bert/convert_xlm_checkpoint_to_pytorch.py rename to pytorch_transformers/convert_xlm_checkpoint_to_pytorch.py index 44a40174b4..0cbe962cea 100755 --- a/pytorch_pretrained_bert/convert_xlm_checkpoint_to_pytorch.py +++ b/pytorch_transformers/convert_xlm_checkpoint_to_pytorch.py @@ -23,8 +23,8 @@ from io import open import torch import numpy -from pytorch_pretrained_bert.modeling_xlm import (CONFIG_NAME, WEIGHTS_NAME, XLMConfig, XLMModel) -from pytorch_pretrained_bert.tokenization_xlm import MERGES_NAME, VOCAB_NAME +from pytorch_transformers.modeling_xlm import (CONFIG_NAME, WEIGHTS_NAME, XLMConfig, XLMModel) +from pytorch_transformers.tokenization_xlm import MERGES_NAME, VOCAB_NAME def convert_xlm_checkpoint_to_pytorch(xlm_checkpoint_path, pytorch_dump_folder_path): diff --git a/pytorch_pretrained_bert/convert_xlnet_checkpoint_to_pytorch.py b/pytorch_transformers/convert_xlnet_checkpoint_to_pytorch.py similarity index 98% rename from pytorch_pretrained_bert/convert_xlnet_checkpoint_to_pytorch.py rename to pytorch_transformers/convert_xlnet_checkpoint_to_pytorch.py index ce4fcc7810..f41db87124 100755 --- a/pytorch_pretrained_bert/convert_xlnet_checkpoint_to_pytorch.py +++ b/pytorch_transformers/convert_xlnet_checkpoint_to_pytorch.py @@ -22,7 +22,7 @@ import os import argparse import torch -from pytorch_pretrained_bert.modeling_xlnet import (CONFIG_NAME, WEIGHTS_NAME, +from pytorch_transformers.modeling_xlnet import (CONFIG_NAME, WEIGHTS_NAME, XLNetConfig, XLNetLMHeadModel, XLNetForQuestionAnswering, XLNetForSequenceClassification, diff --git a/pytorch_pretrained_bert/file_utils.py b/pytorch_transformers/file_utils.py similarity index 99% rename from pytorch_pretrained_bert/file_utils.py rename to pytorch_transformers/file_utils.py index 994f47d57c..1397bd416b 100644 --- 
a/pytorch_pretrained_bert/file_utils.py +++ b/pytorch_transformers/file_utils.py @@ -29,7 +29,7 @@ except ImportError: torch_cache_home = os.path.expanduser( os.getenv('TORCH_HOME', os.path.join( os.getenv('XDG_CACHE_HOME', '~/.cache'), 'torch'))) -default_cache_path = os.path.join(torch_cache_home, 'pytorch_pretrained_bert') +default_cache_path = os.path.join(torch_cache_home, 'pytorch_transformers') try: from urllib.parse import urlparse diff --git a/pytorch_pretrained_bert/model_utils.py b/pytorch_transformers/model_utils.py similarity index 100% rename from pytorch_pretrained_bert/model_utils.py rename to pytorch_transformers/model_utils.py diff --git a/pytorch_pretrained_bert/modeling_bert.py b/pytorch_transformers/modeling_bert.py similarity index 100% rename from pytorch_pretrained_bert/modeling_bert.py rename to pytorch_transformers/modeling_bert.py diff --git a/pytorch_pretrained_bert/modeling_gpt2.py b/pytorch_transformers/modeling_gpt2.py similarity index 100% rename from pytorch_pretrained_bert/modeling_gpt2.py rename to pytorch_transformers/modeling_gpt2.py diff --git a/pytorch_pretrained_bert/modeling_openai.py b/pytorch_transformers/modeling_openai.py similarity index 100% rename from pytorch_pretrained_bert/modeling_openai.py rename to pytorch_transformers/modeling_openai.py diff --git a/pytorch_pretrained_bert/modeling_transfo_xl.py b/pytorch_transformers/modeling_transfo_xl.py similarity index 100% rename from pytorch_pretrained_bert/modeling_transfo_xl.py rename to pytorch_transformers/modeling_transfo_xl.py diff --git a/pytorch_pretrained_bert/modeling_transfo_xl_utilities.py b/pytorch_transformers/modeling_transfo_xl_utilities.py similarity index 100% rename from pytorch_pretrained_bert/modeling_transfo_xl_utilities.py rename to pytorch_transformers/modeling_transfo_xl_utilities.py diff --git a/pytorch_pretrained_bert/modeling_xlm.py b/pytorch_transformers/modeling_xlm.py similarity index 99% rename from pytorch_pretrained_bert/modeling_xlm.py 
rename to pytorch_transformers/modeling_xlm.py index 9d1775161d..6decba3cce 100644 --- a/pytorch_pretrained_bert/modeling_xlm.py +++ b/pytorch_transformers/modeling_xlm.py @@ -204,7 +204,7 @@ def gelu(x): GELU activation https://arxiv.org/abs/1606.08415 https://github.com/huggingface/pytorch-openai-transformer-lm/blob/master/model_pytorch.py#L14 - https://github.com/huggingface/pytorch-pretrained-BERT/blob/master/modeling.py + https://github.com/huggingface/pytorch-transformers/blob/master/modeling.py """ # return 0.5 * x * (1 + torch.tanh(math.sqrt(2 / math.pi) * (x + 0.044715 * torch.pow(x, 3)))) return 0.5 * x * (1.0 + torch.erf(x / math.sqrt(2.0))) diff --git a/pytorch_pretrained_bert/modeling_xlnet.py b/pytorch_transformers/modeling_xlnet.py similarity index 100% rename from pytorch_pretrained_bert/modeling_xlnet.py rename to pytorch_transformers/modeling_xlnet.py diff --git a/pytorch_pretrained_bert/optimization.py b/pytorch_transformers/optimization.py similarity index 100% rename from pytorch_pretrained_bert/optimization.py rename to pytorch_transformers/optimization.py diff --git a/pytorch_pretrained_bert/optimization_openai.py b/pytorch_transformers/optimization_openai.py similarity index 100% rename from pytorch_pretrained_bert/optimization_openai.py rename to pytorch_transformers/optimization_openai.py diff --git a/pytorch_pretrained_bert/tests/__init__.py b/pytorch_transformers/tests/__init__.py similarity index 100% rename from pytorch_pretrained_bert/tests/__init__.py rename to pytorch_transformers/tests/__init__.py diff --git a/pytorch_pretrained_bert/tests/conftest.py b/pytorch_transformers/tests/conftest.py similarity index 100% rename from pytorch_pretrained_bert/tests/conftest.py rename to pytorch_transformers/tests/conftest.py diff --git a/pytorch_pretrained_bert/tests/fixtures/input.txt b/pytorch_transformers/tests/fixtures/input.txt similarity index 100% rename from pytorch_pretrained_bert/tests/fixtures/input.txt rename to 
pytorch_transformers/tests/fixtures/input.txt diff --git a/pytorch_pretrained_bert/tests/fixtures/sample_text.txt b/pytorch_transformers/tests/fixtures/sample_text.txt similarity index 100% rename from pytorch_pretrained_bert/tests/fixtures/sample_text.txt rename to pytorch_transformers/tests/fixtures/sample_text.txt diff --git a/pytorch_pretrained_bert/tests/fixtures/test_sentencepiece.model b/pytorch_transformers/tests/fixtures/test_sentencepiece.model similarity index 100% rename from pytorch_pretrained_bert/tests/fixtures/test_sentencepiece.model rename to pytorch_transformers/tests/fixtures/test_sentencepiece.model diff --git a/pytorch_pretrained_bert/tests/model_tests_commons.py b/pytorch_transformers/tests/model_tests_commons.py similarity index 99% rename from pytorch_pretrained_bert/tests/model_tests_commons.py rename to pytorch_transformers/tests/model_tests_commons.py index e7c97a0787..b831f85552 100644 --- a/pytorch_pretrained_bert/tests/model_tests_commons.py +++ b/pytorch_transformers/tests/model_tests_commons.py @@ -412,7 +412,7 @@ class GPTModelTester(object): [[], []]) def create_and_check_model_from_pretrained(self): - cache_dir = "/tmp/pytorch_pretrained_bert_test/" + cache_dir = "/tmp/pytorch_transformers_test/" for model_name in list(self.base_model_class.PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]: model = self.base_model_class.from_pretrained(model_name, cache_dir=cache_dir) shutil.rmtree(cache_dir) diff --git a/pytorch_pretrained_bert/tests/model_utils_test.py b/pytorch_transformers/tests/model_utils_test.py similarity index 89% rename from pytorch_pretrained_bert/tests/model_utils_test.py rename to pytorch_transformers/tests/model_utils_test.py index 59f076fa00..120df35f82 100644 --- a/pytorch_pretrained_bert/tests/model_utils_test.py +++ b/pytorch_transformers/tests/model_utils_test.py @@ -25,8 +25,8 @@ import pytest import torch -from pytorch_pretrained_bert import PretrainedConfig, PreTrainedModel -from pytorch_pretrained_bert.modeling_bert 
import BertModel, BertConfig, PRETRAINED_MODEL_ARCHIVE_MAP, PRETRAINED_CONFIG_ARCHIVE_MAP +from pytorch_transformers import PretrainedConfig, PreTrainedModel +from pytorch_transformers.modeling_bert import BertModel, BertConfig, PRETRAINED_MODEL_ARCHIVE_MAP, PRETRAINED_CONFIG_ARCHIVE_MAP class ModelUtilsTest(unittest.TestCase): diff --git a/pytorch_pretrained_bert/tests/modeling_bert_test.py b/pytorch_transformers/tests/modeling_bert_test.py similarity index 98% rename from pytorch_pretrained_bert/tests/modeling_bert_test.py rename to pytorch_transformers/tests/modeling_bert_test.py index 7a9d49fde7..b140f5e647 100644 --- a/pytorch_pretrained_bert/tests/modeling_bert_test.py +++ b/pytorch_transformers/tests/modeling_bert_test.py @@ -20,11 +20,11 @@ import unittest import shutil import pytest -from pytorch_pretrained_bert import (BertConfig, BertModel, BertForMaskedLM, +from pytorch_transformers import (BertConfig, BertModel, BertForMaskedLM, BertForNextSentencePrediction, BertForPreTraining, BertForQuestionAnswering, BertForSequenceClassification, BertForTokenClassification, BertForMultipleChoice) -from pytorch_pretrained_bert.modeling_bert import PRETRAINED_MODEL_ARCHIVE_MAP +from pytorch_transformers.modeling_bert import PRETRAINED_MODEL_ARCHIVE_MAP from .model_tests_commons import (create_and_check_commons, ConfigTester, ids_tensor) @@ -266,7 +266,7 @@ class BertModelTest(unittest.TestCase): @pytest.mark.slow def test_model_from_pretrained(self): - cache_dir = "/tmp/pytorch_pretrained_bert_test/" + cache_dir = "/tmp/pytorch_transformers_test/" for model_name in list(PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]: model = BertModel.from_pretrained(model_name, cache_dir=cache_dir) shutil.rmtree(cache_dir) diff --git a/pytorch_pretrained_bert/tests/modeling_gpt2_test.py b/pytorch_transformers/tests/modeling_gpt2_test.py similarity index 96% rename from pytorch_pretrained_bert/tests/modeling_gpt2_test.py rename to pytorch_transformers/tests/modeling_gpt2_test.py index 
122cdf3c7b..4ace52571a 100644 --- a/pytorch_pretrained_bert/tests/modeling_gpt2_test.py +++ b/pytorch_transformers/tests/modeling_gpt2_test.py @@ -25,7 +25,7 @@ import pytest import torch -from pytorch_pretrained_bert import (GPT2Config, GPT2Model, +from pytorch_transformers import (GPT2Config, GPT2Model, GPT2LMHeadModel, GPT2DoubleHeadsModel) from .model_tests_commons import (create_and_check_commons, ConfigTester, GPTModelTester) diff --git a/pytorch_pretrained_bert/tests/modeling_openai_test.py b/pytorch_transformers/tests/modeling_openai_test.py similarity index 96% rename from pytorch_pretrained_bert/tests/modeling_openai_test.py rename to pytorch_transformers/tests/modeling_openai_test.py index 627bc564de..fe81157023 100644 --- a/pytorch_pretrained_bert/tests/modeling_openai_test.py +++ b/pytorch_transformers/tests/modeling_openai_test.py @@ -21,7 +21,7 @@ import pytest import torch -from pytorch_pretrained_bert import (OpenAIGPTConfig, OpenAIGPTModel, +from pytorch_transformers import (OpenAIGPTConfig, OpenAIGPTModel, OpenAIGPTLMHeadModel, OpenAIGPTDoubleHeadsModel) from .model_tests_commons import (create_and_check_commons, ConfigTester, GPTModelTester) diff --git a/pytorch_pretrained_bert/tests/modeling_transfo_xl_test.py b/pytorch_transformers/tests/modeling_transfo_xl_test.py similarity index 97% rename from pytorch_pretrained_bert/tests/modeling_transfo_xl_test.py rename to pytorch_transformers/tests/modeling_transfo_xl_test.py index caeb25b412..d15a19eb64 100644 --- a/pytorch_pretrained_bert/tests/modeling_transfo_xl_test.py +++ b/pytorch_transformers/tests/modeling_transfo_xl_test.py @@ -25,8 +25,8 @@ import pytest import torch -from pytorch_pretrained_bert import (TransfoXLConfig, TransfoXLModel, TransfoXLLMHeadModel) -from pytorch_pretrained_bert.modeling_transfo_xl import PRETRAINED_MODEL_ARCHIVE_MAP +from pytorch_transformers import (TransfoXLConfig, TransfoXLModel, TransfoXLLMHeadModel) +from pytorch_transformers.modeling_transfo_xl import 
PRETRAINED_MODEL_ARCHIVE_MAP from .model_tests_commons import ConfigTester, create_and_check_commons, ids_tensor @@ -184,7 +184,7 @@ class TransfoXLModelTest(unittest.TestCase): @pytest.mark.slow def test_model_from_pretrained(self): - cache_dir = "/tmp/pytorch_pretrained_bert_test/" + cache_dir = "/tmp/pytorch_transformers_test/" for model_name in list(PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]: model = TransfoXLModel.from_pretrained(model_name, cache_dir=cache_dir) shutil.rmtree(cache_dir) diff --git a/pytorch_pretrained_bert/tests/modeling_xlm_test.py b/pytorch_transformers/tests/modeling_xlm_test.py similarity index 97% rename from pytorch_pretrained_bert/tests/modeling_xlm_test.py rename to pytorch_transformers/tests/modeling_xlm_test.py index 3e442a09fb..8a8905cc31 100644 --- a/pytorch_pretrained_bert/tests/modeling_xlm_test.py +++ b/pytorch_transformers/tests/modeling_xlm_test.py @@ -20,8 +20,8 @@ import unittest import shutil import pytest -from pytorch_pretrained_bert import (XLMConfig, XLMModel, XLMWithLMHeadModel, XLMForQuestionAnswering, XLMForSequenceClassification) -from pytorch_pretrained_bert.modeling_xlm import PRETRAINED_MODEL_ARCHIVE_MAP +from pytorch_transformers import (XLMConfig, XLMModel, XLMWithLMHeadModel, XLMForQuestionAnswering, XLMForSequenceClassification) +from pytorch_transformers.modeling_xlm import PRETRAINED_MODEL_ARCHIVE_MAP from .model_tests_commons import (create_and_check_commons, ConfigTester, ids_tensor) @@ -250,7 +250,7 @@ class XLMModelTest(unittest.TestCase): @pytest.mark.slow def test_model_from_pretrained(self): - cache_dir = "/tmp/pytorch_pretrained_bert_test/" + cache_dir = "/tmp/pytorch_transformers_test/" for model_name in list(PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]: model = XLMModel.from_pretrained(model_name, cache_dir=cache_dir) shutil.rmtree(cache_dir) diff --git a/pytorch_pretrained_bert/tests/modeling_xlnet_test.py b/pytorch_transformers/tests/modeling_xlnet_test.py similarity index 97% rename from 
pytorch_pretrained_bert/tests/modeling_xlnet_test.py rename to pytorch_transformers/tests/modeling_xlnet_test.py index 58617cf7b9..b9d55a26c7 100644 --- a/pytorch_pretrained_bert/tests/modeling_xlnet_test.py +++ b/pytorch_transformers/tests/modeling_xlnet_test.py @@ -25,8 +25,8 @@ import pytest import torch -from pytorch_pretrained_bert import (XLNetConfig, XLNetModel, XLNetLMHeadModel, XLNetForSequenceClassification, XLNetForQuestionAnswering) -from pytorch_pretrained_bert.modeling_xlnet import PRETRAINED_MODEL_ARCHIVE_MAP +from pytorch_transformers import (XLNetConfig, XLNetModel, XLNetLMHeadModel, XLNetForSequenceClassification, XLNetForQuestionAnswering) +from pytorch_transformers.modeling_xlnet import PRETRAINED_MODEL_ARCHIVE_MAP from .model_tests_commons import ConfigTester, create_and_check_commons, ids_tensor @@ -278,7 +278,7 @@ class XLNetModelTest(unittest.TestCase): @pytest.mark.slow def test_model_from_pretrained(self): - cache_dir = "/tmp/pytorch_pretrained_bert_test/" + cache_dir = "/tmp/pytorch_transformers_test/" for model_name in list(PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]: model = XLNetModel.from_pretrained(model_name, cache_dir=cache_dir) shutil.rmtree(cache_dir) diff --git a/pytorch_pretrained_bert/tests/optimization_test.py b/pytorch_transformers/tests/optimization_test.py similarity index 94% rename from pytorch_pretrained_bert/tests/optimization_test.py rename to pytorch_transformers/tests/optimization_test.py index c6924bd4bc..dfbbd44b6e 100644 --- a/pytorch_pretrained_bert/tests/optimization_test.py +++ b/pytorch_transformers/tests/optimization_test.py @@ -20,9 +20,9 @@ import unittest import torch -from pytorch_pretrained_bert import BertAdam -from pytorch_pretrained_bert import OpenAIAdam -from pytorch_pretrained_bert.optimization import ConstantLR, WarmupLinearSchedule, WarmupConstantSchedule, \ +from pytorch_transformers import BertAdam +from pytorch_transformers import OpenAIAdam +from pytorch_transformers.optimization import 
ConstantLR, WarmupLinearSchedule, WarmupConstantSchedule, \ WarmupCosineWithWarmupRestartsSchedule, WarmupCosineWithHardRestartsSchedule, WarmupCosineSchedule import numpy as np diff --git a/pytorch_pretrained_bert/tests/tokenization_bert_test.py b/pytorch_transformers/tests/tokenization_bert_test.py similarity index 97% rename from pytorch_pretrained_bert/tests/tokenization_bert_test.py rename to pytorch_transformers/tests/tokenization_bert_test.py index 3d0b4323b2..59a87a4cb9 100644 --- a/pytorch_pretrained_bert/tests/tokenization_bert_test.py +++ b/pytorch_transformers/tests/tokenization_bert_test.py @@ -20,7 +20,7 @@ from io import open import shutil import pytest -from pytorch_pretrained_bert.tokenization_bert import (BasicTokenizer, +from pytorch_transformers.tokenization_bert import (BasicTokenizer, BertTokenizer, WordpieceTokenizer, _is_control, _is_punctuation, @@ -51,7 +51,7 @@ class TokenizationTest(unittest.TestCase): @pytest.mark.slow def test_tokenizer_from_pretrained(self): - cache_dir = "/tmp/pytorch_pretrained_bert_test/" + cache_dir = "/tmp/pytorch_transformers_test/" for model_name in list(PRETRAINED_VOCAB_ARCHIVE_MAP.keys())[:1]: tokenizer = BertTokenizer.from_pretrained(model_name, cache_dir=cache_dir) shutil.rmtree(cache_dir) diff --git a/pytorch_pretrained_bert/tests/tokenization_gpt2_test.py b/pytorch_transformers/tests/tokenization_gpt2_test.py similarity index 94% rename from pytorch_pretrained_bert/tests/tokenization_gpt2_test.py rename to pytorch_transformers/tests/tokenization_gpt2_test.py index 70f69a1f23..c6d926bdd4 100644 --- a/pytorch_pretrained_bert/tests/tokenization_gpt2_test.py +++ b/pytorch_transformers/tests/tokenization_gpt2_test.py @@ -20,7 +20,7 @@ import json import shutil import pytest -from pytorch_pretrained_bert.tokenization_gpt2 import GPT2Tokenizer, PRETRAINED_VOCAB_ARCHIVE_MAP +from pytorch_transformers.tokenization_gpt2 import GPT2Tokenizer, PRETRAINED_VOCAB_ARCHIVE_MAP from .tokenization_tests_commons import 
create_and_check_tokenizer_commons @@ -58,7 +58,7 @@ class GPT2TokenizationTest(unittest.TestCase): # @pytest.mark.slow def test_tokenizer_from_pretrained(self): - cache_dir = "/tmp/pytorch_pretrained_bert_test/" + cache_dir = "/tmp/pytorch_transformers_test/" for model_name in list(PRETRAINED_VOCAB_ARCHIVE_MAP.keys())[:1]: tokenizer = GPT2Tokenizer.from_pretrained(model_name, cache_dir=cache_dir) shutil.rmtree(cache_dir) diff --git a/pytorch_pretrained_bert/tests/tokenization_openai_test.py b/pytorch_transformers/tests/tokenization_openai_test.py similarity index 94% rename from pytorch_pretrained_bert/tests/tokenization_openai_test.py rename to pytorch_transformers/tests/tokenization_openai_test.py index 6ae72858a7..38315f927b 100644 --- a/pytorch_pretrained_bert/tests/tokenization_openai_test.py +++ b/pytorch_transformers/tests/tokenization_openai_test.py @@ -20,7 +20,7 @@ import json import shutil import pytest -from pytorch_pretrained_bert.tokenization_openai import OpenAIGPTTokenizer, PRETRAINED_VOCAB_ARCHIVE_MAP +from pytorch_transformers.tokenization_openai import OpenAIGPTTokenizer, PRETRAINED_VOCAB_ARCHIVE_MAP from.tokenization_tests_commons import create_and_check_tokenizer_commons @@ -60,7 +60,7 @@ class OpenAIGPTTokenizationTest(unittest.TestCase): @pytest.mark.slow def test_tokenizer_from_pretrained(self): - cache_dir = "/tmp/pytorch_pretrained_bert_test/" + cache_dir = "/tmp/pytorch_transformers_test/" for model_name in list(PRETRAINED_VOCAB_ARCHIVE_MAP.keys())[:1]: tokenizer = OpenAIGPTTokenizer.from_pretrained(model_name, cache_dir=cache_dir) shutil.rmtree(cache_dir) diff --git a/pytorch_pretrained_bert/tests/tokenization_tests_commons.py b/pytorch_transformers/tests/tokenization_tests_commons.py similarity index 100% rename from pytorch_pretrained_bert/tests/tokenization_tests_commons.py rename to pytorch_transformers/tests/tokenization_tests_commons.py diff --git a/pytorch_pretrained_bert/tests/tokenization_transfo_xl_test.py 
b/pytorch_transformers/tests/tokenization_transfo_xl_test.py similarity index 93% rename from pytorch_pretrained_bert/tests/tokenization_transfo_xl_test.py rename to pytorch_transformers/tests/tokenization_transfo_xl_test.py index a5ff30ab6e..f744e319c8 100644 --- a/pytorch_pretrained_bert/tests/tokenization_transfo_xl_test.py +++ b/pytorch_transformers/tests/tokenization_transfo_xl_test.py @@ -20,7 +20,7 @@ from io import open import shutil import pytest -from pytorch_pretrained_bert.tokenization_transfo_xl import TransfoXLTokenizer, PRETRAINED_VOCAB_ARCHIVE_MAP +from pytorch_transformers.tokenization_transfo_xl import TransfoXLTokenizer, PRETRAINED_VOCAB_ARCHIVE_MAP from.tokenization_tests_commons import create_and_check_tokenizer_commons @@ -61,7 +61,7 @@ class TransfoXLTokenizationTest(unittest.TestCase): @pytest.mark.slow def test_tokenizer_from_pretrained(self): - cache_dir = "/tmp/pytorch_pretrained_bert_test/" + cache_dir = "/tmp/pytorch_transformers_test/" for model_name in list(PRETRAINED_VOCAB_ARCHIVE_MAP.keys())[:1]: tokenizer = TransfoXLTokenizer.from_pretrained(model_name, cache_dir=cache_dir) shutil.rmtree(cache_dir) diff --git a/pytorch_pretrained_bert/tests/tokenization_xlm_test.py b/pytorch_transformers/tests/tokenization_xlm_test.py similarity index 94% rename from pytorch_pretrained_bert/tests/tokenization_xlm_test.py rename to pytorch_transformers/tests/tokenization_xlm_test.py index 3b2db8ea1f..9cc18f3d60 100644 --- a/pytorch_pretrained_bert/tests/tokenization_xlm_test.py +++ b/pytorch_transformers/tests/tokenization_xlm_test.py @@ -20,7 +20,7 @@ import json import shutil import pytest -from pytorch_pretrained_bert.tokenization_xlm import XLMTokenizer, PRETRAINED_VOCAB_ARCHIVE_MAP +from pytorch_transformers.tokenization_xlm import XLMTokenizer, PRETRAINED_VOCAB_ARCHIVE_MAP from.tokenization_tests_commons import create_and_check_tokenizer_commons @@ -59,7 +59,7 @@ class XLMTokenizationTest(unittest.TestCase): @pytest.mark.slow def 
test_tokenizer_from_pretrained(self): - cache_dir = "/tmp/pytorch_pretrained_bert_test/" + cache_dir = "/tmp/pytorch_transformers_test/" for model_name in list(PRETRAINED_VOCAB_ARCHIVE_MAP.keys())[:1]: tokenizer = XLMTokenizer.from_pretrained(model_name, cache_dir=cache_dir) shutil.rmtree(cache_dir) diff --git a/pytorch_pretrained_bert/tests/tokenization_xlnet_test.py b/pytorch_transformers/tests/tokenization_xlnet_test.py similarity index 97% rename from pytorch_pretrained_bert/tests/tokenization_xlnet_test.py rename to pytorch_transformers/tests/tokenization_xlnet_test.py index 9b6dd5a6c4..4dd76e114b 100644 --- a/pytorch_pretrained_bert/tests/tokenization_xlnet_test.py +++ b/pytorch_transformers/tests/tokenization_xlnet_test.py @@ -19,7 +19,7 @@ import unittest import shutil import pytest -from pytorch_pretrained_bert.tokenization_xlnet import (XLNetTokenizer, +from pytorch_transformers.tokenization_xlnet import (XLNetTokenizer, PRETRAINED_VOCAB_ARCHIVE_MAP, SPIECE_UNDERLINE) @@ -62,7 +62,7 @@ class XLNetTokenizationTest(unittest.TestCase): @pytest.mark.slow def test_tokenizer_from_pretrained(self): - cache_dir = "/tmp/pytorch_pretrained_bert_test/" + cache_dir = "/tmp/pytorch_transformers_test/" for model_name in list(PRETRAINED_VOCAB_ARCHIVE_MAP.keys())[:1]: tokenizer = XLNetTokenizer.from_pretrained(model_name, cache_dir=cache_dir) shutil.rmtree(cache_dir) diff --git a/pytorch_pretrained_bert/tokenization_bert.py b/pytorch_transformers/tokenization_bert.py similarity index 100% rename from pytorch_pretrained_bert/tokenization_bert.py rename to pytorch_transformers/tokenization_bert.py diff --git a/pytorch_pretrained_bert/tokenization_gpt2.py b/pytorch_transformers/tokenization_gpt2.py similarity index 100% rename from pytorch_pretrained_bert/tokenization_gpt2.py rename to pytorch_transformers/tokenization_gpt2.py diff --git a/pytorch_pretrained_bert/tokenization_openai.py b/pytorch_transformers/tokenization_openai.py similarity index 100% rename from 
pytorch_pretrained_bert/tokenization_openai.py rename to pytorch_transformers/tokenization_openai.py diff --git a/pytorch_pretrained_bert/tokenization_transfo_xl.py b/pytorch_transformers/tokenization_transfo_xl.py similarity index 100% rename from pytorch_pretrained_bert/tokenization_transfo_xl.py rename to pytorch_transformers/tokenization_transfo_xl.py diff --git a/pytorch_pretrained_bert/tokenization_xlm.py b/pytorch_transformers/tokenization_xlm.py similarity index 100% rename from pytorch_pretrained_bert/tokenization_xlm.py rename to pytorch_transformers/tokenization_xlm.py diff --git a/pytorch_pretrained_bert/tokenization_xlnet.py b/pytorch_transformers/tokenization_xlnet.py similarity index 100% rename from pytorch_pretrained_bert/tokenization_xlnet.py rename to pytorch_transformers/tokenization_xlnet.py diff --git a/setup.py b/setup.py index 28e85a0068..09b8c01ad5 100644 --- a/setup.py +++ b/setup.py @@ -37,16 +37,16 @@ from io import open from setuptools import find_packages, setup setup( - name="pytorch_pretrained_bert", - version="0.6.2", - author="Thomas Wolf, Victor Sanh, Tim Rault, Google AI Language Team Authors, Open AI team Authors", + name="pytorch_transformers", + version="0.7.0", + author="Thomas Wolf, Lysandre Debut, Victor Sanh, Tim Rault, Google AI Language Team Authors, Open AI team Authors", author_email="thomas@huggingface.co", description="PyTorch version of Google AI BERT model with script to load Google pre-trained models", long_description=open("README.md", "r", encoding='utf-8').read(), long_description_content_type="text/markdown", keywords='BERT NLP deep learning google', license='Apache', - url="https://github.com/huggingface/pytorch-pretrained-BERT", + url="https://github.com/huggingface/pytorch-transformers", packages=find_packages(exclude=["*.tests", "*.tests.*", "tests.*", "tests"]), install_requires=['torch>=0.4.1', @@ -58,7 +58,7 @@ setup( 'sentencepiece'], entry_points={ 'console_scripts': [ - 
"pytorch_pretrained_bert=pytorch_pretrained_bert.__main__:main", + "pytorch_transformers=pytorch_transformers.__main__:main", ] }, # python_requires='>=3.5.0',