From 68f77303b294385239c0b356948c93d68ea09715 Mon Sep 17 00:00:00 2001 From: thomwolf Date: Sun, 9 Dec 2018 16:17:11 -0500 Subject: [PATCH 1/7] fixing Adam weights skip in TF convert script --- pytorch_pretrained_bert/convert_tf_checkpoint_to_pytorch.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pytorch_pretrained_bert/convert_tf_checkpoint_to_pytorch.py b/pytorch_pretrained_bert/convert_tf_checkpoint_to_pytorch.py index 20fdd8c0d6..79b5f41adc 100755 --- a/pytorch_pretrained_bert/convert_tf_checkpoint_to_pytorch.py +++ b/pytorch_pretrained_bert/convert_tf_checkpoint_to_pytorch.py @@ -50,7 +50,7 @@ def convert_tf_checkpoint_to_pytorch(tf_checkpoint_path, bert_config_file, pytor name = name.split('/') # adam_v and adam_m are variables used in AdamWeightDecayOptimizer to calculated m and v # which are not required for using pretrained model - if name[-1] in ["adam_v", "adam_m"]: + if any(n in ["adam_v", "adam_m"] for n in name): print("Skipping {}".format("/".join(name))) continue pointer = model From 1db916b5be1ee281fe08780e07fa24e2d9471c92 Mon Sep 17 00:00:00 2001 From: thomwolf Date: Sun, 9 Dec 2018 16:57:51 -0500 Subject: [PATCH 2/7] compatibility PT 1.0 and 0.4.1 --- tests/optimization_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/optimization_test.py b/tests/optimization_test.py index 1c010750ae..1846373591 100644 --- a/tests/optimization_test.py +++ b/tests/optimization_test.py @@ -32,7 +32,7 @@ class OptimizationTest(unittest.TestCase): def test_adam(self): w = torch.tensor([0.1, -0.2, -0.1], requires_grad=True) target = torch.tensor([0.4, 0.2, -0.5]) - criterion = torch.nn.MSELoss(reduction='elementwise_mean') + criterion = torch.nn.MSELoss() # No warmup, constant schedule, no gradient clipping optimizer = BertAdam(params=[w], lr=2e-1, weight_decay_rate=0.0, From 174cdbccde6601884cf0a25d2902c6ba31130de4 Mon Sep 17 00:00:00 2001 From: thomwolf Date: Sun, 9 Dec 2018 17:04:23 -0500 Subject: [PATCH 3/7] 
adding save checkpoint and loading in examples --- examples/run_classifier.py | 6 +++++- examples/run_squad.py | 2 +- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/examples/run_classifier.py b/examples/run_classifier.py index a5e7d2c30d..3d13ee463f 100644 --- a/examples/run_classifier.py +++ b/examples/run_classifier.py @@ -359,7 +359,7 @@ def main(): default=None, type=str, required=True, - help="The output directory where the model checkpoints will be written.") + help="The output directory where the model predictions and checkpoints will be written.") ## Other parameters parser.add_argument("--max_seq_length", @@ -626,6 +626,10 @@ def main(): 'global_step': global_step, 'loss': tr_loss/nb_tr_steps} + model_to_save = model.module if hasattr(model, 'module') else model + raise NotImplementedError # TODO add save of the configuration file and vocabulary file also ? + output_model_file = os.path.join(args.output_dir, "pytorch_model.bin") + torch.save(model_to_save, output_model_file) output_eval_file = os.path.join(args.output_dir, "eval_results.txt") with open(output_eval_file, "w") as writer: logger.info("***** Eval results *****") diff --git a/examples/run_squad.py b/examples/run_squad.py index e47730043e..cd0a9a3028 100644 --- a/examples/run_squad.py +++ b/examples/run_squad.py @@ -706,7 +706,7 @@ def main(): help="Bert pre-trained model selected in the list: bert-base-uncased, " "bert-large-uncased, bert-base-cased, bert-base-multilingual, bert-base-chinese.") parser.add_argument("--output_dir", default=None, type=str, required=True, - help="The output directory where the model checkpoints will be written.") + help="The output directory where the model checkpoints and predictions will be written.") ## Other parameters parser.add_argument("--train_file", default=None, type=str, help="SQuAD json for training. 
E.g., train-v1.1.json") From 270fa2f20b6dd9736a08f24e6050f24b2a96b010 Mon Sep 17 00:00:00 2001 From: thomwolf Date: Tue, 11 Dec 2018 11:50:38 +0100 Subject: [PATCH 4/7] add pretrained loading from state_dict --- pytorch_pretrained_bert/modeling.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/pytorch_pretrained_bert/modeling.py b/pytorch_pretrained_bert/modeling.py index 3af5854072..3d04f0842c 100644 --- a/pytorch_pretrained_bert/modeling.py +++ b/pytorch_pretrained_bert/modeling.py @@ -445,9 +445,9 @@ class PreTrainedBertModel(nn.Module): module.bias.data.zero_() @classmethod - def from_pretrained(cls, pretrained_model_name, cache_dir=None, *inputs, **kwargs): + def from_pretrained(cls, pretrained_model_name, state_dict=None, cache_dir=None, *inputs, **kwargs): """ - Instantiate a PreTrainedBertModel from a pre-trained model file. + Instantiate a PreTrainedBertModel from a pre-trained model file or a pytorch state dict. Download and cache the pre-trained model file if needed. Params: @@ -461,6 +461,8 @@ class PreTrainedBertModel(nn.Module): - a path or url to a pretrained model archive containing: . `bert_config.json` a configuration file for the model . `pytorch_model.bin` a PyTorch dump of a BertForPreTraining instance + cache_dir: an optional path to a folder in which the pre-trained models will be cached. + state_dict: an optional state dictionary (collections.OrderedDict object) to use instead of Google pre-trained models *inputs, **kwargs: additional input for the specific Bert class (ex: num_labels for BertForSequenceClassification) """ @@ -502,8 +504,9 @@ class PreTrainedBertModel(nn.Module): logger.info("Model config {}".format(config)) # Instantiate model. 
model = cls(config, *inputs, **kwargs) - weights_path = os.path.join(serialization_dir, WEIGHTS_NAME) - state_dict = torch.load(weights_path) + if state_dict is None: + weights_path = os.path.join(serialization_dir, WEIGHTS_NAME) + state_dict = torch.load(weights_path) missing_keys = [] unexpected_keys = [] From b13abfa9feb648836e47ba6e47fa18d28dd300ea Mon Sep 17 00:00:00 2001 From: thomwolf Date: Tue, 11 Dec 2018 11:58:07 +0100 Subject: [PATCH 5/7] add saving and loading model in examples --- examples/run_classifier.py | 17 +++++++++++------ examples/run_squad.py | 9 +++++++++ 2 files changed, 20 insertions(+), 6 deletions(-) diff --git a/examples/run_classifier.py b/examples/run_classifier.py index 3d13ee463f..b535415a74 100644 --- a/examples/run_classifier.py +++ b/examples/run_classifier.py @@ -487,8 +487,8 @@ def main(): len(train_examples) / args.train_batch_size / args.gradient_accumulation_steps * args.num_train_epochs) # Prepare model - model = BertForSequenceClassification.from_pretrained(args.bert_model, - cache_dir=PYTORCH_PRETRAINED_BERT_CACHE / 'distributed_{}'.format(args.local_rank)) + cache_dir = PYTORCH_PRETRAINED_BERT_CACHE / 'distributed_{}'.format(args.local_rank) # for distributed learning + model = BertForSequenceClassification.from_pretrained(args.bert_model, cache_dir=cache_dir) if args.fp16: model.half() model.to(device) @@ -579,6 +579,15 @@ def main(): model.zero_grad() global_step += 1 + # Save a trained model + model_to_save = model.module if hasattr(model, 'module') else model # Only save the model it-self + output_model_file = os.path.join(args.output_dir, "pytorch_model.bin") + torch.save(model_to_save.state_dict(), output_model_file) + + # Load a trained model that you have fine-tuned + model_state_dict = torch.load(output_model_file) + model = BertForSequenceClassification.from_pretrained(args.bert_model, state_dict=model_state_dict) + if args.do_eval and (args.local_rank == -1 or torch.distributed.get_rank() == 0): eval_examples = 
processor.get_dev_examples(args.data_dir) eval_features = convert_examples_to_features( @@ -626,10 +635,6 @@ def main(): 'global_step': global_step, 'loss': tr_loss/nb_tr_steps} - model_to_save = model.module if hasattr(model, 'module') else model - raise NotImplementedError # TODO add save of the configuration file and vocabulary file also ? - output_model_file = os.path.join(args.output_dir, "pytorch_model.bin") - torch.save(model_to_save, output_model_file) output_eval_file = os.path.join(args.output_dir, "eval_results.txt") with open(output_eval_file, "w") as writer: logger.info("***** Eval results *****") diff --git a/examples/run_squad.py b/examples/run_squad.py index cd0a9a3028..cd10e5d5f1 100644 --- a/examples/run_squad.py +++ b/examples/run_squad.py @@ -933,6 +933,15 @@ def main(): model.zero_grad() global_step += 1 + # Save a trained model + model_to_save = model.module if hasattr(model, 'module') else model # Only save the model it-self + output_model_file = os.path.join(args.output_dir, "pytorch_model.bin") + torch.save(model_to_save.state_dict(), output_model_file) + + # Load a trained model that you have fine-tuned + model_state_dict = torch.load(output_model_file) + model = BertForQuestionAnswering.from_pretrained(args.bert_model, state_dict=model_state_dict) + if args.do_predict and (args.local_rank == -1 or torch.distributed.get_rank() == 0): eval_examples = read_squad_examples( input_file=args.predict_file, is_training=False) From ed3b62cd3bc5529b6388e405d4ada78a88903800 Mon Sep 17 00:00:00 2001 From: thomwolf Date: Tue, 11 Dec 2018 12:12:08 +0100 Subject: [PATCH 6/7] added version in __init__.py --- pytorch_pretrained_bert/__init__.py | 1 + requirements.txt | 5 ++--- setup.py | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/pytorch_pretrained_bert/__init__.py b/pytorch_pretrained_bert/__init__.py index fc9b15a12d..f8d04f5d7f 100644 --- a/pytorch_pretrained_bert/__init__.py +++ b/pytorch_pretrained_bert/__init__.py @@ -1,3 +1,4 
@@ +__version__ = 0.4.0 from .tokenization import BertTokenizer, BasicTokenizer, WordpieceTokenizer from .modeling import (BertConfig, BertModel, BertForPreTraining, BertForMaskedLM, BertForNextSentencePrediction, diff --git a/requirements.txt b/requirements.txt index e9a3640a9b..f37f11cc54 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,6 +1,5 @@ -# This installs Pytorch for CUDA 8 only. If you are using a newer version, -# please visit http://pytorch.org/ and install the relevant version. -torch>=0.4.1,<0.5.0 +# PyTorch +torch>=0.4.1 # progress bars in model download and training scripts tqdm # Accessing files from S3 directly. diff --git a/setup.py b/setup.py index fc793b53e6..21ca97294d 100644 --- a/setup.py +++ b/setup.py @@ -2,7 +2,7 @@ from setuptools import find_packages, setup setup( name="pytorch_pretrained_bert", - version="0.3.0", + version="0.4.0", author="Thomas Wolf, Victor Sanh, Tim Rault, Google AI Language Team Authors", author_email="thomas@huggingface.co", description="PyTorch version of Google AI BERT model with script to load Google pre-trained models", From 770f805ae521b0890438092b09475f99b37643de Mon Sep 17 00:00:00 2001 From: thomwolf Date: Tue, 11 Dec 2018 12:20:22 +0100 Subject: [PATCH 7/7] include version number + comment in setup.py --- pytorch_pretrained_bert/__init__.py | 2 +- setup.py | 35 +++++++++++++++++++++++++++++ 2 files changed, 36 insertions(+), 1 deletion(-) diff --git a/pytorch_pretrained_bert/__init__.py b/pytorch_pretrained_bert/__init__.py index f8d04f5d7f..ebc4f7edcc 100644 --- a/pytorch_pretrained_bert/__init__.py +++ b/pytorch_pretrained_bert/__init__.py @@ -1,4 +1,4 @@ -__version__ = 0.4.0 +__version__ = "0.4.0" from .tokenization import BertTokenizer, BasicTokenizer, WordpieceTokenizer from .modeling import (BertConfig, BertModel, BertForPreTraining, BertForMaskedLM, BertForNextSentencePrediction, diff --git a/setup.py b/setup.py index 21ca97294d..a1e1f68db6 100644 --- a/setup.py +++ b/setup.py @@ -1,3 
+1,38 @@ +""" +Simple check list from AllenNLP repo: https://github.com/allenai/allennlp/blob/master/setup.py + +To create the package for pypi. + +1. Change the version in __init__.py and setup.py. + +2. Commit these changes with the message: "Release: VERSION" + +3. Add a tag in git to mark the release: "git tag VERSION -m'Adds tag VERSION for pypi' " + Push the tag to git: git push --tags origin master + +4. Build both the sources and the wheel. Do not change anything in setup.py between + creating the wheel and the source distribution (obviously). + + For the wheel, run: "python setup.py bdist_wheel" in the top level directory of this repository. + (this will build a wheel for the python version you use to build it - make sure you use python 3.x). + + For the sources, run: "python setup.py sdist" + You should now have a /dist directory with both .whl and .tar.gz source versions of pytorch_pretrained_bert. + +5. Check that everything looks correct by uploading the package to the pypi test server: + + twine upload dist/* -r pypitest + (pypi suggests using twine as other methods upload files via plaintext.) + + Check that you can install it in a virtualenv by running: + pip install -i https://testpypi.python.org/pypi pytorch_pretrained_bert + +6. Upload the final version to actual pypi: + twine upload dist/* -r pypi + +7. Copy the release notes from RELEASE.md to the tag in github once everything is looking hunky-dory. + +""" from setuptools import find_packages, setup setup(