From 68f77303b294385239c0b356948c93d68ea09715 Mon Sep 17 00:00:00 2001
From: thomwolf <thomwolf@gmail.com>
Date: Sun, 9 Dec 2018 16:17:11 -0500
Subject: [PATCH 1/7] fixing Adam weights skip in TF convert script

---
 pytorch_pretrained_bert/convert_tf_checkpoint_to_pytorch.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pytorch_pretrained_bert/convert_tf_checkpoint_to_pytorch.py b/pytorch_pretrained_bert/convert_tf_checkpoint_to_pytorch.py
index 20fdd8c0d6..79b5f41adc 100755
--- a/pytorch_pretrained_bert/convert_tf_checkpoint_to_pytorch.py
+++ b/pytorch_pretrained_bert/convert_tf_checkpoint_to_pytorch.py
@@ -50,7 +50,7 @@ def convert_tf_checkpoint_to_pytorch(tf_checkpoint_path, bert_config_file, pytor
         name = name.split('/')
         # adam_v and adam_m are variables used in AdamWeightDecayOptimizer to calculated m and v
         # which are not required for using pretrained model
-        if name[-1] in ["adam_v", "adam_m"]:
+        if any(n in ["adam_v", "adam_m"] for n in name):
             print("Skipping {}".format("/".join(name)))
             continue
         pointer = model

From 1db916b5be1ee281fe08780e07fa24e2d9471c92 Mon Sep 17 00:00:00 2001
From: thomwolf <thomwolf@gmail.com>
Date: Sun, 9 Dec 2018 16:57:51 -0500
Subject: [PATCH 2/7] compatibility PT 1.0 and 0.4.1

---
 tests/optimization_test.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/optimization_test.py b/tests/optimization_test.py
index 1c010750ae..1846373591 100644
--- a/tests/optimization_test.py
+++ b/tests/optimization_test.py
@@ -32,7 +32,7 @@ class OptimizationTest(unittest.TestCase):
     def test_adam(self):
         w = torch.tensor([0.1, -0.2, -0.1], requires_grad=True)
         target = torch.tensor([0.4, 0.2, -0.5])
-        criterion = torch.nn.MSELoss(reduction='elementwise_mean')
+        criterion = torch.nn.MSELoss()
         # No warmup, constant schedule, no gradient clipping
         optimizer = BertAdam(params=[w], lr=2e-1,
                                           weight_decay_rate=0.0,

From 174cdbccde6601884cf0a25d2902c6ba31130de4 Mon Sep 17 00:00:00 2001
From: thomwolf <thomwolf@gmail.com>
Date: Sun, 9 Dec 2018 17:04:23 -0500
Subject: [PATCH 3/7] adding save checkpoint and loading in examples

---
 examples/run_classifier.py | 6 +++++-
 examples/run_squad.py      | 2 +-
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/examples/run_classifier.py b/examples/run_classifier.py
index a5e7d2c30d..3d13ee463f 100644
--- a/examples/run_classifier.py
+++ b/examples/run_classifier.py
@@ -359,7 +359,7 @@ def main():
                         default=None,
                         type=str,
                         required=True,
-                        help="The output directory where the model checkpoints will be written.")
+                        help="The output directory where the model predictions and checkpoints will be written.")
 
     ## Other parameters
     parser.add_argument("--max_seq_length",
@@ -626,6 +626,10 @@ def main():
                   'global_step': global_step,
                   'loss': tr_loss/nb_tr_steps}
 
+        model_to_save = model.module if hasattr(model, 'module') else model
+        raise NotImplementedError # TODO add save of the configuration file and vocabulary file also ?
+        output_model_file = os.path.join(args.output_dir, "pytorch_model.bin")
+        torch.save(model_to_save, output_model_file)
         output_eval_file = os.path.join(args.output_dir, "eval_results.txt")
         with open(output_eval_file, "w") as writer:
             logger.info("***** Eval results *****")
diff --git a/examples/run_squad.py b/examples/run_squad.py
index e47730043e..cd0a9a3028 100644
--- a/examples/run_squad.py
+++ b/examples/run_squad.py
@@ -706,7 +706,7 @@ def main():
                         help="Bert pre-trained model selected in the list: bert-base-uncased, "
                              "bert-large-uncased, bert-base-cased, bert-base-multilingual, bert-base-chinese.")
     parser.add_argument("--output_dir", default=None, type=str, required=True,
-                        help="The output directory where the model checkpoints will be written.")
+                        help="The output directory where the model checkpoints and predictions will be written.")
 
     ## Other parameters
     parser.add_argument("--train_file", default=None, type=str, help="SQuAD json for training. E.g., train-v1.1.json")

From 270fa2f20b6dd9736a08f24e6050f24b2a96b010 Mon Sep 17 00:00:00 2001
From: thomwolf <thomwolf@gmail.com>
Date: Tue, 11 Dec 2018 11:50:38 +0100
Subject: [PATCH 4/7] add pretrained loading from state_dict

---
 pytorch_pretrained_bert/modeling.py | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/pytorch_pretrained_bert/modeling.py b/pytorch_pretrained_bert/modeling.py
index 3af5854072..3d04f0842c 100644
--- a/pytorch_pretrained_bert/modeling.py
+++ b/pytorch_pretrained_bert/modeling.py
@@ -445,9 +445,9 @@ class PreTrainedBertModel(nn.Module):
             module.bias.data.zero_()
 
     @classmethod
-    def from_pretrained(cls, pretrained_model_name, cache_dir=None, *inputs, **kwargs):
+    def from_pretrained(cls, pretrained_model_name, state_dict=None, cache_dir=None, *inputs, **kwargs):
         """
-        Instantiate a PreTrainedBertModel from a pre-trained model file.
+        Instantiate a PreTrainedBertModel from a pre-trained model file or a pytorch state dict.
         Download and cache the pre-trained model file if needed.
         
         Params:
@@ -461,6 +461,8 @@ class PreTrainedBertModel(nn.Module):
                 - a path or url to a pretrained model archive containing:
                     . `bert_config.json` a configuration file for the model
                     . `pytorch_model.bin` a PyTorch dump of a BertForPreTraining instance
+            cache_dir: an optional path to a folder in which the pre-trained models will be cached.
+            state_dict: an optional state dictionary (collections.OrderedDict object) to use instead of Google pre-trained models
             *inputs, **kwargs: additional input for the specific Bert class
                 (ex: num_labels for BertForSequenceClassification)
         """
@@ -502,8 +504,9 @@ class PreTrainedBertModel(nn.Module):
         logger.info("Model config {}".format(config))
         # Instantiate model.
         model = cls(config, *inputs, **kwargs)
-        weights_path = os.path.join(serialization_dir, WEIGHTS_NAME)
-        state_dict = torch.load(weights_path)
+        if state_dict is None:
+            weights_path = os.path.join(serialization_dir, WEIGHTS_NAME)
+            state_dict = torch.load(weights_path)
 
         missing_keys = []
         unexpected_keys = []

From b13abfa9feb648836e47ba6e47fa18d28dd300ea Mon Sep 17 00:00:00 2001
From: thomwolf <thomwolf@gmail.com>
Date: Tue, 11 Dec 2018 11:58:07 +0100
Subject: [PATCH 5/7] add saving and loading model in examples

---
 examples/run_classifier.py | 17 +++++++++++------
 examples/run_squad.py      |  9 +++++++++
 2 files changed, 20 insertions(+), 6 deletions(-)

diff --git a/examples/run_classifier.py b/examples/run_classifier.py
index 3d13ee463f..b535415a74 100644
--- a/examples/run_classifier.py
+++ b/examples/run_classifier.py
@@ -487,8 +487,8 @@ def main():
             len(train_examples) / args.train_batch_size / args.gradient_accumulation_steps * args.num_train_epochs)
 
     # Prepare model
-    model = BertForSequenceClassification.from_pretrained(args.bert_model, 
-                cache_dir=PYTORCH_PRETRAINED_BERT_CACHE / 'distributed_{}'.format(args.local_rank))
+    cache_dir = PYTORCH_PRETRAINED_BERT_CACHE / 'distributed_{}'.format(args.local_rank) # for distributed learning
+    model = BertForSequenceClassification.from_pretrained(args.bert_model, cache_dir=cache_dir)
     if args.fp16:
         model.half()
     model.to(device)
@@ -579,6 +579,15 @@ def main():
                     model.zero_grad()
                     global_step += 1
 
+    # Save a trained model
+    model_to_save = model.module if hasattr(model, 'module') else model  # Only save the model it-self
+    output_model_file = os.path.join(args.output_dir, "pytorch_model.bin")
+    torch.save(model_to_save.state_dict(), output_model_file)
+
+    # Load a trained model that you have fine-tuned
+    model_state_dict = torch.load(output_model_file)
+    model = BertForSequenceClassification.from_pretrained(args.bert_model, state_dict=model_state_dict)
+
     if args.do_eval and (args.local_rank == -1 or torch.distributed.get_rank() == 0):
         eval_examples = processor.get_dev_examples(args.data_dir)
         eval_features = convert_examples_to_features(
@@ -626,10 +635,6 @@ def main():
                   'global_step': global_step,
                   'loss': tr_loss/nb_tr_steps}
 
-        model_to_save = model.module if hasattr(model, 'module') else model
-        raise NotImplementedError # TODO add save of the configuration file and vocabulary file also ?
-        output_model_file = os.path.join(args.output_dir, "pytorch_model.bin")
-        torch.save(model_to_save, output_model_file)
         output_eval_file = os.path.join(args.output_dir, "eval_results.txt")
         with open(output_eval_file, "w") as writer:
             logger.info("***** Eval results *****")
diff --git a/examples/run_squad.py b/examples/run_squad.py
index cd0a9a3028..cd10e5d5f1 100644
--- a/examples/run_squad.py
+++ b/examples/run_squad.py
@@ -933,6 +933,15 @@ def main():
                     model.zero_grad()
                     global_step += 1
 
+    # Save a trained model
+    model_to_save = model.module if hasattr(model, 'module') else model  # Only save the model it-self
+    output_model_file = os.path.join(args.output_dir, "pytorch_model.bin")
+    torch.save(model_to_save.state_dict(), output_model_file)
+
+    # Load a trained model that you have fine-tuned
+    model_state_dict = torch.load(output_model_file)
+    model = BertForQuestionAnswering.from_pretrained(args.bert_model, state_dict=model_state_dict)
+
     if args.do_predict and (args.local_rank == -1 or torch.distributed.get_rank() == 0):
         eval_examples = read_squad_examples(
             input_file=args.predict_file, is_training=False)

From ed3b62cd3bc5529b6388e405d4ada78a88903800 Mon Sep 17 00:00:00 2001
From: thomwolf <thomwolf@gmail.com>
Date: Tue, 11 Dec 2018 12:12:08 +0100
Subject: [PATCH 6/7] added version in __init__.py

---
 pytorch_pretrained_bert/__init__.py | 1 +
 requirements.txt                    | 5 ++---
 setup.py                            | 2 +-
 3 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/pytorch_pretrained_bert/__init__.py b/pytorch_pretrained_bert/__init__.py
index fc9b15a12d..f8d04f5d7f 100644
--- a/pytorch_pretrained_bert/__init__.py
+++ b/pytorch_pretrained_bert/__init__.py
@@ -1,3 +1,4 @@
+__version__ = 0.4.0
 from .tokenization import BertTokenizer, BasicTokenizer, WordpieceTokenizer
 from .modeling import (BertConfig, BertModel, BertForPreTraining,
                        BertForMaskedLM, BertForNextSentencePrediction,
diff --git a/requirements.txt b/requirements.txt
index e9a3640a9b..f37f11cc54 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,6 +1,5 @@
-# This installs Pytorch for CUDA 8 only. If you are using a newer version,
-# please visit http://pytorch.org/ and install the relevant version.
-torch>=0.4.1,<0.5.0
+# PyTorch
+torch>=0.4.1
 # progress bars in model download and training scripts
 tqdm
 # Accessing files from S3 directly.
diff --git a/setup.py b/setup.py
index fc793b53e6..21ca97294d 100644
--- a/setup.py
+++ b/setup.py
@@ -2,7 +2,7 @@ from setuptools import find_packages, setup
 
 setup(
     name="pytorch_pretrained_bert",
-    version="0.3.0",
+    version="0.4.0",
     author="Thomas Wolf, Victor Sanh, Tim Rault, Google AI Language Team Authors",
     author_email="thomas@huggingface.co",
     description="PyTorch version of Google AI BERT model with script to load Google pre-trained models",

From 770f805ae521b0890438092b09475f99b37643de Mon Sep 17 00:00:00 2001
From: thomwolf <thomwolf@gmail.com>
Date: Tue, 11 Dec 2018 12:20:22 +0100
Subject: [PATCH 7/7] include version number + comment in setup.py

---
 pytorch_pretrained_bert/__init__.py |  2 +-
 setup.py                            | 35 +++++++++++++++++++++++++++++
 2 files changed, 36 insertions(+), 1 deletion(-)

diff --git a/pytorch_pretrained_bert/__init__.py b/pytorch_pretrained_bert/__init__.py
index f8d04f5d7f..ebc4f7edcc 100644
--- a/pytorch_pretrained_bert/__init__.py
+++ b/pytorch_pretrained_bert/__init__.py
@@ -1,4 +1,4 @@
-__version__ = 0.4.0
+__version__ = "0.4.0"
 from .tokenization import BertTokenizer, BasicTokenizer, WordpieceTokenizer
 from .modeling import (BertConfig, BertModel, BertForPreTraining,
                        BertForMaskedLM, BertForNextSentencePrediction,
diff --git a/setup.py b/setup.py
index 21ca97294d..a1e1f68db6 100644
--- a/setup.py
+++ b/setup.py
@@ -1,3 +1,38 @@
+"""
+Simple check list from AllenNLP repo: https://github.com/allenai/allennlp/blob/master/setup.py
+
+To create the package for pypi.
+
+1. Change the version in __init__.py and setup.py.
+
+2. Commit these changes with the message: "Release: VERSION"
+
+3. Add a tag in git to mark the release: "git tag VERSION -m'Adds tag VERSION for pypi' "
+   Push the tag to git: git push --tags origin master
+
+4. Build both the sources and the wheel. Do not change anything in setup.py between
+   creating the wheel and the source distribution (obviously).
+
+   For the wheel, run: "python setup.py bdist_wheel" in the top level directory of this repository.
+   (this will build a wheel for the python version you use to build it - make sure you use python 3.x).
+
+   For the sources, run: "python setup.py sdist"
+   You should now have a /dist directory with both .whl and .tar.gz source versions of pytorch_pretrained_bert.
+
+5. Check that everything looks correct by uploading the package to the pypi test server:
+
+   twine upload dist/* -r pypitest
+   (pypi suggests using twine as other methods upload files via plaintext.)
+
+   Check that you can install it in a virtualenv by running:
+   pip install -i https://testpypi.python.org/pypi pytorch_pretrained_bert
+
+6. Upload the final version to actual pypi:
+   twine upload dist/* -r pypi
+
+7. Copy the release notes from RELEASE.md to the tag in github once everything is looking hunky-dory.
+
+"""
 from setuptools import find_packages, setup
 
 setup(