From 31d387604c67d738740a9ae9350df0a273802966 Mon Sep 17 00:00:00 2001
From: thomwolf <thomwolf@gmail.com>
Date: Wed, 17 Apr 2019 11:58:27 +0200
Subject: [PATCH] adding s3 model tests with --runslow

---
 .circleci/config.yml                  |  4 ++--
 tests/conftest.py                     | 19 +++++++++++++++++++
 tests/modeling_gpt2_test.py           | 12 +++++++++++-
 tests/modeling_openai_test.py         | 12 +++++++++++-
 tests/modeling_test.py                | 11 +++++++++++
 tests/modeling_transfo_xl_test.py     | 12 +++++++++++-
 tests/tokenization_gpt2_test.py       | 11 ++++++++++-
 tests/tokenization_openai_test.py     | 12 +++++++++++-
 tests/tokenization_test.py            | 11 ++++++++++-
 tests/tokenization_transfo_xl_test.py | 11 ++++++++++-
 10 files changed, 106 insertions(+), 9 deletions(-)
 create mode 100644 tests/conftest.py

diff --git a/.circleci/config.yml b/.circleci/config.yml
index b57b478030..7296e07ca3 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -9,7 +9,7 @@ jobs:
             - run: sudo pip install --progress-bar off .
             - run: sudo pip install pytest ftfy spacy
             - run: sudo python -m spacy download en
-            - run: python -m pytest -sv tests/
+            - run: python -m pytest -sv tests/ --runslow
     build_py2:
         working_directory: ~/pytorch-pretrained-BERT
         docker:
@@ -20,7 +20,7 @@ jobs:
             - run: sudo pip install pytest spacy
             - run: sudo pip install ftfy==4.4.3
             - run: sudo python -m spacy download en
-            - run: python -m pytest -sv tests/
+            - run: python -m pytest -sv tests/ --runslow
 workflows:
   version: 2
   build_and_test:
diff --git a/tests/conftest.py b/tests/conftest.py
new file mode 100644
index 0000000000..841ebc8df9
--- /dev/null
+++ b/tests/conftest.py
@@ -0,0 +1,30 @@
+"""Pytest configuration shared by the test suite.
+
+Adds a ``--runslow`` command line flag. Tests marked with
+``@pytest.mark.slow`` are skipped unless that flag is given.
+"""
+
+import pytest
+
+
+def pytest_addoption(parser):
+    """Register the ``--runslow`` flag (off by default)."""
+    parser.addoption(
+        "--runslow", action="store_true", default=False, help="run slow tests"
+    )
+
+
+def pytest_configure(config):
+    """Declare the ``slow`` marker so pytest does not report it as unknown."""
+    config.addinivalue_line("markers", "slow: mark test as slow to run")
+
+
+def pytest_collection_modifyitems(config, items):
+    """Skip every collected test marked ``slow`` unless ``--runslow`` was given."""
+    if config.getoption("--runslow"):
+        # --runslow given in cli: do not skip slow tests
+        return
+    skip_slow = pytest.mark.skip(reason="need --runslow option to run")
+    for item in items:
+        if "slow" in item.keywords:
+            item.add_marker(skip_slow)
diff --git a/tests/modeling_gpt2_test.py b/tests/modeling_gpt2_test.py
index d542422060..8f4581b37f 100644
--- a/tests/modeling_gpt2_test.py
+++ b/tests/modeling_gpt2_test.py
@@ -20,12 +20,14 @@ import os
 import unittest
 import json
 import random
+import shutil
+import pytest
 
 import torch
 
 from pytorch_pretrained_bert import (GPT2Config, GPT2Model,
                                      GPT2LMHeadModel, GPT2DoubleHeadsModel)
-
+from pytorch_pretrained_bert.modeling_gpt2 import PRETRAINED_MODEL_ARCHIVE_MAP
 
 class GPT2ModelTest(unittest.TestCase):
     class GPT2ModelTester(object):
@@ -185,6 +187,25 @@ class GPT2ModelTest(unittest.TestCase):
         os.remove(json_file_path)
         self.assertEqual(config_second.to_dict(), config_first.to_dict())
 
+    @pytest.mark.slow
+    def test_model_from_pretrained(self):
+        """Check that ``GPT2Model.from_pretrained`` returns a model.
+
+        The weights are cached in a private temporary directory that is
+        always removed, even when loading fails.
+        """
+        import tempfile  # local import: only needed by this test
+
+        # Only the first archive entry is loaded to keep the test short.
+        for model_name in list(PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]:
+            cache_dir = tempfile.mkdtemp()
+            try:
+                model = GPT2Model.from_pretrained(model_name, cache_dir=cache_dir)
+            finally:
+                # Clean up even if loading raises.
+                shutil.rmtree(cache_dir)
+            self.assertIsNotNone(model)
+
     def run_tester(self, tester):
         config_and_inputs = tester.prepare_config_and_inputs()
         output_result = tester.create_gpt2_model(*config_and_inputs)
diff --git a/tests/modeling_openai_test.py b/tests/modeling_openai_test.py
index db03bf792e..4e7d9d542b 100644
--- a/tests/modeling_openai_test.py
+++ b/tests/modeling_openai_test.py
@@ -20,12 +20,14 @@ import os
 import unittest
 import json
 import random
+import shutil
+import pytest
 
 import torch
 
 from pytorch_pretrained_bert import (OpenAIGPTConfig, OpenAIGPTModel,
                                      OpenAIGPTLMHeadModel, OpenAIGPTDoubleHeadsModel)
-
+from pytorch_pretrained_bert.modeling_openai import PRETRAINED_MODEL_ARCHIVE_MAP
 
 class OpenAIGPTModelTest(unittest.TestCase):
     class OpenAIGPTModelTester(object):
@@ -197,6 +199,25 @@ class OpenAIGPTModelTest(unittest.TestCase):
         os.remove(json_file_path)
         self.assertEqual(config_second.to_dict(), config_first.to_dict())
 
+    @pytest.mark.slow
+    def test_model_from_pretrained(self):
+        """Check that ``OpenAIGPTModel.from_pretrained`` returns a model.
+
+        The weights are cached in a private temporary directory that is
+        always removed, even when loading fails.
+        """
+        import tempfile  # local import: only needed by this test
+
+        # Only the first archive entry is loaded to keep the test short.
+        for model_name in list(PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]:
+            cache_dir = tempfile.mkdtemp()
+            try:
+                model = OpenAIGPTModel.from_pretrained(model_name, cache_dir=cache_dir)
+            finally:
+                # Clean up even if loading raises.
+                shutil.rmtree(cache_dir)
+            self.assertIsNotNone(model)
+
     def run_tester(self, tester):
         config_and_inputs = tester.prepare_config_and_inputs()
         output_result = tester.create_openai_model(*config_and_inputs)
diff --git a/tests/modeling_test.py b/tests/modeling_test.py
index 02d7a13fda..5cde383fdf 100644
--- a/tests/modeling_test.py
+++ b/tests/modeling_test.py
@@ -20,6 +20,8 @@ import os
 import unittest
 import json
 import random
+import shutil
+import pytest
 
 import torch
 
@@ -27,6 +29,7 @@ from pytorch_pretrained_bert import (BertConfig, BertModel, BertForMaskedLM,
                                      BertForNextSentencePrediction, BertForPreTraining,
                                      BertForQuestionAnswering, BertForSequenceClassification,
                                      BertForTokenClassification)
+from pytorch_pretrained_bert.modeling import PRETRAINED_MODEL_ARCHIVE_MAP
 
 
 class BertModelTest(unittest.TestCase):
@@ -260,6 +263,25 @@ class BertModelTest(unittest.TestCase):
         os.remove(json_file_path)
         self.assertEqual(config_second.to_dict(), config_first.to_dict())
 
+    @pytest.mark.slow
+    def test_model_from_pretrained(self):
+        """Check that ``BertModel.from_pretrained`` returns a model.
+
+        The weights are cached in a private temporary directory that is
+        always removed, even when loading fails.
+        """
+        import tempfile  # local import: only needed by this test
+
+        # Only the first archive entry is loaded to keep the test short.
+        for model_name in list(PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]:
+            cache_dir = tempfile.mkdtemp()
+            try:
+                model = BertModel.from_pretrained(model_name, cache_dir=cache_dir)
+            finally:
+                # Clean up even if loading raises.
+                shutil.rmtree(cache_dir)
+            self.assertIsNotNone(model)
+
     def run_tester(self, tester):
         config_and_inputs = tester.prepare_config_and_inputs()
         output_result = tester.create_bert_model(*config_and_inputs)
diff --git a/tests/modeling_transfo_xl_test.py b/tests/modeling_transfo_xl_test.py
index a59d90b205..e5c5f3d163 100644
--- a/tests/modeling_transfo_xl_test.py
+++ b/tests/modeling_transfo_xl_test.py
@@ -20,11 +20,13 @@ import os
 import unittest
 import json
 import random
+import shutil
+import pytest
 
 import torch
 
 from pytorch_pretrained_bert import (TransfoXLConfig, TransfoXLModel, TransfoXLLMHeadModel)
-
+from pytorch_pretrained_bert.modeling_transfo_xl import PRETRAINED_MODEL_ARCHIVE_MAP
 
 class TransfoXLModelTest(unittest.TestCase):
     class TransfoXLModelTester(object):
@@ -195,6 +197,25 @@ class TransfoXLModelTest(unittest.TestCase):
         os.remove(json_file_path)
         self.assertEqual(config_second.to_dict(), config_first.to_dict())
 
+    @pytest.mark.slow
+    def test_model_from_pretrained(self):
+        """Check that ``TransfoXLModel.from_pretrained`` returns a model.
+
+        The weights are cached in a private temporary directory that is
+        always removed, even when loading fails.
+        """
+        import tempfile  # local import: only needed by this test
+
+        # Only the first archive entry is loaded to keep the test short.
+        for model_name in list(PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]:
+            cache_dir = tempfile.mkdtemp()
+            try:
+                model = TransfoXLModel.from_pretrained(model_name, cache_dir=cache_dir)
+            finally:
+                # Clean up even if loading raises.
+                shutil.rmtree(cache_dir)
+            self.assertIsNotNone(model)
+
     def run_tester(self, tester):
         config_and_inputs = tester.prepare_config_and_inputs()
 
diff --git a/tests/tokenization_gpt2_test.py b/tests/tokenization_gpt2_test.py
index 0773574360..870f61ca79 100644
--- a/tests/tokenization_gpt2_test.py
+++ b/tests/tokenization_gpt2_test.py
@@ -17,8 +17,10 @@ from __future__ import absolute_import, division, print_function, unicode_litera
 import os
 import unittest
 import json
+import shutil
+import pytest
 
-from pytorch_pretrained_bert.tokenization_gpt2 import GPT2Tokenizer
+from pytorch_pretrained_bert.tokenization_gpt2 import GPT2Tokenizer, PRETRAINED_VOCAB_ARCHIVE_MAP
 
 
 class GPT2TokenizationTest(unittest.TestCase):
@@ -64,6 +66,24 @@ class GPT2TokenizationTest(unittest.TestCase):
             [tokenizer_2.encoder, tokenizer_2.decoder, tokenizer_2.bpe_ranks,
              tokenizer_2.special_tokens, tokenizer_2.special_tokens_decoder])
 
+    @pytest.mark.slow
+    def test_tokenizer_from_pretrained(self):
+        """Check that ``GPT2Tokenizer.from_pretrained`` returns a tokenizer.
+
+        The vocabulary files are cached in a private temporary directory
+        that is always removed, even when loading fails.
+        """
+        import tempfile  # local import: only needed by this test
+
+        # Only the first archive entry is loaded to keep the test short.
+        for model_name in list(PRETRAINED_VOCAB_ARCHIVE_MAP.keys())[:1]:
+            cache_dir = tempfile.mkdtemp()
+            try:
+                tokenizer = GPT2Tokenizer.from_pretrained(model_name, cache_dir=cache_dir)
+            finally:
+                # Clean up even if loading raises.
+                shutil.rmtree(cache_dir)
+            self.assertIsNotNone(tokenizer)
 
 if __name__ == '__main__':
     unittest.main()
diff --git a/tests/tokenization_openai_test.py b/tests/tokenization_openai_test.py
index 2011ccc1df..a57f86be57 100644
--- a/tests/tokenization_openai_test.py
+++ b/tests/tokenization_openai_test.py
@@ -17,8 +17,10 @@ from __future__ import absolute_import, division, print_function, unicode_litera
 import os
 import unittest
 import json
+import shutil
+import pytest
 
-from pytorch_pretrained_bert.tokenization_openai import OpenAIGPTTokenizer
+from pytorch_pretrained_bert.tokenization_openai import OpenAIGPTTokenizer, PRETRAINED_VOCAB_ARCHIVE_MAP
 
 
 class OpenAIGPTTokenizationTest(unittest.TestCase):
@@ -64,6 +66,25 @@ class OpenAIGPTTokenizationTest(unittest.TestCase):
             [tokenizer_2.encoder, tokenizer_2.decoder, tokenizer_2.bpe_ranks,
              tokenizer_2.special_tokens, tokenizer_2.special_tokens_decoder])
 
+    @pytest.mark.slow
+    def test_tokenizer_from_pretrained(self):
+        """Check that ``OpenAIGPTTokenizer.from_pretrained`` returns a tokenizer.
+
+        The vocabulary files are cached in a private temporary directory
+        that is always removed, even when loading fails.
+        """
+        import tempfile  # local import: only needed by this test
+
+        # Only the first archive entry is loaded to keep the test short.
+        for model_name in list(PRETRAINED_VOCAB_ARCHIVE_MAP.keys())[:1]:
+            cache_dir = tempfile.mkdtemp()
+            try:
+                tokenizer = OpenAIGPTTokenizer.from_pretrained(model_name, cache_dir=cache_dir)
+            finally:
+                # Clean up even if loading raises.
+                shutil.rmtree(cache_dir)
+            self.assertIsNotNone(tokenizer)
+
 
 if __name__ == '__main__':
     unittest.main()
diff --git a/tests/tokenization_test.py b/tests/tokenization_test.py
index 15cc7ccd82..fe120a522c 100644
--- a/tests/tokenization_test.py
+++ b/tests/tokenization_test.py
@@ -17,12 +17,14 @@ from __future__ import absolute_import, division, print_function, unicode_litera
 import os
 import unittest
 from io import open
+import shutil
+import pytest
 
 from pytorch_pretrained_bert.tokenization import (BasicTokenizer,
                                                   BertTokenizer,
                                                   WordpieceTokenizer,
                                                   _is_control, _is_punctuation,
-                                                  _is_whitespace)
+                                                  _is_whitespace, PRETRAINED_VOCAB_ARCHIVE_MAP)
 
 
 class TokenizationTest(unittest.TestCase):
@@ -56,6 +58,24 @@ class TokenizationTest(unittest.TestCase):
         self.assertListEqual(
             tokenizer.convert_tokens_to_ids(tokens), [7, 4, 5, 10, 8, 9])
 
+    @pytest.mark.slow
+    def test_tokenizer_from_pretrained(self):
+        """Check that ``BertTokenizer.from_pretrained`` returns a tokenizer.
+
+        The vocabulary file is cached in a private temporary directory
+        that is always removed, even when loading fails.
+        """
+        import tempfile  # local import: only needed by this test
+
+        # Only the first archive entry is loaded to keep the test short.
+        for model_name in list(PRETRAINED_VOCAB_ARCHIVE_MAP.keys())[:1]:
+            cache_dir = tempfile.mkdtemp()
+            try:
+                tokenizer = BertTokenizer.from_pretrained(model_name, cache_dir=cache_dir)
+            finally:
+                # Clean up even if loading raises.
+                shutil.rmtree(cache_dir)
+            self.assertIsNotNone(tokenizer)
 
     def test_chinese(self):
         tokenizer = BasicTokenizer()
diff --git a/tests/tokenization_transfo_xl_test.py b/tests/tokenization_transfo_xl_test.py
index 1a805f11e6..bf0ac5db2f 100644
--- a/tests/tokenization_transfo_xl_test.py
+++ b/tests/tokenization_transfo_xl_test.py
@@ -17,8 +17,10 @@ from __future__ import absolute_import, division, print_function, unicode_litera
 import os
 import unittest
 from io import open
+import shutil
+import pytest
 
-from pytorch_pretrained_bert.tokenization_transfo_xl import TransfoXLTokenizer
+from pytorch_pretrained_bert.tokenization_transfo_xl import TransfoXLTokenizer, PRETRAINED_VOCAB_ARCHIVE_MAP
 
 
 class TransfoXLTokenizationTest(unittest.TestCase):
@@ -66,6 +68,24 @@ class TransfoXLTokenizationTest(unittest.TestCase):
             tokenizer.tokenize(u" \tHeLLo ! how  \n Are yoU ?  "),
             ["HeLLo", "!", "how", "Are", "yoU", "?"])
 
+    @pytest.mark.slow
+    def test_tokenizer_from_pretrained(self):
+        """Check that ``TransfoXLTokenizer.from_pretrained`` returns a tokenizer.
+
+        The vocabulary file is cached in a private temporary directory
+        that is always removed, even when loading fails.
+        """
+        import tempfile  # local import: only needed by this test
+
+        # Only the first archive entry is loaded to keep the test short.
+        for model_name in list(PRETRAINED_VOCAB_ARCHIVE_MAP.keys())[:1]:
+            cache_dir = tempfile.mkdtemp()
+            try:
+                tokenizer = TransfoXLTokenizer.from_pretrained(model_name, cache_dir=cache_dir)
+            finally:
+                # Clean up even if loading raises.
+                shutil.rmtree(cache_dir)
+            self.assertIsNotNone(tokenizer)
 
 if __name__ == '__main__':
     unittest.main()