From 31d387604c67d738740a9ae9350df0a273802966 Mon Sep 17 00:00:00 2001 From: thomwolf Date: Wed, 17 Apr 2019 11:58:27 +0200 Subject: [PATCH] adding s3 model tests with --runslow --- .circleci/config.yml | 4 ++-- tests/conftest.py | 19 +++++++++++++++++++ tests/modeling_gpt2_test.py | 12 +++++++++++- tests/modeling_openai_test.py | 12 +++++++++++- tests/modeling_test.py | 11 +++++++++++ tests/modeling_transfo_xl_test.py | 12 +++++++++++- tests/tokenization_gpt2_test.py | 11 ++++++++++- tests/tokenization_openai_test.py | 12 +++++++++++- tests/tokenization_test.py | 11 ++++++++++- tests/tokenization_transfo_xl_test.py | 11 ++++++++++- 10 files changed, 106 insertions(+), 9 deletions(-) create mode 100644 tests/conftest.py diff --git a/.circleci/config.yml b/.circleci/config.yml index b57b478030..7296e07ca3 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -9,7 +9,7 @@ jobs: - run: sudo pip install --progress-bar off . - run: sudo pip install pytest ftfy spacy - run: sudo python -m spacy download en - - run: python -m pytest -sv tests/ + - run: python -m pytest -sv tests/ --runslow build_py2: working_directory: ~/pytorch-pretrained-BERT docker: @@ -20,7 +20,7 @@ jobs: - run: sudo pip install pytest spacy - run: sudo pip install ftfy==4.4.3 - run: sudo python -m spacy download en - - run: python -m pytest -sv tests/ + - run: python -m pytest -sv tests/ --runslow workflows: version: 2 build_and_test: diff --git a/tests/conftest.py b/tests/conftest.py new file mode 100644 index 0000000000..841ebc8df9 --- /dev/null +++ b/tests/conftest.py @@ -0,0 +1,19 @@ +# content of conftest.py + +import pytest + + +def pytest_addoption(parser): + parser.addoption( + "--runslow", action="store_true", default=False, help="run slow tests" + ) + + +def pytest_collection_modifyitems(config, items): + if config.getoption("--runslow"): + # --runslow given in cli: do not skip slow tests + return + skip_slow = pytest.mark.skip(reason="need --runslow option to run") + for item in items: + if "slow" in item.keywords: + item.add_marker(skip_slow) diff --git a/tests/modeling_gpt2_test.py b/tests/modeling_gpt2_test.py index d542422060..8f4581b37f 100644 --- a/tests/modeling_gpt2_test.py +++ b/tests/modeling_gpt2_test.py @@ -20,12 +20,14 @@ import os import unittest import json import random +import shutil +import pytest import torch from pytorch_pretrained_bert import (GPT2Config, GPT2Model, GPT2LMHeadModel, GPT2DoubleHeadsModel) - +from pytorch_pretrained_bert.modeling_gpt2 import PRETRAINED_MODEL_ARCHIVE_MAP class GPT2ModelTest(unittest.TestCase): class GPT2ModelTester(object): @@ -185,6 +187,14 @@ class GPT2ModelTest(unittest.TestCase): os.remove(json_file_path) self.assertEqual(config_second.to_dict(), config_first.to_dict()) + @pytest.mark.slow + def test_model_from_pretrained(self): + cache_dir = "/tmp/pytorch_pretrained_bert_test/" + for model_name in list(PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]: + model = GPT2Model.from_pretrained(model_name, cache_dir=cache_dir) + shutil.rmtree(cache_dir) + self.assertIsNotNone(model) + def run_tester(self, tester): config_and_inputs = tester.prepare_config_and_inputs() output_result = tester.create_gpt2_model(*config_and_inputs) diff --git a/tests/modeling_openai_test.py b/tests/modeling_openai_test.py index db03bf792e..4e7d9d542b 100644 --- a/tests/modeling_openai_test.py +++ b/tests/modeling_openai_test.py @@ -20,12 +20,14 @@ import os import unittest import json import random +import shutil +import pytest import torch from pytorch_pretrained_bert import (OpenAIGPTConfig, OpenAIGPTModel, OpenAIGPTLMHeadModel, OpenAIGPTDoubleHeadsModel) - +from pytorch_pretrained_bert.modeling_openai import PRETRAINED_MODEL_ARCHIVE_MAP class OpenAIGPTModelTest(unittest.TestCase): class OpenAIGPTModelTester(object): @@ -197,6 +199,14 @@ class OpenAIGPTModelTest(unittest.TestCase): os.remove(json_file_path) self.assertEqual(config_second.to_dict(), config_first.to_dict()) + @pytest.mark.slow + def test_model_from_pretrained(self): + cache_dir = "/tmp/pytorch_pretrained_bert_test/" + for model_name in list(PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]: + model = OpenAIGPTModel.from_pretrained(model_name, cache_dir=cache_dir) + shutil.rmtree(cache_dir) + self.assertIsNotNone(model) + def run_tester(self, tester): config_and_inputs = tester.prepare_config_and_inputs() output_result = tester.create_openai_model(*config_and_inputs) diff --git a/tests/modeling_test.py b/tests/modeling_test.py index 02d7a13fda..5cde383fdf 100644 --- a/tests/modeling_test.py +++ b/tests/modeling_test.py @@ -20,6 +20,8 @@ import os import unittest import json import random +import shutil +import pytest import torch @@ -27,6 +29,7 @@ from pytorch_pretrained_bert import (BertConfig, BertModel, BertForMaskedLM, BertForNextSentencePrediction, BertForPreTraining, BertForQuestionAnswering, BertForSequenceClassification, BertForTokenClassification) +from pytorch_pretrained_bert.modeling import PRETRAINED_MODEL_ARCHIVE_MAP class BertModelTest(unittest.TestCase): @@ -260,6 +263,14 @@ class BertModelTest(unittest.TestCase): os.remove(json_file_path) self.assertEqual(config_second.to_dict(), config_first.to_dict()) + @pytest.mark.slow + def test_model_from_pretrained(self): + cache_dir = "/tmp/pytorch_pretrained_bert_test/" + for model_name in list(PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]: + model = BertModel.from_pretrained(model_name, cache_dir=cache_dir) + shutil.rmtree(cache_dir) + self.assertIsNotNone(model) + def run_tester(self, tester): config_and_inputs = tester.prepare_config_and_inputs() output_result = tester.create_bert_model(*config_and_inputs) diff --git a/tests/modeling_transfo_xl_test.py b/tests/modeling_transfo_xl_test.py index a59d90b205..e5c5f3d163 100644 --- a/tests/modeling_transfo_xl_test.py +++ b/tests/modeling_transfo_xl_test.py @@ -20,11 +20,13 @@ import os import unittest import json import random +import shutil +import pytest import torch from pytorch_pretrained_bert import (TransfoXLConfig, TransfoXLModel, TransfoXLLMHeadModel) - +from pytorch_pretrained_bert.modeling_transfo_xl import PRETRAINED_MODEL_ARCHIVE_MAP class TransfoXLModelTest(unittest.TestCase): class TransfoXLModelTester(object): @@ -195,6 +197,14 @@ class TransfoXLModelTest(unittest.TestCase): os.remove(json_file_path) self.assertEqual(config_second.to_dict(), config_first.to_dict()) + @pytest.mark.slow + def test_model_from_pretrained(self): + cache_dir = "/tmp/pytorch_pretrained_bert_test/" + for model_name in list(PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]: + model = TransfoXLModel.from_pretrained(model_name, cache_dir=cache_dir) + shutil.rmtree(cache_dir) + self.assertIsNotNone(model) + def run_tester(self, tester): config_and_inputs = tester.prepare_config_and_inputs() diff --git a/tests/tokenization_gpt2_test.py b/tests/tokenization_gpt2_test.py index 0773574360..870f61ca79 100644 --- a/tests/tokenization_gpt2_test.py +++ b/tests/tokenization_gpt2_test.py @@ -17,8 +17,10 @@ from __future__ import absolute_import, division, print_function, unicode_litera import os import unittest import json +import shutil +import pytest -from pytorch_pretrained_bert.tokenization_gpt2 import GPT2Tokenizer +from pytorch_pretrained_bert.tokenization_gpt2 import GPT2Tokenizer, PRETRAINED_VOCAB_ARCHIVE_MAP class GPT2TokenizationTest(unittest.TestCase): @@ -64,6 +66,13 @@ class GPT2TokenizationTest(unittest.TestCase): [tokenizer_2.encoder, tokenizer_2.decoder, tokenizer_2.bpe_ranks, tokenizer_2.special_tokens, tokenizer_2.special_tokens_decoder]) + @pytest.mark.slow + def test_tokenizer_from_pretrained(self): + cache_dir = "/tmp/pytorch_pretrained_bert_test/" + for model_name in list(PRETRAINED_VOCAB_ARCHIVE_MAP.keys())[:1]: + tokenizer = GPT2Tokenizer.from_pretrained(model_name, cache_dir=cache_dir) + shutil.rmtree(cache_dir) + self.assertIsNotNone(tokenizer) if __name__ == '__main__': unittest.main() diff --git a/tests/tokenization_openai_test.py b/tests/tokenization_openai_test.py index 2011ccc1df..a57f86be57 100644 --- a/tests/tokenization_openai_test.py +++ b/tests/tokenization_openai_test.py @@ -17,8 +17,10 @@ from __future__ import absolute_import, division, print_function, unicode_litera import os import unittest import json +import shutil +import pytest -from pytorch_pretrained_bert.tokenization_openai import OpenAIGPTTokenizer +from pytorch_pretrained_bert.tokenization_openai import OpenAIGPTTokenizer, PRETRAINED_VOCAB_ARCHIVE_MAP class OpenAIGPTTokenizationTest(unittest.TestCase): @@ -64,6 +66,14 @@ class OpenAIGPTTokenizationTest(unittest.TestCase): [tokenizer_2.encoder, tokenizer_2.decoder, tokenizer_2.bpe_ranks, tokenizer_2.special_tokens, tokenizer_2.special_tokens_decoder]) + @pytest.mark.slow + def test_tokenizer_from_pretrained(self): + cache_dir = "/tmp/pytorch_pretrained_bert_test/" + for model_name in list(PRETRAINED_VOCAB_ARCHIVE_MAP.keys())[:1]: + tokenizer = OpenAIGPTTokenizer.from_pretrained(model_name, cache_dir=cache_dir) + shutil.rmtree(cache_dir) + self.assertIsNotNone(tokenizer) + if __name__ == '__main__': unittest.main() diff --git a/tests/tokenization_test.py b/tests/tokenization_test.py index 15cc7ccd82..fe120a522c 100644 --- a/tests/tokenization_test.py +++ b/tests/tokenization_test.py @@ -17,12 +17,14 @@ from __future__ import absolute_import, division, print_function, unicode_litera import os import unittest from io import open +import shutil +import pytest from pytorch_pretrained_bert.tokenization import (BasicTokenizer, BertTokenizer, WordpieceTokenizer, _is_control, _is_punctuation, - _is_whitespace) + _is_whitespace, PRETRAINED_VOCAB_ARCHIVE_MAP) class TokenizationTest(unittest.TestCase): @@ -56,6 +58,13 @@ class TokenizationTest(unittest.TestCase): self.assertListEqual( tokenizer.convert_tokens_to_ids(tokens), [7, 4, 5, 10, 8, 9]) + @pytest.mark.slow + def test_tokenizer_from_pretrained(self): + cache_dir = "/tmp/pytorch_pretrained_bert_test/" + for model_name in list(PRETRAINED_VOCAB_ARCHIVE_MAP.keys())[:1]: + tokenizer = BertTokenizer.from_pretrained(model_name, cache_dir=cache_dir) + shutil.rmtree(cache_dir) + self.assertIsNotNone(tokenizer) def test_chinese(self): tokenizer = BasicTokenizer() diff --git a/tests/tokenization_transfo_xl_test.py b/tests/tokenization_transfo_xl_test.py index 1a805f11e6..bf0ac5db2f 100644 --- a/tests/tokenization_transfo_xl_test.py +++ b/tests/tokenization_transfo_xl_test.py @@ -17,8 +17,10 @@ from __future__ import absolute_import, division, print_function, unicode_litera import os import unittest from io import open +import shutil +import pytest -from pytorch_pretrained_bert.tokenization_transfo_xl import TransfoXLTokenizer +from pytorch_pretrained_bert.tokenization_transfo_xl import TransfoXLTokenizer, PRETRAINED_VOCAB_ARCHIVE_MAP class TransfoXLTokenizationTest(unittest.TestCase): @@ -66,6 +68,13 @@ class TransfoXLTokenizationTest(unittest.TestCase): tokenizer.tokenize(u" \tHeLLo ! how \n Are yoU ? "), ["HeLLo", "!", "how", "Are", "yoU", "?"]) + @pytest.mark.slow + def test_tokenizer_from_pretrained(self): + cache_dir = "/tmp/pytorch_pretrained_bert_test/" + for model_name in list(PRETRAINED_VOCAB_ARCHIVE_MAP.keys())[:1]: + tokenizer = TransfoXLTokenizer.from_pretrained(model_name, cache_dir=cache_dir) + shutil.rmtree(cache_dir) + self.assertIsNotNone(tokenizer) if __name__ == '__main__': unittest.main()