From b340a910ed50f0705975595c9b8c9ae26111c01d Mon Sep 17 00:00:00 2001 From: thomwolf Date: Mon, 4 Nov 2019 16:03:36 +0100 Subject: [PATCH] fix tests - flagged as slow all the tests downloading from AWS --- transformers/modeling_roberta.py | 2 +- transformers/tests/modeling_auto_test.py | 4 ++++ transformers/tests/modeling_common_test.py | 8 ++------ transformers/tests/modeling_encoder_decoder_test.py | 1 + transformers/tests/tokenization_auto_test.py | 1 + transformers/tests/tokenization_bert_test.py | 2 ++ transformers/tests/tokenization_distilbert_test.py | 2 ++ transformers/tests/tokenization_roberta_test.py | 2 ++ transformers/tests/tokenization_utils_test.py | 2 ++ transformers/tests/tokenization_xlm_test.py | 2 ++ transformers/tests/tokenization_xlnet_test.py | 2 ++ 11 files changed, 21 insertions(+), 7 deletions(-) diff --git a/transformers/modeling_roberta.py b/transformers/modeling_roberta.py index af7c0b65d9..cbf285fa95 100644 --- a/transformers/modeling_roberta.py +++ b/transformers/modeling_roberta.py @@ -173,7 +173,7 @@ class RobertaModel(BertModel): return self.embeddings.word_embeddings def set_input_embeddings(self, value): - self.embeddings.word_emebddings = value + self.embeddings.word_embeddings = value @add_start_docstrings("""RoBERTa Model with a `language modeling` head on top. """, ROBERTA_START_DOCSTRING, ROBERTA_INPUTS_DOCSTRING) diff --git a/transformers/tests/modeling_auto_test.py b/transformers/tests/modeling_auto_test.py index af1de29cce..6d2c7ec979 100644 --- a/transformers/tests/modeling_auto_test.py +++ b/transformers/tests/modeling_auto_test.py @@ -38,6 +38,7 @@ else: class AutoModelTest(unittest.TestCase): + @pytest.mark.slow def test_model_from_pretrained(self): logging.basicConfig(level=logging.INFO) for model_name in list(BERT_PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]: @@ -52,6 +53,7 @@ class AutoModelTest(unittest.TestCase): for value in loading_info.values(): self.assertEqual(len(value), 0) + @pytest.mark.slow def test_lmhead_model_from_pretrained(self): logging.basicConfig(level=logging.INFO) for model_name in list(BERT_PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]: @@ -64,6 +66,7 @@ class AutoModelTest(unittest.TestCase): self.assertIsNotNone(model) self.assertIsInstance(model, BertForMaskedLM) + @pytest.mark.slow def test_sequence_classification_model_from_pretrained(self): logging.basicConfig(level=logging.INFO) for model_name in list(BERT_PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]: @@ -76,6 +79,7 @@ class AutoModelTest(unittest.TestCase): self.assertIsNotNone(model) self.assertIsInstance(model, BertForSequenceClassification) + @pytest.mark.slow def test_question_answering_model_from_pretrained(self): logging.basicConfig(level=logging.INFO) for model_name in list(BERT_PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]: diff --git a/transformers/tests/modeling_common_test.py b/transformers/tests/modeling_common_test.py index 159d9d85bb..ddc0f9f3de 100644 --- a/transformers/tests/modeling_common_test.py +++ b/transformers/tests/modeling_common_test.py @@ -429,12 +429,6 @@ class CommonTestCases: list(hidden_states[0].shape[-2:]), [self.model_tester.seq_length, self.model_tester.hidden_size]) - def test_debug(self): - config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common() - for model_class in self.all_model_classes: - model = model_class(config) - model_embed = model.resize_token_embeddings(config.vocab_size + 10) - def test_resize_tokens_embeddings(self): original_config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common() if not self.test_resize_embeddings: @@ -703,6 +697,7 @@ class CommonTestCases: config_and_inputs = self.prepare_config_and_inputs() self.create_and_check_presents(*config_and_inputs) + @pytest.mark.slow def run_slow_tests(self): self.create_and_check_model_from_pretrained() @@ -776,6 +771,7 @@ def floats_tensor(shape, scale=1.0, rng=None, name=None): class ModelUtilsTest(unittest.TestCase): + @pytest.mark.slow def test_model_from_pretrained(self): logging.basicConfig(level=logging.INFO) for model_name in list(BERT_PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]: diff --git a/transformers/tests/modeling_encoder_decoder_test.py b/transformers/tests/modeling_encoder_decoder_test.py index 1ffd0ebc4c..a6c88ed9a9 100644 --- a/transformers/tests/modeling_encoder_decoder_test.py +++ b/transformers/tests/modeling_encoder_decoder_test.py @@ -27,6 +27,7 @@ else: class EncoderDecoderModelTest(unittest.TestCase): + @pytest.mark.slow def test_model2model_from_pretrained(self): logging.basicConfig(level=logging.INFO) for model_name in list(BERT_PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]: diff --git a/transformers/tests/tokenization_auto_test.py b/transformers/tests/tokenization_auto_test.py index 0f49ec75fb..79370811e8 100644 --- a/transformers/tests/tokenization_auto_test.py +++ b/transformers/tests/tokenization_auto_test.py @@ -26,6 +26,7 @@ from transformers import BERT_PRETRAINED_CONFIG_ARCHIVE_MAP, GPT2_PRETRAINED_CON class AutoTokenizerTest(unittest.TestCase): + @pytest.mark.slow def test_tokenizer_from_pretrained(self): logging.basicConfig(level=logging.INFO) for model_name in list(BERT_PRETRAINED_CONFIG_ARCHIVE_MAP.keys())[:1]: diff --git a/transformers/tests/tokenization_bert_test.py b/transformers/tests/tokenization_bert_test.py index fd61ec30ba..73ea38e20a 100644 --- a/transformers/tests/tokenization_bert_test.py +++ b/transformers/tests/tokenization_bert_test.py @@ -16,6 +16,7 @@ from __future__ import absolute_import, division, print_function, unicode_litera import os import unittest +import pytest from io import open from transformers.tokenization_bert import (BasicTokenizer, @@ -125,6 +126,7 @@ class BertTokenizationTest(CommonTestCases.CommonTokenizerTester): self.assertFalse(_is_punctuation(u"A")) self.assertFalse(_is_punctuation(u" ")) + @pytest.mark.slow def test_sequence_builders(self): tokenizer = self.tokenizer_class.from_pretrained("bert-base-uncased") diff --git a/transformers/tests/tokenization_distilbert_test.py b/transformers/tests/tokenization_distilbert_test.py index e3c8376ca8..77a487651d 100644 --- a/transformers/tests/tokenization_distilbert_test.py +++ b/transformers/tests/tokenization_distilbert_test.py @@ -16,6 +16,7 @@ from __future__ import absolute_import, division, print_function, unicode_litera import os import unittest +import pytest from io import open from transformers.tokenization_distilbert import (DistilBertTokenizer) @@ -30,6 +31,7 @@ class DistilBertTokenizationTest(BertTokenizationTest): def get_tokenizer(self, **kwargs): return DistilBertTokenizer.from_pretrained(self.tmpdirname, **kwargs) + @pytest.mark.slow def test_sequence_builders(self): tokenizer = DistilBertTokenizer.from_pretrained("distilbert-base-uncased") diff --git a/transformers/tests/tokenization_roberta_test.py b/transformers/tests/tokenization_roberta_test.py index b31dd94f21..a27bf7d654 100644 --- a/transformers/tests/tokenization_roberta_test.py +++ b/transformers/tests/tokenization_roberta_test.py @@ -17,6 +17,7 @@ from __future__ import absolute_import, division, print_function, unicode_litera import os import json import unittest +import pytest from io import open from transformers.tokenization_roberta import RobertaTokenizer, VOCAB_FILES_NAMES @@ -78,6 +79,7 @@ class RobertaTokenizationTest(CommonTestCases.CommonTokenizerTester): [0, 31414, 232, 328, 740, 1140, 12695, 69, 46078, 1588, 2] ) + @pytest.mark.slow def test_sequence_builders(self): tokenizer = RobertaTokenizer.from_pretrained("roberta-base") diff --git a/transformers/tests/tokenization_utils_test.py b/transformers/tests/tokenization_utils_test.py index cf55982c8f..8630191c69 100644 --- a/transformers/tests/tokenization_utils_test.py +++ b/transformers/tests/tokenization_utils_test.py @@ -18,11 +18,13 @@ from __future__ import print_function import unittest import six +import pytest from transformers import PreTrainedTokenizer from transformers.tokenization_gpt2 import GPT2Tokenizer class TokenizerUtilsTest(unittest.TestCase): + @pytest.mark.slow def check_tokenizer_from_pretrained(self, tokenizer_class): s3_models = list(tokenizer_class.max_model_input_sizes.keys()) for model_name in s3_models[:1]: diff --git a/transformers/tests/tokenization_xlm_test.py b/transformers/tests/tokenization_xlm_test.py index 567edf1ccd..3ff6564e34 100644 --- a/transformers/tests/tokenization_xlm_test.py +++ b/transformers/tests/tokenization_xlm_test.py @@ -17,6 +17,7 @@ from __future__ import absolute_import, division, print_function, unicode_litera import os import unittest import json +import pytest from transformers.tokenization_xlm import XLMTokenizer, VOCAB_FILES_NAMES @@ -66,6 +67,7 @@ class XLMTokenizationTest(CommonTestCases.CommonTokenizerTester): self.assertListEqual( tokenizer.convert_tokens_to_ids(input_tokens), input_bpe_tokens) + @pytest.mark.slow def test_sequence_builders(self): tokenizer = XLMTokenizer.from_pretrained("xlm-mlm-en-2048") diff --git a/transformers/tests/tokenization_xlnet_test.py b/transformers/tests/tokenization_xlnet_test.py index 653968b9af..2e14ffeb82 100644 --- a/transformers/tests/tokenization_xlnet_test.py +++ b/transformers/tests/tokenization_xlnet_test.py @@ -16,6 +16,7 @@ from __future__ import absolute_import, division, print_function, unicode_litera import os import unittest +import pytest from transformers.tokenization_xlnet import (XLNetTokenizer, SPIECE_UNDERLINE) @@ -89,6 +90,7 @@ class XLNetTokenizationTest(CommonTestCases.CommonTokenizerTester): u'9', u'2', u'0', u'0', u'0', u',', SPIECE_UNDERLINE + u'and', SPIECE_UNDERLINE + u'this', SPIECE_UNDERLINE + u'is', SPIECE_UNDERLINE + u'f', u'al', u'se', u'.']) + @pytest.mark.slow def test_sequence_builders(self): tokenizer = XLNetTokenizer.from_pretrained("xlnet-base-cased")