[Tokenization] Fix #5181 - make #5155 more explicit - move back the default logging level in tests to WARNING (#5252)

* fix-5181

Padding to the max sequence length while truncating to a different length was handled incorrectly on slow tokenizers

* clean up and fix #5155

* fix XLM test

* Fix tests for Transfo-XL

* logging only above WARNING in tests

* mark the slow tokenizers tests with @slow

* fix Marian truncation tokenization test

* style and quality

* make the test a lot faster by limiting the sequence length used in tests
This commit is contained in:
Thomas Wolf
2020-06-25 17:24:28 +02:00
committed by GitHub
parent e008d520bb
commit 27cf1d97f0
9 changed files with 134 additions and 75 deletions

View File

@@ -14,7 +14,6 @@
# limitations under the License.
import logging
import unittest
from transformers import is_tf_available
@@ -48,7 +47,6 @@ class TFAutoModelTest(unittest.TestCase):
self.assertTrue(h5py.version.hdf5_version.startswith("1.10"))
logging.basicConfig(level=logging.INFO)
# for model_name in TF_BERT_PRETRAINED_MODEL_ARCHIVE_LIST[:1]:
for model_name in ["bert-base-uncased"]:
config = AutoConfig.from_pretrained(model_name)
@@ -65,7 +63,6 @@ class TFAutoModelTest(unittest.TestCase):
self.assertTrue(h5py.version.hdf5_version.startswith("1.10"))
logging.basicConfig(level=logging.INFO)
# for model_name in TF_BERT_PRETRAINED_MODEL_ARCHIVE_LIST[:1]:
for model_name in ["bert-base-uncased"]:
config = AutoConfig.from_pretrained(model_name)
@@ -78,7 +75,6 @@ class TFAutoModelTest(unittest.TestCase):
@slow
def test_lmhead_model_from_pretrained(self):
logging.basicConfig(level=logging.INFO)
# for model_name in TF_BERT_PRETRAINED_MODEL_ARCHIVE_LIST[:1]:
for model_name in ["bert-base-uncased"]:
config = AutoConfig.from_pretrained(model_name)
@@ -91,7 +87,6 @@ class TFAutoModelTest(unittest.TestCase):
@slow
def test_sequence_classification_model_from_pretrained(self):
logging.basicConfig(level=logging.INFO)
# for model_name in TF_BERT_PRETRAINED_MODEL_ARCHIVE_LIST[:1]:
for model_name in ["bert-base-uncased"]:
config = AutoConfig.from_pretrained(model_name)
@@ -104,7 +99,6 @@ class TFAutoModelTest(unittest.TestCase):
@slow
def test_question_answering_model_from_pretrained(self):
logging.basicConfig(level=logging.INFO)
# for model_name in TF_BERT_PRETRAINED_MODEL_ARCHIVE_LIST[:1]:
for model_name in ["bert-base-uncased"]:
config = AutoConfig.from_pretrained(model_name)
@@ -116,14 +110,12 @@ class TFAutoModelTest(unittest.TestCase):
self.assertIsInstance(model, TFBertForQuestionAnswering)
def test_from_pretrained_identifier(self):
logging.basicConfig(level=logging.INFO)
model = TFAutoModelWithLMHead.from_pretrained(SMALL_MODEL_IDENTIFIER)
self.assertIsInstance(model, TFBertForMaskedLM)
self.assertEqual(model.num_parameters(), 14830)
self.assertEqual(model.num_parameters(only_trainable=True), 14830)
def test_from_identifier_from_model_type(self):
logging.basicConfig(level=logging.INFO)
model = TFAutoModelWithLMHead.from_pretrained(DUMMY_UNKWOWN_IDENTIFIER)
self.assertIsInstance(model, TFRobertaForMaskedLM)
self.assertEqual(model.num_parameters(), 14830)