From 14cb5b35faeda7881341656aacf89d12a8a7e07b Mon Sep 17 00:00:00 2001 From: Lysandre Debut Date: Wed, 20 May 2020 11:59:45 -0400 Subject: [PATCH] Fix slow gpu tests lysandre (#4487) * There is one missing key in BERT * Correct device for CamemBERT model * RoBERTa tokenization adding prefix space * Style --- tests/test_modeling_auto.py | 5 +++-- tests/test_modeling_camembert.py | 1 + tests/test_tokenization_roberta.py | 6 ++++-- 3 files changed, 8 insertions(+), 4 deletions(-) diff --git a/tests/test_modeling_auto.py b/tests/test_modeling_auto.py index 43ace9898e..4c7779344c 100644 --- a/tests/test_modeling_auto.py +++ b/tests/test_modeling_auto.py @@ -80,8 +80,9 @@ class AutoModelTest(unittest.TestCase): model, loading_info = AutoModelForPreTraining.from_pretrained(model_name, output_loading_info=True) self.assertIsNotNone(model) self.assertIsInstance(model, BertForPreTraining) - for value in loading_info.values(): - self.assertEqual(len(value), 0) + for key, value in loading_info.items(): + # Exactly one key is expected to be missing (left uninitialized): "missing_keys" must hold one entry and every other list must be empty.
+ self.assertEqual(len(value), 1 if key == "missing_keys" else 0) @slow def test_lmhead_model_from_pretrained(self): diff --git a/tests/test_modeling_camembert.py b/tests/test_modeling_camembert.py index 7ebd0895a5..5d57f862c8 100644 --- a/tests/test_modeling_camembert.py +++ b/tests/test_modeling_camembert.py @@ -30,6 +30,7 @@ class CamembertModelIntegrationTest(unittest.TestCase): @slow def test_output_embeds_base_model(self): model = CamembertModel.from_pretrained("camembert-base") + model.to(torch_device) input_ids = torch.tensor( [[5, 121, 11, 660, 16, 730, 25543, 110, 83, 6]], device=torch_device, dtype=torch.long, diff --git a/tests/test_tokenization_roberta.py b/tests/test_tokenization_roberta.py index 19075ef531..fa31f66694 100644 --- a/tests/test_tokenization_roberta.py +++ b/tests/test_tokenization_roberta.py @@ -100,9 +100,11 @@ class RobertaTokenizationTest(TokenizerTesterMixin, unittest.TestCase): text = tokenizer.encode("sequence builders", add_special_tokens=False) text_2 = tokenizer.encode("multi-sequence build", add_special_tokens=False) - encoded_text_from_decode = tokenizer.encode("sequence builders", add_special_tokens=True) + encoded_text_from_decode = tokenizer.encode( + "sequence builders", add_special_tokens=True, add_prefix_space=False + ) encoded_pair_from_decode = tokenizer.encode( - "sequence builders", "multi-sequence build", add_special_tokens=True + "sequence builders", "multi-sequence build", add_special_tokens=True, add_prefix_space=False ) encoded_sentence = tokenizer.build_inputs_with_special_tokens(text)