Mark big downloads slow (#7325)

* Mark big downloads as slow

* Add import

* Right order for slow decorator

* More slow tests
This commit is contained in:
Sylvain Gugger
2020-09-22 12:21:52 -04:00
committed by GitHub
parent 585217c87f
commit 1ee2194fb6
3 changed files with 22 additions and 11 deletions

View File

@@ -1,7 +1,7 @@
import unittest
from transformers import AutoTokenizer, is_torch_available
from transformers.testing_utils import require_torch
from transformers.testing_utils import require_torch, slow
if is_torch_available():
@@ -69,6 +69,7 @@ class DataCollatorIntegrationTest(unittest.TestCase):
self.assertTrue("labels" not in batch)
self.assertEqual(batch["inputs"].shape, torch.Size([8, 6]))
@slow
def test_default_classification(self):
MODEL_ID = "bert-base-cased-finetuned-mrpc"
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
@@ -80,6 +81,7 @@ class DataCollatorIntegrationTest(unittest.TestCase):
batch = data_collator(dataset.features)
self.assertEqual(batch["labels"].dtype, torch.long)
@slow
def test_default_regression(self):
MODEL_ID = "distilroberta-base"
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
@@ -91,6 +93,7 @@ class DataCollatorIntegrationTest(unittest.TestCase):
batch = data_collator(dataset.features)
self.assertEqual(batch["labels"].dtype, torch.float)
@slow
def test_lm_tokenizer_without_padding(self):
tokenizer = AutoTokenizer.from_pretrained("gpt2")
data_collator = DataCollatorForLanguageModeling(tokenizer, mlm=False)
@@ -109,6 +112,7 @@ class DataCollatorIntegrationTest(unittest.TestCase):
self.assertEqual(batch["input_ids"].shape, torch.Size((2, 512)))
self.assertEqual(batch["labels"].shape, torch.Size((2, 512)))
@slow
def test_lm_tokenizer_with_padding(self):
tokenizer = AutoTokenizer.from_pretrained("distilroberta-base")
data_collator = DataCollatorForLanguageModeling(tokenizer)
@@ -128,6 +132,7 @@ class DataCollatorIntegrationTest(unittest.TestCase):
self.assertEqual(batch["input_ids"].shape, torch.Size((2, 512)))
self.assertEqual(batch["labels"].shape, torch.Size((2, 512)))
@slow
def test_plm(self):
tokenizer = AutoTokenizer.from_pretrained("xlnet-base-cased")
data_collator = DataCollatorForPermutationLanguageModeling(tokenizer)
@@ -156,6 +161,7 @@ class DataCollatorIntegrationTest(unittest.TestCase):
# Expect error due to odd sequence length
data_collator(example)
@slow
def test_nsp(self):
tokenizer = AutoTokenizer.from_pretrained("bert-base-cased")
data_collator = DataCollatorForNextSentencePrediction(tokenizer)
@@ -172,6 +178,7 @@ class DataCollatorIntegrationTest(unittest.TestCase):
self.assertEqual(batch["masked_lm_labels"].shape, torch.Size((total_samples, 512)))
self.assertEqual(batch["next_sentence_label"].shape, torch.Size((total_samples,)))
@slow
def test_sop(self):
tokenizer = AutoTokenizer.from_pretrained("albert-base-v2")
data_collator = DataCollatorForSOP(tokenizer)