From 2977bd528f06bada54afcf740219e65afd1c0883 Mon Sep 17 00:00:00 2001 From: Sam Shleifer Date: Tue, 13 Oct 2020 16:22:29 -0400 Subject: [PATCH] Faster pegasus tokenization test with reduced data size (#7762) --- tests/test_tokenization_pegasus.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_tokenization_pegasus.py b/tests/test_tokenization_pegasus.py index ba3e84058d..ae186ac1f6 100644 --- a/tests/test_tokenization_pegasus.py +++ b/tests/test_tokenization_pegasus.py @@ -57,7 +57,7 @@ class PegasusTokenizationTest(TokenizerTesterMixin, unittest.TestCase): @require_torch def test_pegasus_large_seq2seq_truncation(self): - src_texts = ["This is going to be way too long" * 10000, "short example"] + src_texts = ["This is going to be way too long." * 150, "short example"] tgt_texts = ["not super long but more than 5 tokens", "tiny"] batch = self.pegasus_large_tokenizer.prepare_seq2seq_batch(src_texts, tgt_texts=tgt_texts, max_target_length=5) assert batch.input_ids.shape == (2, 1024)