Refactor prepare_seq2seq_batch (#9524)

* Add target contextmanager and rework prepare_seq2seq_batch

* Fix tests, treat BART and Barthez

* Add last tokenizers

* Fix test

* Set src token before calling the superclass

* Remove special behavior for T5

* Remove needless imports

* Remove needless asserts

This commit is contained in:
Sylvain Gugger
2021-01-12 18:19:38 -05:00
committed by GitHub
parent e6ecef711e
commit 063d8d27f4
24 changed files with 169 additions and 700 deletions

View File

@@ -508,12 +508,6 @@ class TestMarian_en_ROMANCE(MarianIntegrationTest):
def test_batch_generation_en_ROMANCE_multi(self):
    """Run the shared generated-batch check for this test class.

    All of the work is delegated to
    ``self._assert_generated_batch_equal_expected``, which is defined
    outside this excerpt.
    """
    self._assert_generated_batch_equal_expected()
def test_tokenizer_handles_empty(self):
    """Check how the tokenizer treats an empty string.

    ``normalize("")`` must return a ``str``, while building a batch from a
    single empty source text with ``prepare_seq2seq_batch`` must raise
    ``ValueError``.
    """
    self.assertIsInstance(self.tokenizer.normalize(""), str)
    # Callable form of assertRaises: the test fails unless the call below
    # raises ValueError.
    self.assertRaises(
        ValueError,
        self.tokenizer.prepare_seq2seq_batch,
        [""],
        return_tensors="pt",
    )
@slow
def test_pipeline(self):
device = 0 if torch_device == "cuda" else -1