Tokenizers should be framework agnostic (#8599)

* Tokenizers should be framework agnostic
* Run the slow tests
* Not testing
* Fix documentation
* Apply suggestions from code review

Co-authored-by: Patrick von Platen <patrick.v.platen@gmail.com>
Co-authored-by: Patrick von Platen <patrick.v.platen@gmail.com>
2020-11-17 14:03:03 -05:00
parent 7f3b41a306
commit 3095ee9dab
28 changed files with 73 additions and 177 deletions
--- a/tests/test_tokenization_common.py
+++ b/tests/test_tokenization_common.py
@@ -1794,7 +1794,7 @@ class TokenizerTesterMixin:
        self.assertEqual(batch.input_ids.shape[1], 3)
        self.assertEqual(batch.labels.shape[1], 10)
        # max_target_length will default to max_length if not specified
-        batch = tokenizer.prepare_seq2seq_batch(src_text, tgt_texts=tgt_text, max_length=3)
+        batch = tokenizer.prepare_seq2seq_batch(src_text, tgt_texts=tgt_text, max_length=3, return_tensors="pt")
        self.assertEqual(batch.input_ids.shape[1], 3)
        self.assertEqual(batch.labels.shape[1], 3)