Deprecate prepare_seq2seq_batch (#10287)

* Deprecate prepare_seq2seq_batch

* Fix last tests

* Apply suggestions from code review

Co-authored-by: Lysandre Debut <lysandre@huggingface.co>
Co-authored-by: Suraj Patil <surajp815@gmail.com>

* More review comments

Co-authored-by: Lysandre Debut <lysandre@huggingface.co>
Co-authored-by: Suraj Patil <surajp815@gmail.com>
2021-02-22 12:36:16 -05:00
parent e73a3e1891
commit 9e147d31f6
31 changed files with 325 additions and 320 deletions
--- a/tests/test_tokenization_prophetnet.py
+++ b/tests/test_tokenization_prophetnet.py
@@ -152,20 +152,12 @@ class ProphetNetTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
        self.assertListEqual(tokenizer.tokenize("unwantedX running"), ["[UNK]", "runn", "##ing"])

    @require_torch
-    def test_prepare_seq2seq_batch(self):
+    def test_prepare_batch(self):
        tokenizer = self.tokenizer_class.from_pretrained("microsoft/prophetnet-large-uncased")

        src_text = ["A long paragraph for summarization.", "Another paragraph for summarization."]
-        tgt_text = [
-            "Summary of the text.",
-            "Another summary.",
-        ]
        expected_src_tokens = [1037, 2146, 20423, 2005, 7680, 7849, 3989, 1012, 102]
-        batch = tokenizer.prepare_seq2seq_batch(
-            src_text,
-            tgt_texts=tgt_text,
-            return_tensors="pt",
-        )
+        batch = tokenizer(src_text, padding=True, return_tensors="pt")
        self.assertIsInstance(batch, BatchEncoding)
        result = list(batch.input_ids.numpy()[0])
        self.assertListEqual(expected_src_tokens, result)