ByT5 model (#11971)

* allow tf to use uneven num of layers * add tokenizer * finish docs * finish docs * Apply suggestions from code review * include in index * finish * Update docs/source/model_doc/byt5.rst Co-authored-by: NielsRogge <48327001+NielsRogge@users.noreply.github.com> * apply sylvais suggestions * make style Co-authored-by: NielsRogge <48327001+NielsRogge@users.noreply.github.com>
2021-06-01 19:07:37 +01:00
parent 1eb58b4560
commit 47a98fc4cb
12 changed files with 744 additions and 50 deletions
--- a/tests/test_modeling_tf_t5.py
+++ b/tests/test_modeling_tf_t5.py
@@ -26,7 +26,7 @@ from .test_modeling_tf_common import TFModelTesterMixin, ids_tensor
 if is_tf_available():
    import tensorflow as tf

-    from transformers import T5Tokenizer, TFT5EncoderModel, TFT5ForConditionalGeneration, TFT5Model
+    from transformers import ByT5Tokenizer, T5Tokenizer, TFT5EncoderModel, TFT5ForConditionalGeneration, TFT5Model


 class TFT5ModelTester:
@@ -499,6 +499,30 @@ class TFT5ModelIntegrationTests(unittest.TestCase):
        EXPECTED_SCORE = -59.0293
        self.assertTrue(abs(mtf_score - EXPECTED_SCORE) < 1e-4)

+    @slow
+    def test_small_byt5_integration_test(self):
+        """
+        For comparision run:
+        >>> import t5  # pip install t5==0.9.1
+
+        >>> path_to_byt5_small_checkpoint = '<fill_in>'
+        >>> t5_model = t5.models.MtfModel(model_dir=path_to_tf_checkpoint, batch_size=1, tpu=None)
+        >>> vocab = t5.data.ByteVocabulary()
+        >>> score = t5_model.score(inputs=["Hello there"], targets=["Hi I am"], vocabulary=vocab)
+        """
+
+        model = TFT5ForConditionalGeneration.from_pretrained("google/byt5-small")
+        tokenizer = ByT5Tokenizer.from_pretrained("google/byt5-small")
+
+        input_ids = tokenizer("Hello there", return_tensors="tf").input_ids
+        labels = tokenizer("Hi I am", return_tensors="tf").input_ids
+
+        loss = model(input_ids, labels=labels).loss
+        mtf_score = -tf.math.reduce_sum(loss).numpy()
+
+        EXPECTED_SCORE = -60.7397
+        self.assertTrue(abs(mtf_score - EXPECTED_SCORE) < 1e-4)
+
    @slow
    def test_summarization(self):
        model = self.model