Add slow generate tests for pretrained lm models (#2909)

* add slow generate lm_model tests * fix conflicts * merge conflicts * fix conflicts * add slow generate lm_model tests * make style * delete unused variable * fix conflicts * fix conflicts * fix conflicts * delete unused variable * fix conflicts * finished hard coded tests
2020-02-24 17:51:57 +01:00
parent 8194df8e0c
commit 17c45c39ed
8 changed files with 991 additions and 6 deletions
--- a/tests/test_modeling_xlm.py
+++ b/tests/test_modeling_xlm.py
@@ -24,6 +24,7 @@ from .utils import CACHE_DIR, require_torch, slow, torch_device


 if is_torch_available():
+    import torch
    from transformers import (
        XLMConfig,
        XLMModel,
@@ -396,3 +397,48 @@ class XLMModelTest(ModelTesterMixin, unittest.TestCase):
        for model_name in list(XLM_PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]:
            model = XLMModel.from_pretrained(model_name, cache_dir=CACHE_DIR)
            self.assertIsNotNone(model)
+
+
+def prepare_generation_special_tokens():
+    """Return the special token ids used by the XLM generation test.
+
+    Returns:
+        dict: ``{"bos_token_id": 0, "pad_token_id": 2}``.
+    """
+    special_token_ids = dict(bos_token_id=0, pad_token_id=2)
+    return special_token_ids
+
+
+@require_torch
+class XLMModelLanguageGenerationTest(unittest.TestCase):
+    """Generation regression test for a pretrained XLM language-model checkpoint.
+
+    The token ids returned by ``generate`` are compared against ids hard-coded in the test.
+    """
+
+    # bos/pad token ids forwarded to ``generate``
+    special_tokens = prepare_generation_special_tokens()
+
+    @slow
+    def test_lm_generate_xlm_mlm_en_2048(self):
+        """Check the ``generate`` output of ``xlm-mlm-en-2048`` against hard-coded token ids."""
+        # Pass cache_dir like the other pretrained-model test in this file does.
+        model = XLMWithLMHeadModel.from_pretrained("xlm-mlm-en-2048", cache_dir=CACHE_DIR)
+        prompt_ids = [1, 14, 2232, 26, 1]  # The dog is cute
+        # Build the integer tensor directly instead of casting a float ``torch.Tensor``.
+        input_ids = torch.tensor([prompt_ids], dtype=torch.long)
+        # The dog is nothing is it!!!!!!!!!!!!
+        # TODO (PVP): this sentence (and others I tried) does not make much sense,
+        # there seems to be a problem with xlm language generation.
+        # Expected ids: the prompt, then 567, 26, 32, then token 149 repeated 12 times.
+        expected_output_ids = prompt_ids + [567, 26, 32] + [149] * 12
+        torch.manual_seed(0)
+
+        output_ids = model.generate(
+            input_ids,
+            bos_token_id=self.special_tokens["bos_token_id"],
+            pad_token_id=self.special_tokens["pad_token_id"],
+        )
+
+        self.assertListEqual(output_ids[0].tolist(), expected_output_ids)