Add slow generate tests for pretrained lm models (#2909)

* add slow generate lm_model tests

* fix conflicts

* merge conflicts

* fix conflicts

* add slow generate lm_model tests

* make style

* delete unused variable

* fix conflicts

* fix conflicts

* fix conflicts

* delete unused variable

* fix conflicts

* finished hard coded tests
This commit is contained in:
Patrick von Platen
2020-02-24 17:51:57 +01:00
committed by GitHub
parent 8194df8e0c
commit 17c45c39ed
8 changed files with 991 additions and 6 deletions

View File

@@ -24,6 +24,7 @@ from .utils import CACHE_DIR, require_torch, slow, torch_device
if is_torch_available():
import torch
from transformers import (
XLMConfig,
XLMModel,
@@ -396,3 +397,48 @@ class XLMModelTest(ModelTesterMixin, unittest.TestCase):
for model_name in list(XLM_PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]:
model = XLMModel.from_pretrained(model_name, cache_dir=CACHE_DIR)
self.assertIsNotNone(model)
def prepare_generation_special_tokens():
    """Return the special token ids used by the generation tests.

    Returns:
        A dict with the keys ``"bos_token_id"`` and ``"pad_token_id"``.
    """
    special_tokens = dict(bos_token_id=0, pad_token_id=2)
    return special_tokens
class XLMModelLanguageGenerationTest(unittest.TestCase):
    """Compare generation output of a pretrained XLM LM with hard-coded token ids."""

    # Special token ids passed to ``generate`` by the tests of this class.
    special_tokens = prepare_generation_special_tokens()

    @slow
    def test_lm_generate_xlm_mlm_en_2048(self):
        """Generate from "xlm-mlm-en-2048" and check the ids of the first sequence."""
        lm_model = XLMWithLMHeadModel.from_pretrained("xlm-mlm-en-2048")

        # Prompt: "The dog is cute"
        prompt_ids = torch.tensor([[1, 14, 2232, 26, 1]], dtype=torch.long)

        # Expected text: "The dog is nothing is it!!!!!!!!!!!!"
        # TODO (PVP): this sentence (and others I tried) does not make much sense,
        # there seems to be a problem with xlm language generation.
        expected_prefix = [1, 14, 2232, 26, 1, 567, 26, 32]
        expected_output_ids = expected_prefix + [149] * 12

        bos_id = self.special_tokens["bos_token_id"]
        pad_id = self.special_tokens["pad_token_id"]

        # Seed the RNG right before generating, after the model has been loaded.
        torch.manual_seed(0)
        output_ids = lm_model.generate(prompt_ids, bos_token_id=bos_id, pad_token_id=pad_id)

        first_sequence = output_ids[0].tolist()
        self.assertListEqual(first_sequence, expected_output_ids)