ProphetNet (#7157)

* add new model prophetnet

prophetnet modified

modify codes as suggested v1

add prophetnet test files

* still bugs, because of changed output formats of encoder and decoder

* move prophetnet into the latest version

* clean integration tests

* clean tokenizers

* add xlm config to init

* correct typo in init

* further refactoring

* continue refactor

* save parallel

* add decoder_attention_mask

* fix use_cache vs. past_key_values

* fix common tests

* change decoder output logits

* fix xlm tests

* make common tests pass

* change model architecture

* add tokenizer tests

* finalize model structure

* no weight mapping

* correct n-gram stream attention mask as discussed with qweizhen

* remove unused import

* fix index.rst

* fix tests

* delete unnecessary code

* add fast integration test

* rename weights

* final weight remapping

* save intermediate

* Descriptions for Prophetnet Config File

* finish all models

* finish new model outputs

* delete unnecessary files

* refactor encoder layer

* add dummy docs

* code quality

* fix tests

* add model pages to doctree

* further refactor

* more refactor, more tests

* finish code refactor and tests

* remove unnecessary files

* further clean up

* add docstring template

* finish tokenizer doc

* finish prophetnet

* fix copies

* fix typos

* fix tf tests

* fix fp16

* fix tf test 2nd try

* fix code quality

* add test for each model

* merge new tests to branch

* Update model_cards/microsoft/prophetnet-large-uncased-cnndm/README.md

Co-authored-by: Sam Shleifer <sshleifer@gmail.com>

* Update model_cards/microsoft/prophetnet-large-uncased-cnndm/README.md

Co-authored-by: Sam Shleifer <sshleifer@gmail.com>

* Update src/transformers/modeling_prophetnet.py

Co-authored-by: Sam Shleifer <sshleifer@gmail.com>

* Update utils/check_repo.py

Co-authored-by: Sam Shleifer <sshleifer@gmail.com>

* apply sams and sylvains comments

* make style

* remove unnecessary code

* Update README.md

Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com>

* Update README.md

Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com>

* Update src/transformers/configuration_prophetnet.py

Co-authored-by: Lysandre Debut <lysandre@huggingface.co>

* implement lysandres comments

* correct docs

* fix isort

* fix tokenizers

* fix copies

Co-authored-by: weizhen <weizhen@mail.ustc.edu.cn>
Co-authored-by: Patrick von Platen <patrick.v.platen@gmail.com>
Co-authored-by: Sam Shleifer <sshleifer@gmail.com>
Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com>
Co-authored-by: Lysandre Debut <lysandre@huggingface.co>
This commit is contained in:
Weizhen
2020-10-19 23:36:09 +08:00
committed by GitHub
parent 8f8f8d99fc
commit 2422cda01b
38 changed files with 5288 additions and 48 deletions

View File

@@ -112,6 +112,7 @@ class ModelTesterMixin:
model.eval()
with torch.no_grad():
outputs = model(**self._prepare_for_class(inputs_dict, model_class))
out_2 = outputs[0].cpu().numpy()
out_2[np.isnan(out_2)] = 0
@@ -152,6 +153,7 @@ class ModelTesterMixin:
with torch.no_grad():
first = model(**self._prepare_for_class(inputs_dict, model_class))[0]
second = model(**self._prepare_for_class(inputs_dict, model_class))[0]
out_1 = first.cpu().numpy()
out_2 = second.cpu().numpy()
out_1 = out_1[~np.isnan(out_1)]
@@ -183,10 +185,12 @@ class ModelTesterMixin:
def test_attention_outputs(self):
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
config.return_dict = True
seq_len = getattr(self.model_tester, "seq_length", None)
decoder_seq_length = getattr(self.model_tester, "decoder_seq_length", seq_len)
encoder_seq_length = getattr(self.model_tester, "encoder_seq_length", seq_len)
decoder_key_length = getattr(self.model_tester, "key_length", decoder_seq_length)
decoder_key_length = getattr(self.model_tester, "decoder_key_length", decoder_seq_length)
encoder_key_length = getattr(self.model_tester, "key_length", encoder_seq_length)
chunk_length = getattr(self.model_tester, "chunk_length", None)
if chunk_length is not None and hasattr(self.model_tester, "num_hashes"):
@@ -211,7 +215,7 @@ class ModelTesterMixin:
model.eval()
with torch.no_grad():
outputs = model(**self._prepare_for_class(inputs_dict, model_class), return_dict=True)
attentions = outputs[-1]
attentions = outputs["attentions"] if "attentions" in outputs.keys() else outputs[-1]
self.assertEqual(len(attentions), self.model_tester.num_hidden_layers)
if chunk_length is not None:
@@ -227,8 +231,14 @@ class ModelTesterMixin:
out_len = len(outputs)
if self.is_encoder_decoder:
correct_outlen = 4
decoder_attention_idx = 1
correct_outlen = (
self.model_tester.base_model_out_len if hasattr(self.model_tester, "base_model_out_len") else 4
)
decoder_attention_idx = (
self.model_tester.decoder_attention_idx
if hasattr(self.model_tester, "decoder_attention_idx")
else 1
)
# loss is at first position
if "labels" in inputs_dict:
@@ -238,6 +248,7 @@ class ModelTesterMixin:
if model_class in MODEL_FOR_QUESTION_ANSWERING_MAPPING.values():
correct_outlen += 1 # start_logits and end_logits instead of only 1 output
decoder_attention_idx += 1
self.assertEqual(out_len, correct_outlen)
decoder_attentions = outputs[decoder_attention_idx]
@@ -256,9 +267,16 @@ class ModelTesterMixin:
model.eval()
with torch.no_grad():
outputs = model(**self._prepare_for_class(inputs_dict, model_class))
self.assertEqual(out_len + (2 if self.is_encoder_decoder else 1), len(outputs))
self_attentions = outputs[-1]
if hasattr(self.model_tester, "num_hidden_states_types"):
added_hidden_states = self.model_tester.num_hidden_states_types
elif self.is_encoder_decoder:
added_hidden_states = 2
else:
added_hidden_states = 1
self.assertEqual(out_len + added_hidden_states, len(outputs))
self_attentions = outputs["attentions"] if "attentions" in outputs else outputs[-1]
self.assertEqual(len(self_attentions), self.model_tester.num_hidden_layers)
if chunk_length is not None:
self.assertListEqual(
@@ -571,8 +589,8 @@ class ModelTesterMixin:
model.eval()
with torch.no_grad():
outputs = model(**self._prepare_for_class(inputs_dict, model_class))
hidden_states = outputs[-1]
outputs = model(**self._prepare_for_class(inputs_dict, model_class), return_dict=True)
hidden_states = outputs["hidden_states"] if "hidden_states" in outputs else outputs[-1]
expected_num_layers = getattr(
self.model_tester, "expected_num_hidden_layers", self.model_tester.num_hidden_layers + 1
@@ -822,6 +840,7 @@ class ModelTesterMixin:
model.eval()
inputs = copy.deepcopy(self._prepare_for_class(inputs_dict, model_class))
if not self.is_encoder_decoder:
input_ids = inputs["input_ids"]
del inputs["input_ids"]
@@ -839,7 +858,7 @@ class ModelTesterMixin:
inputs["decoder_inputs_embeds"] = wte(decoder_input_ids)
with torch.no_grad():
model(**inputs)
model(**inputs)[0]
def test_lm_head_model_random_no_beam_search_generate(self):
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()