ProphetNet (#7157)
* add new model prophetnet prophetnet modified modify codes as suggested v1 add prophetnet test files * still bugs, because of changed output formats of encoder and decoder * move prophetnet into the latest version * clean integration tests * clean tokenizers * add xlm config to init * correct typo in init * further refactoring * continue refactor * save parallel * add decoder_attention_mask * fix use_cache vs. past_key_values * fix common tests * change decoder output logits * fix xlm tests * make common tests pass * change model architecture * add tokenizer tests * finalize model structure * no weight mapping * correct n-gram stream attention mask as discussed with qweizhen * remove unused import * fix index.rst * fix tests * delete unnecessary code * add fast integration test * rename weights * final weight remapping * save intermediate * Descriptions for Prophetnet Config File * finish all models * finish new model outputs * delete unnecessary files * refactor encoder layer * add dummy docs * code quality * fix tests * add model pages to doctree * further refactor * more refactor, more tests * finish code refactor and tests * remove unnecessary files * further clean up * add docstring template * finish tokenizer doc * finish prophetnet * fix copies * fix typos * fix tf tests * fix fp16 * fix tf test 2nd try * fix code quality * add test for each model * merge new tests to branch * Update model_cards/microsoft/prophetnet-large-uncased-cnndm/README.md Co-authored-by: Sam Shleifer <sshleifer@gmail.com> * Update model_cards/microsoft/prophetnet-large-uncased-cnndm/README.md Co-authored-by: Sam Shleifer <sshleifer@gmail.com> * Update src/transformers/modeling_prophetnet.py Co-authored-by: Sam Shleifer <sshleifer@gmail.com> * Update utils/check_repo.py Co-authored-by: Sam Shleifer <sshleifer@gmail.com> * apply sams and sylvains comments * make style * remove unnecessary code * Update README.md Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com> * Update README.md Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com> * Update src/transformers/configuration_prophetnet.py Co-authored-by: Lysandre Debut <lysandre@huggingface.co> * implement lysandres comments * correct docs * fix isort * fix tokenizers * fix copies Co-authored-by: weizhen <weizhen@mail.ustc.edu.cn> Co-authored-by: Patrick von Platen <patrick.v.platen@gmail.com> Co-authored-by: Sam Shleifer <sshleifer@gmail.com> Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com> Co-authored-by: Lysandre Debut <lysandre@huggingface.co>
This commit is contained in:
@@ -112,6 +112,7 @@ class ModelTesterMixin:
|
||||
model.eval()
|
||||
with torch.no_grad():
|
||||
outputs = model(**self._prepare_for_class(inputs_dict, model_class))
|
||||
|
||||
out_2 = outputs[0].cpu().numpy()
|
||||
out_2[np.isnan(out_2)] = 0
|
||||
|
||||
@@ -152,6 +153,7 @@ class ModelTesterMixin:
|
||||
with torch.no_grad():
|
||||
first = model(**self._prepare_for_class(inputs_dict, model_class))[0]
|
||||
second = model(**self._prepare_for_class(inputs_dict, model_class))[0]
|
||||
|
||||
out_1 = first.cpu().numpy()
|
||||
out_2 = second.cpu().numpy()
|
||||
out_1 = out_1[~np.isnan(out_1)]
|
||||
@@ -183,10 +185,12 @@ class ModelTesterMixin:
|
||||
|
||||
def test_attention_outputs(self):
|
||||
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
|
||||
config.return_dict = True
|
||||
|
||||
seq_len = getattr(self.model_tester, "seq_length", None)
|
||||
decoder_seq_length = getattr(self.model_tester, "decoder_seq_length", seq_len)
|
||||
encoder_seq_length = getattr(self.model_tester, "encoder_seq_length", seq_len)
|
||||
decoder_key_length = getattr(self.model_tester, "key_length", decoder_seq_length)
|
||||
decoder_key_length = getattr(self.model_tester, "decoder_key_length", decoder_seq_length)
|
||||
encoder_key_length = getattr(self.model_tester, "key_length", encoder_seq_length)
|
||||
chunk_length = getattr(self.model_tester, "chunk_length", None)
|
||||
if chunk_length is not None and hasattr(self.model_tester, "num_hashes"):
|
||||
@@ -211,7 +215,7 @@ class ModelTesterMixin:
|
||||
model.eval()
|
||||
with torch.no_grad():
|
||||
outputs = model(**self._prepare_for_class(inputs_dict, model_class), return_dict=True)
|
||||
attentions = outputs[-1]
|
||||
attentions = outputs["attentions"] if "attentions" in outputs.keys() else outputs[-1]
|
||||
self.assertEqual(len(attentions), self.model_tester.num_hidden_layers)
|
||||
|
||||
if chunk_length is not None:
|
||||
@@ -227,8 +231,14 @@ class ModelTesterMixin:
|
||||
out_len = len(outputs)
|
||||
|
||||
if self.is_encoder_decoder:
|
||||
correct_outlen = 4
|
||||
decoder_attention_idx = 1
|
||||
correct_outlen = (
|
||||
self.model_tester.base_model_out_len if hasattr(self.model_tester, "base_model_out_len") else 4
|
||||
)
|
||||
decoder_attention_idx = (
|
||||
self.model_tester.decoder_attention_idx
|
||||
if hasattr(self.model_tester, "decoder_attention_idx")
|
||||
else 1
|
||||
)
|
||||
|
||||
# loss is at first position
|
||||
if "labels" in inputs_dict:
|
||||
@@ -238,6 +248,7 @@ class ModelTesterMixin:
|
||||
if model_class in MODEL_FOR_QUESTION_ANSWERING_MAPPING.values():
|
||||
correct_outlen += 1 # start_logits and end_logits instead of only 1 output
|
||||
decoder_attention_idx += 1
|
||||
|
||||
self.assertEqual(out_len, correct_outlen)
|
||||
|
||||
decoder_attentions = outputs[decoder_attention_idx]
|
||||
@@ -256,9 +267,16 @@ class ModelTesterMixin:
|
||||
model.eval()
|
||||
with torch.no_grad():
|
||||
outputs = model(**self._prepare_for_class(inputs_dict, model_class))
|
||||
self.assertEqual(out_len + (2 if self.is_encoder_decoder else 1), len(outputs))
|
||||
|
||||
self_attentions = outputs[-1]
|
||||
if hasattr(self.model_tester, "num_hidden_states_types"):
|
||||
added_hidden_states = self.model_tester.num_hidden_states_types
|
||||
elif self.is_encoder_decoder:
|
||||
added_hidden_states = 2
|
||||
else:
|
||||
added_hidden_states = 1
|
||||
self.assertEqual(out_len + added_hidden_states, len(outputs))
|
||||
|
||||
self_attentions = outputs["attentions"] if "attentions" in outputs else outputs[-1]
|
||||
self.assertEqual(len(self_attentions), self.model_tester.num_hidden_layers)
|
||||
if chunk_length is not None:
|
||||
self.assertListEqual(
|
||||
@@ -571,8 +589,8 @@ class ModelTesterMixin:
|
||||
model.eval()
|
||||
|
||||
with torch.no_grad():
|
||||
outputs = model(**self._prepare_for_class(inputs_dict, model_class))
|
||||
hidden_states = outputs[-1]
|
||||
outputs = model(**self._prepare_for_class(inputs_dict, model_class), return_dict=True)
|
||||
hidden_states = outputs["hidden_states"] if "hidden_states" in outputs else outputs[-1]
|
||||
|
||||
expected_num_layers = getattr(
|
||||
self.model_tester, "expected_num_hidden_layers", self.model_tester.num_hidden_layers + 1
|
||||
@@ -822,6 +840,7 @@ class ModelTesterMixin:
|
||||
model.eval()
|
||||
|
||||
inputs = copy.deepcopy(self._prepare_for_class(inputs_dict, model_class))
|
||||
|
||||
if not self.is_encoder_decoder:
|
||||
input_ids = inputs["input_ids"]
|
||||
del inputs["input_ids"]
|
||||
@@ -839,7 +858,7 @@ class ModelTesterMixin:
|
||||
inputs["decoder_inputs_embeds"] = wte(decoder_input_ids)
|
||||
|
||||
with torch.no_grad():
|
||||
model(**inputs)
|
||||
model(**inputs)[0]
|
||||
|
||||
def test_lm_head_model_random_no_beam_search_generate(self):
|
||||
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
|
||||
|
||||
Reference in New Issue
Block a user