Return correct Bart hidden state tensors (#8747)

* bart output hidden states upstream

* same w/ decoder

* add tests

* fix prophetnet

* fix gpt2 and ctrl

* fix fstm and skip test for reformer and longformer

* fix all models

Co-authored-by: Patrick von Platen <patrick.v.platen@gmail.com>
This commit is contained in:
Joe Davison
2020-11-25 16:06:04 -05:00
committed by GitHub
parent 138f45c184
commit 369f1d77b4
14 changed files with 199 additions and 54 deletions

View File

@@ -204,6 +204,10 @@ class TransfoXLModelTest(ModelTesterMixin, GenerationTesterMixin, unittest.TestC
output_result = self.model_tester.create_transfo_xl_lm_head(*config_and_inputs)
self.model_tester.check_transfo_xl_lm_head_output(output_result)
def test_retain_grad_hidden_states_attentions(self):
# xlnet cannot keep gradients in attentions or hidden states
return
@require_torch_multi_gpu
def test_multi_gpu_data_parallel_forward(self):
# Opt-out of this test.