Return correct Bart hidden state tensors (#8747)

* bart output hidden states upstream * same w/ decoder * add tests * fix prophetnet * fix gpt2 and ctrl * fix fstm and skip test for reformer and longformer * fix all models Co-authored-by: Patrick von Platen <patrick.v.platen@gmail.com>
2020-11-25 16:06:04 -05:00
parent 138f45c184
commit 369f1d77b4
14 changed files with 199 additions and 54 deletions
--- a/tests/test_modeling_transfo_xl.py
+++ b/tests/test_modeling_transfo_xl.py
@@ -204,6 +204,10 @@ class TransfoXLModelTest(ModelTesterMixin, GenerationTesterMixin, unittest.TestC
        output_result = self.model_tester.create_transfo_xl_lm_head(*config_and_inputs)
        self.model_tester.check_transfo_xl_lm_head_output(output_result)

+    def test_retain_grad_hidden_states_attentions(self):
+        # xlnet cannot keep gradients in attentions or hidden states
+        return
+
    @require_torch_multi_gpu
    def test_multi_gpu_data_parallel_forward(self):
        # Opt-out of this test.