Return correct Bart hidden state tensors (#8747)

* bart output hidden states upstream

* same w/ decoder

* add tests

* fix prophetnet

* fix gpt2 and ctrl

* fix fstm and skip test for reformer and longformer

* fix all models

Co-authored-by: Patrick von Platen <patrick.v.platen@gmail.com>
This commit is contained in:
Joe Davison
2020-11-25 16:06:04 -05:00
committed by GitHub
parent 138f45c184
commit 369f1d77b4
14 changed files with 199 additions and 54 deletions

View File

@@ -689,6 +689,56 @@ class ModelTesterMixin:
check_hidden_states_output(inputs_dict, config, model_class)
def test_retain_grad_hidden_states_attentions(self):
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
config.output_hidden_states = True
config.output_attentions = True
# no need to test all models as different heads yield the same functionality
model_class = self.all_model_classes[0]
model = model_class(config)
model.to(torch_device)
inputs = self._prepare_for_class(inputs_dict, model_class)
outputs = model(**inputs)
output = outputs[0]
if config.is_encoder_decoder:
# Seq2Seq models
encoder_hidden_states = outputs.encoder_hidden_states[0]
encoder_attentions = outputs.encoder_attentions[0]
encoder_hidden_states.retain_grad()
encoder_attentions.retain_grad()
decoder_hidden_states = outputs.decoder_hidden_states[0]
decoder_attentions = outputs.decoder_attentions[0]
decoder_hidden_states.retain_grad()
decoder_attentions.retain_grad()
cross_attentions = outputs.cross_attentions[0]
cross_attentions.retain_grad()
output.flatten()[0].backward(retain_graph=True)
self.assertIsNotNone(encoder_hidden_states.grad)
self.assertIsNotNone(encoder_attentions.grad)
self.assertIsNotNone(decoder_hidden_states.grad)
self.assertIsNotNone(decoder_attentions.grad)
self.assertIsNotNone(cross_attentions.grad)
else:
# Encoder-/Decoder-only models
hidden_states = outputs.hidden_states[0]
attentions = outputs.attentions[0]
hidden_states.retain_grad()
attentions.retain_grad()
output.flatten()[0].backward(retain_graph=True)
self.assertIsNotNone(hidden_states.grad)
self.assertIsNotNone(attentions.grad)
def test_feed_forward_chunking(self):
(
original_config,