Fix gradient checkpointing + fp16 autocast for most models (#24247)
* fix gc bug
* continue PoC on OPT
* fixes
* 🤯
* fix tests
* remove pytest.mark
* fixup
* forward contrib credits from discussions
* forward contrib credits from discussions
* reverting changes on untouched files.

---------

Co-authored-by: zhaoqf123 <zhaoqf123@users.noreply.github.com>
Co-authored-by: 7eu7d7 <7eu7d7@users.noreply.github.com>
This commit is contained in:
@@ -609,6 +609,12 @@ class BigBirdModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase)
|
||||
config_and_inputs = self.model_tester.prepare_config_and_inputs()
|
||||
self.model_tester.create_and_check_for_change_to_full_attn(*config_and_inputs)
|
||||
|
||||
@unittest.skip(
|
||||
reason="The model does not support GC + autocast + fp16: https://github.com/huggingface/transformers/pull/24247"
|
||||
)
|
||||
def test_training_gradient_checkpointing_autocast(self):
|
||||
pass
|
||||
|
||||
# overwrite from common in order to skip the check on `attentions`
|
||||
def check_pt_flax_outputs(self, fx_outputs, pt_outputs, model_class, tol=1e-5, name="outputs", attributes=None):
|
||||
# `bigbird_block_sparse_attention` in `FlaxBigBird` returns `attention_probs = None`, while in PyTorch version,
|
||||
|
||||
Reference in New Issue
Block a user