Fix gradient checkpointing + fp16 autocast for most models (#24247)

* fix gc bug

* continue PoC on OPT

* fixes

* 🤯

* fix tests

* remove pytest.mark

* fixup

* forward contrib credits from discussions

* forward contrib credits from discussions

* reverting changes on untouched files.

---------

Co-authored-by: zhaoqf123 <zhaoqf123@users.noreply.github.com>
Co-authored-by: 7eu7d7 <7eu7d7@users.noreply.github.com>
This commit is contained in:
Younes Belkada
2023-06-21 17:04:59 +02:00
committed by GitHub
parent 1815d1865e
commit 285a48011d
179 changed files with 836 additions and 271 deletions

View File

@@ -352,6 +352,12 @@ class AlignTextModelTest(ModelTesterMixin, unittest.TestCase):
def test_training_gradient_checkpointing(self):
pass
@unittest.skip(
reason="The model does not support GC + autocast + fp16: https://github.com/huggingface/transformers/pull/24247"
)
def test_training_gradient_checkpointing_autocast(self):
pass
@unittest.skip(reason="ALIGN does not use inputs_embeds")
def test_inputs_embeds(self):
pass

View File

@@ -186,6 +186,12 @@ class AltCLIPVisionModelTest(ModelTesterMixin, unittest.TestCase):
def test_training_gradient_checkpointing(self):
pass
@unittest.skip(
reason="The model does not support GC + autocast + fp16: https://github.com/huggingface/transformers/pull/24247"
)
def test_training_gradient_checkpointing_autocast(self):
pass
@unittest.skip(reason="AltCLIPVisionModel has no base class and is not available in MODEL_MAPPING")
def test_save_load_fast_init_from_base(self):
pass

View File

@@ -238,6 +238,12 @@ class AutoformerModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCa
def test_resize_tokens_embeddings(self):
pass
@unittest.skip(
reason="The model does not support GC + autocast + fp16: https://github.com/huggingface/transformers/pull/24247"
)
def test_training_gradient_checkpointing_autocast(self):
pass
# Input is 'static_categorical_features', not 'input_ids'
def test_model_main_input_name(self):
model_signature = inspect.signature(getattr(AutoformerModel, "forward"))

View File

@@ -227,6 +227,12 @@ class BeitModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
def test_multi_gpu_data_parallel_forward(self):
pass
@unittest.skip(
reason="The model does not support GC + autocast + fp16: https://github.com/huggingface/transformers/pull/24247"
)
def test_training_gradient_checkpointing_autocast(self):
pass
def test_model_common_attributes(self):
config, _ = self.model_tester.prepare_config_and_inputs_for_common()

View File

@@ -609,6 +609,12 @@ class BigBirdModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase)
config_and_inputs = self.model_tester.prepare_config_and_inputs()
self.model_tester.create_and_check_for_change_to_full_attn(*config_and_inputs)
@unittest.skip(
reason="The model does not support GC + autocast + fp16: https://github.com/huggingface/transformers/pull/24247"
)
def test_training_gradient_checkpointing_autocast(self):
pass
# overwrite from common in order to skip the check on `attentions`
def check_pt_flax_outputs(self, fx_outputs, pt_outputs, model_class, tol=1e-5, name="outputs", attributes=None):
# `bigbird_block_sparse_attention` in `FlaxBigBird` returns `attention_probs = None`, while in PyTorch version,

View File

@@ -789,6 +789,12 @@ class BlipTextRetrievalModelTest(ModelTesterMixin, unittest.TestCase):
def test_model_common_attributes(self):
pass
@unittest.skip(
reason="The model does not support GC + autocast + fp16: https://github.com/huggingface/transformers/pull/24247"
)
def test_training_gradient_checkpointing_autocast(self):
pass
def test_forward_signature(self):
config, _ = self.model_tester.prepare_config_and_inputs_for_common()

View File

@@ -499,6 +499,12 @@ class CanineModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
# CANINE does not use inputs_embeds
pass
@unittest.skip(
reason="The model does not support GC + autocast + fp16: https://github.com/huggingface/transformers/pull/24247"
)
def test_training_gradient_checkpointing_autocast(self):
pass
@unittest.skip("CANINE does not have a get_input_embeddings() method.")
def test_model_common_attributes(self):
pass

View File

@@ -395,6 +395,12 @@ class ChineseCLIPTextModelTest(ModelTesterMixin, unittest.TestCase):
def test_training_gradient_checkpointing(self):
pass
@unittest.skip(
reason="The model does not support GC + autocast + fp16: https://github.com/huggingface/transformers/pull/24247"
)
def test_training_gradient_checkpointing_autocast(self):
pass
@unittest.skip(reason="ChineseCLIPTextModel has no base class and is not available in MODEL_MAPPING")
def test_save_load_fast_init_from_base(self):
pass
@@ -469,6 +475,12 @@ class ChineseCLIPVisionModelTest(ModelTesterMixin, unittest.TestCase):
def test_save_load_fast_init_to_base(self):
pass
@unittest.skip(
reason="The model does not support GC + autocast + fp16: https://github.com/huggingface/transformers/pull/24247"
)
def test_training_gradient_checkpointing_autocast(self):
pass
@slow
def test_model_from_pretrained(self):
for model_name in CHINESE_CLIP_PRETRAINED_MODEL_ARCHIVE_LIST[:1]:

View File

@@ -227,6 +227,12 @@ class CLIPVisionModelTest(ModelTesterMixin, unittest.TestCase):
def test_training_gradient_checkpointing(self):
pass
@unittest.skip(
reason="The model does not support GC + autocast + fp16: https://github.com/huggingface/transformers/pull/24247"
)
def test_training_gradient_checkpointing_autocast(self):
pass
@unittest.skip(reason="CLIPVisionModel has no base class and is not available in MODEL_MAPPING")
def test_save_load_fast_init_from_base(self):
pass

View File

@@ -202,6 +202,12 @@ class CLIPSegVisionModelTest(ModelTesterMixin, unittest.TestCase):
def test_training_gradient_checkpointing(self):
pass
@unittest.skip(
reason="The model does not support GC + autocast + fp16: https://github.com/huggingface/transformers/pull/24247"
)
def test_training_gradient_checkpointing_autocast(self):
pass
@unittest.skip(reason="CLIPSegVisionModel has no base class and is not available in MODEL_MAPPING")
def test_save_load_fast_init_from_base(self):
pass
@@ -448,6 +454,12 @@ class CLIPSegModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase)
def test_hidden_states_output(self):
pass
@unittest.skip(
reason="The model does not support GC + autocast + fp16: https://github.com/huggingface/transformers/pull/24247"
)
def test_training_gradient_checkpointing_autocast(self):
pass
@unittest.skip(reason="Inputs_embeds is tested in individual model tests")
def test_inputs_embeds(self):
pass

View File

@@ -310,6 +310,12 @@ class Data2VecVisionModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.Te
config_and_inputs = self.model_tester.prepare_config_and_inputs()
self.model_tester.create_and_check_for_image_classification(*config_and_inputs)
@unittest.skip(
reason="The model does not support GC + autocast + fp16: https://github.com/huggingface/transformers/pull/24247"
)
def test_training_gradient_checkpointing_autocast(self):
pass
@slow
def test_model_from_pretrained(self):
for model_name in DATA2VEC_VISION_PRETRAINED_MODEL_ARCHIVE_LIST[:1]:

View File

@@ -182,6 +182,12 @@ class DPTModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
def test_inputs_embeds(self):
pass
@unittest.skip(
reason="The model does not support GC + autocast + fp16: https://github.com/huggingface/transformers/pull/24247"
)
def test_training_gradient_checkpointing_autocast(self):
pass
def test_model_common_attributes(self):
config, _ = self.model_tester.prepare_config_and_inputs_for_common()

View File

@@ -196,6 +196,12 @@ class DPTModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
def test_inputs_embeds(self):
pass
@unittest.skip(
reason="The model does not support GC + autocast + fp16: https://github.com/huggingface/transformers/pull/24247"
)
def test_training_gradient_checkpointing_autocast(self):
pass
def test_model_common_attributes(self):
config, _ = self.model_tester.prepare_config_and_inputs_for_common()

View File

@@ -185,6 +185,12 @@ class FlavaImageModelTest(ModelTesterMixin, unittest.TestCase):
# FLAVA does not use inputs_embeds
pass
@unittest.skip(
reason="The model does not support GC + autocast + fp16: https://github.com/huggingface/transformers/pull/24247"
)
def test_training_gradient_checkpointing_autocast(self):
pass
def test_model_common_attributes(self):
config, _ = self.model_tester.prepare_config_and_inputs_for_common()
@@ -462,6 +468,12 @@ class FlavaTextModelTest(ModelTesterMixin, unittest.TestCase):
# FLAVA does not use inputs_embeds
pass
@unittest.skip(
reason="The model does not support GC + autocast + fp16: https://github.com/huggingface/transformers/pull/24247"
)
def test_training_gradient_checkpointing_autocast(self):
pass
# skip this test as FlavaTextModel has no base class and is
# not available in MODEL_MAPPING
def test_save_load_fast_init_from_base(self):
@@ -624,6 +636,12 @@ class FlavaMultimodalModelTest(ModelTesterMixin, unittest.TestCase):
def test_save_load_fast_init_to_base(self):
pass
@unittest.skip(
reason="The model does not support GC + autocast + fp16: https://github.com/huggingface/transformers/pull/24247"
)
def test_training_gradient_checkpointing_autocast(self):
pass
@slow
def test_model_from_pretrained(self):
for model_name in FLAVA_PRETRAINED_MODEL_ARCHIVE_LIST[:1]:
@@ -731,6 +749,12 @@ class FlavaImageCodebookTest(ModelTesterMixin, unittest.TestCase):
def test_save_load_fast_init_to_base(self):
pass
@unittest.skip(
reason="The model does not support GC + autocast + fp16: https://github.com/huggingface/transformers/pull/24247"
)
def test_training_gradient_checkpointing_autocast(self):
pass
@slow
def test_model_from_pretrained(self):
for model_name in FLAVA_CODEBOOK_PRETRAINED_MODEL_ARCHIVE_LIST[:1]:
@@ -1156,6 +1180,12 @@ class FlavaForPreTrainingTest(FlavaModelTest):
class_for_tester = FlavaForPreTrainingTester
test_torchscript = False
@unittest.skip(
reason="The model does not support GC + autocast + fp16: https://github.com/huggingface/transformers/pull/24247"
)
def test_training_gradient_checkpointing_autocast(self):
pass
# We will verify our results on an image of cute cats
def prepare_img():

View File

@@ -444,6 +444,12 @@ class FNetModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
config_and_inputs = self.model_tester.prepare_config_and_inputs()
self.model_tester.create_and_check_for_token_classification(*config_and_inputs)
@unittest.skip(
reason="The model does not support GC + autocast + fp16: https://github.com/huggingface/transformers/pull/24247"
)
def test_training_gradient_checkpointing_autocast(self):
pass
@slow
def test_model_from_pretrained(self):
for model_name in FNET_PRETRAINED_MODEL_ARCHIVE_LIST[:1]:

View File

@@ -562,6 +562,12 @@ class GPT2ModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMixin
config_and_inputs = self.model_tester.prepare_config_and_inputs()
self.model_tester.create_and_check_gpt2_weight_initialization(*config_and_inputs)
@unittest.skip(
reason="The model does not support GC + autocast + fp16: https://github.com/huggingface/transformers/pull/24247"
)
def test_training_gradient_checkpointing_autocast(self):
pass
@slow
def test_batch_generation(self):
model = GPT2LMHeadModel.from_pretrained("gpt2")

View File

@@ -356,6 +356,12 @@ class GraphormerModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCa
def test_feed_forward_chunking(self):
pass
@unittest.skip(
reason="The model does not support GC + autocast + fp16: https://github.com/huggingface/transformers/pull/24247"
)
def test_training_gradient_checkpointing_autocast(self):
pass
@unittest.skip(reason="Graphormer does not share input and output embeddings")
def test_model_common_attributes(self):
pass

View File

@@ -304,6 +304,12 @@ class ImageGPTModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterM
def test_config(self):
self.config_tester.run_common_tests()
@unittest.skip(
reason="The model does not support GC + autocast + fp16: https://github.com/huggingface/transformers/pull/24247"
)
def test_training_gradient_checkpointing_autocast(self):
pass
def test_imagegpt_model(self):
config_and_inputs = self.model_tester.prepare_config_and_inputs()
self.model_tester.create_and_check_imagegpt_model(*config_and_inputs)

View File

@@ -216,6 +216,12 @@ class InformerModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase
config_and_inputs = self.model_tester.prepare_config_and_inputs_for_common()
self.model_tester.check_encoder_decoder_model_standalone(*config_and_inputs)
@unittest.skip(
reason="The model does not support GC + autocast + fp16: https://github.com/huggingface/transformers/pull/24247"
)
def test_training_gradient_checkpointing_autocast(self):
pass
def test_hidden_states_output(self):
def check_hidden_states_output(inputs_dict, config, model_class):
model = model_class(config)

View File

@@ -279,6 +279,12 @@ class LayoutLMModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase
config_and_inputs = self.model_tester.prepare_config_and_inputs()
self.model_tester.create_and_check_for_question_answering(*config_and_inputs)
@unittest.skip(
reason="The model does not support GC + autocast + fp16: https://github.com/huggingface/transformers/pull/24247"
)
def test_training_gradient_checkpointing_autocast(self):
pass
def prepare_layoutlm_batch_inputs():
# Here we prepare a batch of 2 sequences to test a LayoutLM forward pass on:

View File

@@ -275,6 +275,12 @@ class LiltModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMixin
config_and_inputs = self.model_tester.prepare_config_and_inputs()
self.model_tester.create_and_check_for_question_answering(*config_and_inputs)
@unittest.skip(
reason="The model does not support GC + autocast + fp16: https://github.com/huggingface/transformers/pull/24247"
)
def test_training_gradient_checkpointing_autocast(self):
pass
@slow
def test_model_from_pretrained(self):
for model_name in LILT_PRETRAINED_MODEL_ARCHIVE_LIST[:1]:

View File

@@ -697,6 +697,12 @@ class LukeModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
config_and_inputs = self.model_tester.prepare_config_and_inputs()
self.model_tester.create_and_check_model(*config_and_inputs)
@unittest.skip(
reason="The model does not support GC + autocast + fp16: https://github.com/huggingface/transformers/pull/24247"
)
def test_training_gradient_checkpointing_autocast(self):
pass
@slow
def test_model_from_pretrained(self):
for model_name in LUKE_PRETRAINED_MODEL_ARCHIVE_LIST:

View File

@@ -263,6 +263,12 @@ class MarianModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMix
def test_config(self):
self.config_tester.run_common_tests()
@unittest.skip(
reason="The model does not support GC + autocast + fp16: https://github.com/huggingface/transformers/pull/24247"
)
def test_training_gradient_checkpointing_autocast(self):
pass
def test_save_load_strict(self):
config, inputs_dict = self.model_tester.prepare_config_and_inputs()
for model_class in self.all_model_classes:

View File

@@ -155,6 +155,12 @@ class OwlViTVisionModelTest(ModelTesterMixin, unittest.TestCase):
def test_inputs_embeds(self):
pass
@unittest.skip(
reason="The model does not support GC + autocast + fp16: https://github.com/huggingface/transformers/pull/24247"
)
def test_training_gradient_checkpointing_autocast(self):
pass
def test_model_common_attributes(self):
config, _ = self.model_tester.prepare_config_and_inputs_for_common()
@@ -633,6 +639,12 @@ class OwlViTForObjectDetectionTest(ModelTesterMixin, unittest.TestCase):
def test_training_gradient_checkpointing(self):
pass
@unittest.skip(
reason="The model does not support GC + autocast + fp16: https://github.com/huggingface/transformers/pull/24247"
)
def test_training_gradient_checkpointing_autocast(self):
pass
def _create_and_check_torchscript(self, config, inputs_dict):
if not self.test_torchscript:
return

View File

@@ -280,6 +280,12 @@ class PegasusModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMi
config_and_inputs = self.model_tester.prepare_config_and_inputs_for_common()
self.model_tester.check_encoder_decoder_model_standalone(*config_and_inputs)
@unittest.skip(
reason="The model does not support GC + autocast + fp16: https://github.com/huggingface/transformers/pull/24247"
)
def test_training_gradient_checkpointing_autocast(self):
pass
def test_generate_fp16(self):
config, input_dict = self.model_tester.prepare_config_and_inputs()
input_ids = input_dict["input_ids"]

View File

@@ -332,6 +332,12 @@ class Pix2StructTextModelTest(ModelTesterMixin, unittest.TestCase):
def test_training(self):
pass
@unittest.skip(
reason="The model does not support GC + autocast + fp16: https://github.com/huggingface/transformers/pull/24247"
)
def test_training_gradient_checkpointing_autocast(self):
pass
@unittest.skip(reason="Training is tested directly on `Pix2StructTextImageModelTest`")
def test_training_gradient_checkpointing(self):
pass

View File

@@ -161,6 +161,12 @@ class RegNetModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
def test_model_common_attributes(self):
pass
@unittest.skip(
reason="The model does not support GC + autocast + fp16: https://github.com/huggingface/transformers/pull/24247"
)
def test_training_gradient_checkpointing_autocast(self):
pass
def test_forward_signature(self):
config, _ = self.model_tester.prepare_config_and_inputs_for_common()

View File

@@ -452,6 +452,12 @@ class RoFormerModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase
config_and_inputs = self.model_tester.prepare_config_and_inputs_for_decoder()
self.model_tester.create_and_check_model_as_decoder(*config_and_inputs)
@unittest.skip(
reason="The model does not support GC + autocast + fp16: https://github.com/huggingface/transformers/pull/24247"
)
def test_training_gradient_checkpointing_autocast(self):
pass
def test_model_as_decoder_with_default_input_mask(self):
# This regression test was failing with PyTorch < 1.3
(

View File

@@ -421,6 +421,12 @@ class SamModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
def test_training_gradient_checkpointing(self):
pass
@unittest.skip(
reason="The model does not support GC + autocast + fp16: https://github.com/huggingface/transformers/pull/24247"
)
def test_training_gradient_checkpointing_autocast(self):
pass
@unittest.skip(reason="SamModel has no base class and is not available in MODEL_MAPPING")
def test_save_load_fast_init_from_base(self):
pass

View File

@@ -324,6 +324,12 @@ class Speech2TextModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTest
def test_training_gradient_checkpointing(self):
pass
@unittest.skip(
reason="The model does not support GC + autocast + fp16: https://github.com/huggingface/transformers/pull/24247"
)
def test_training_gradient_checkpointing_autocast(self):
pass
def test_generate_fp16(self):
config, input_dict = self.model_tester.prepare_config_and_inputs()
input_features = input_dict["input_features"]

View File

@@ -613,6 +613,12 @@ class SwitchTransformersModelTest(ModelTesterMixin, GenerationTesterMixin, Pipel
config_and_inputs = self.model_tester.prepare_config_and_inputs()
self.model_tester.create_and_check_decoder_model_attention_mask_past(*config_and_inputs)
@unittest.skip(
reason="The model does not support GC + autocast + fp16: https://github.com/huggingface/transformers/pull/24247"
)
def test_training_gradient_checkpointing_autocast(self):
pass
@slow
def test_beam_sample_generate_dict_output(self):
r"""

View File

@@ -200,6 +200,12 @@ class TimeSeriesTransformerModelTest(ModelTesterMixin, PipelineTesterMixin, unit
def test_config(self):
self.config_tester.run_common_tests()
@unittest.skip(
reason="The model does not support GC + autocast + fp16: https://github.com/huggingface/transformers/pull/24247"
)
def test_training_gradient_checkpointing_autocast(self):
pass
def test_save_load_strict(self):
config, _ = self.model_tester.prepare_config_and_inputs()
for model_class in self.all_model_classes:

View File

@@ -243,6 +243,12 @@ class VanModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
model = VanModel.from_pretrained(model_name)
self.assertIsNotNone(model)
@unittest.skip(
reason="The model does not support GC + autocast + fp16: https://github.com/huggingface/transformers/pull/24247"
)
def test_training_gradient_checkpointing_autocast(self):
pass
# We will verify our results on an image of cute cats
def prepare_img():

View File

@@ -340,6 +340,12 @@ class ViltModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
def test_model_outputs_equivalence(self):
pass
@unittest.skip(
reason="The model does not support GC + autocast + fp16: https://github.com/huggingface/transformers/pull/24247"
)
def test_training_gradient_checkpointing_autocast(self):
pass
def test_attention_outputs(self):
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
config.return_dict = True

View File

@@ -549,6 +549,12 @@ class VisualBertModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCa
config_and_inputs = self.model_tester.prepare_config_and_inputs_for_flickr()
self.model_tester.create_and_check_for_flickr(*config_and_inputs)
@unittest.skip(
reason="The model does not support GC + autocast + fp16: https://github.com/huggingface/transformers/pull/24247"
)
def test_training_gradient_checkpointing_autocast(self):
pass
@slow
def test_model_from_pretrained(self):
for model_name in VISUAL_BERT_PRETRAINED_MODEL_ARCHIVE_LIST[:1]:

View File

@@ -208,6 +208,12 @@ class ViTMAEModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
config_and_inputs = self.model_tester.prepare_config_and_inputs()
self.model_tester.create_and_check_for_pretraining(*config_and_inputs)
@unittest.skip(
reason="The model does not support GC + autocast + fp16: https://github.com/huggingface/transformers/pull/24247"
)
def test_training_gradient_checkpointing_autocast(self):
pass
# overwrite from common since ViTMAEForPretraining has random masking, we need to fix the noise
# to generate masks during test
def check_pt_tf_models(self, tf_model, pt_model, pt_inputs_dict):

View File

@@ -202,6 +202,12 @@ class XCLIPVisionModelTest(ModelTesterMixin, unittest.TestCase):
def test_save_load_fast_init_to_base(self):
pass
@unittest.skip(
reason="The model does not support GC + autocast + fp16: https://github.com/huggingface/transformers/pull/24247"
)
def test_training_gradient_checkpointing_autocast(self):
pass
@slow
def test_model_from_pretrained(self):
for model_name in XCLIP_PRETRAINED_MODEL_ARCHIVE_LIST[:1]: