[core/ GC / tests] Stronger GC tests (#27124)
* stronger GC tests
* better tests and skip failing tests
* break down into 3 sub-tests
* break down into 3 sub-tests
* refactor a bit
* more refactor
* fix
* last nit
* credits contrib and suggestions
* credits contrib and suggestions
---------
Co-authored-by: Yih-Dar <2521628+ydshieh@users.noreply.github.com>
Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com>
This commit is contained in:
@@ -539,6 +539,44 @@ class ModelTesterMixin:
|
||||
expected_arg_names = ["input_ids"]
|
||||
self.assertListEqual(arg_names[:1], expected_arg_names)
|
||||
|
||||
def check_training_gradient_checkpointing(self, gradient_checkpointing_kwargs=None):
    """
    Run one optimizer step with gradient checkpointing enabled and check that every
    trainable parameter received a gradient.

    Does nothing when `self.model_tester.is_training` is falsy. Model classes whose name
    appears in `MODEL_MAPPING_NAMES` or `MODEL_FOR_BACKBONE_MAPPING_NAMES`, or whose
    `supports_gradient_checkpointing` is falsy, are skipped.

    Args:
        gradient_checkpointing_kwargs (`dict`, *optional*):
            Forwarded unchanged to `model.gradient_checkpointing_enable`.
    """
    if not self.model_tester.is_training:
        return

    for model_class in self.all_model_classes:
        # Decide whether to skip *before* building any config, so skipped classes cost nothing.
        if (
            model_class.__name__
            in [*get_values(MODEL_MAPPING_NAMES), *get_values(MODEL_FOR_BACKBONE_MAPPING_NAMES)]
            or not model_class.supports_gradient_checkpointing
        ):
            continue

        # Prepare the config exactly once. A second `prepare_config_and_inputs_for_common()`
        # call after these assignments would rebind `config` and drop both overrides.
        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
        config.use_cache = False
        config.return_dict = True

        model = model_class(config)
        model.to(torch_device)
        model.gradient_checkpointing_enable(gradient_checkpointing_kwargs=gradient_checkpointing_kwargs)
        model.train()

        # unfreeze additional layers
        for p in model.parameters():
            p.requires_grad_(True)

        optimizer = torch.optim.SGD(model.parameters(), lr=0.01)

        inputs = self._prepare_for_class(inputs_dict, model_class, return_labels=True)
        loss = model(**inputs).loss
        loss.backward()
        optimizer.step()

        # Every parameter that is still trainable must have been reached by the backward pass.
        for k, v in model.named_parameters():
            if v.requires_grad:
                self.assertIsNotNone(v.grad, f"{k} in {model_class.__name__} has no gradient!")
|
||||
|
||||
def test_training(self):
|
||||
if not self.model_tester.is_training:
|
||||
return
|
||||
@@ -561,34 +599,18 @@ class ModelTesterMixin:
|
||||
loss.backward()
|
||||
|
||||
def test_training_gradient_checkpointing(self):
    """Scenario 1: gradient checkpointing enabled with its default arguments."""
    if self.model_tester.is_training:
        self.check_training_gradient_checkpointing()
|
||||
|
||||
for model_class in self.all_model_classes:
|
||||
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
|
||||
config.use_cache = False
|
||||
config.return_dict = True
|
||||
def test_training_gradient_checkpointing_use_reentrant(self):
    """
    Scenario 2: gradient checkpointing with `use_reentrant=True`.

    This is the default value that is used in pytorch's torch.utils.checkpoint.checkpoint.
    """
    reentrant_kwargs = {"use_reentrant": True}
    self.check_training_gradient_checkpointing(gradient_checkpointing_kwargs=reentrant_kwargs)
|
||||
|
||||
if (
|
||||
model_class.__name__
|
||||
in [*get_values(MODEL_MAPPING_NAMES), *get_values(MODEL_FOR_BACKBONE_MAPPING_NAMES)]
|
||||
or not model_class.supports_gradient_checkpointing
|
||||
):
|
||||
continue
|
||||
model = model_class(config)
|
||||
model.to(torch_device)
|
||||
model.gradient_checkpointing_enable()
|
||||
model.train()
|
||||
inputs = self._prepare_for_class(inputs_dict, model_class, return_labels=True)
|
||||
loss = model(**inputs).loss
|
||||
loss.backward()
|
||||
|
||||
model.gradient_checkpointing_disable()
|
||||
model.gradient_checkpointing_enable(gradient_checkpointing_kwargs={"use_reentrant": True})
|
||||
model.train()
|
||||
inputs = self._prepare_for_class(inputs_dict, model_class, return_labels=True)
|
||||
loss = model(**inputs).loss
|
||||
loss.backward()
|
||||
def test_training_gradient_checkpointing_use_reentrant_false(self):
    """
    Scenario 3: gradient checkpointing with `use_reentrant=False`.

    PyTorch suggests users to use this value for future releases:
    https://pytorch.org/docs/stable/checkpoint.html
    """
    non_reentrant_kwargs = {"use_reentrant": False}
    self.check_training_gradient_checkpointing(gradient_checkpointing_kwargs=non_reentrant_kwargs)
|
||||
|
||||
def test_attention_outputs(self):
|
||||
if not self.has_attentions:
|
||||
|
||||
Reference in New Issue
Block a user