Fix gradient checkpointing + fp16 autocast for most models (#24247)

* fix gradient checkpointing (gc) bug

* continue PoC on OPT

* fixes

* 🤯

* fix tests

* remove pytest.mark

* fixup

* forward contrib credits from discussions

* forward contrib credits from discussions

* reverting changes on untouched files.

---------

Co-authored-by: zhaoqf123 <zhaoqf123@users.noreply.github.com>
Co-authored-by: 7eu7d7 <7eu7d7@users.noreply.github.com>
This commit is contained in:
Younes Belkada
2023-06-21 17:04:59 +02:00
committed by GitHub
parent 1815d1865e
commit 285a48011d
179 changed files with 836 additions and 271 deletions

View File

@@ -43,6 +43,7 @@ from ...modeling_outputs import (
TokenClassifierOutput,
)
from ...modeling_utils import PreTrainedModel, SequenceSummary
+from ...pytorch_utils import torch_custom_checkpointing
from ...pytorch_utils import (
apply_chunking_to_forward,
find_pruneable_heads_and_indices,
@@ -550,7 +551,7 @@ class {{cookiecutter.camelcase_modelname}}Encoder(nn.Module):
return custom_forward
-layer_outputs = torch.utils.checkpoint.checkpoint(
+layer_outputs = torch_custom_checkpointing(
create_custom_forward(layer_module),
hidden_states,
attention_mask,
@@ -1585,6 +1586,7 @@ from ...modeling_outputs import (
CausalLMOutputWithCrossAttentions
)
from ...modeling_utils import PreTrainedModel
+from ...pytorch_utils import torch_custom_checkpointing
from ...utils import logging
from .configuration_{{cookiecutter.lowercase_modelname}} import {{cookiecutter.camelcase_modelname}}Config
@@ -2318,7 +2320,7 @@ class {{cookiecutter.camelcase_modelname}}Encoder({{cookiecutter.camelcase_model
return custom_forward
-layer_outputs = torch.utils.checkpoint.checkpoint(
+layer_outputs = torch_custom_checkpointing(
create_custom_forward(encoder_layer),
hidden_states,
attention_mask,
@@ -2557,7 +2559,7 @@ class {{cookiecutter.camelcase_modelname}}Decoder({{cookiecutter.camelcase_model
return custom_forward
-layer_outputs = torch.utils.checkpoint.checkpoint(
+layer_outputs = torch_custom_checkpointing(
create_custom_forward(decoder_layer),
hidden_states,
attention_mask,