Fix gradient checkpointing + fp16 autocast for most models (#24247)
* fix gc bug
* continue PoC on OPT
* fixes
* 🤯
* fix tests
* remove pytest.mark
* fixup
* forward contrib credits from discussions
* forward contrib credits from discussions
* reverting changes on untouched files.

---------

Co-authored-by: zhaoqf123 <zhaoqf123@users.noreply.github.com>
Co-authored-by: 7eu7d7 <7eu7d7@users.noreply.github.com>
This commit is contained in:
@@ -43,6 +43,7 @@ from ...modeling_outputs import (
|
||||
TokenClassifierOutput,
|
||||
)
|
||||
from ...modeling_utils import PreTrainedModel, SequenceSummary
|
||||
from ...pytorch_utils import torch_custom_checkpointing
|
||||
from ...pytorch_utils import (
|
||||
apply_chunking_to_forward,
|
||||
find_pruneable_heads_and_indices,
|
||||
@@ -550,7 +551,7 @@ class {{cookiecutter.camelcase_modelname}}Encoder(nn.Module):
|
||||
|
||||
return custom_forward
|
||||
|
||||
layer_outputs = torch.utils.checkpoint.checkpoint(
|
||||
layer_outputs = torch_custom_checkpointing(
|
||||
create_custom_forward(layer_module),
|
||||
hidden_states,
|
||||
attention_mask,
|
||||
@@ -1585,6 +1586,7 @@ from ...modeling_outputs import (
|
||||
CausalLMOutputWithCrossAttentions
|
||||
)
|
||||
from ...modeling_utils import PreTrainedModel
|
||||
from ...pytorch_utils import torch_custom_checkpointing
|
||||
from ...utils import logging
|
||||
from .configuration_{{cookiecutter.lowercase_modelname}} import {{cookiecutter.camelcase_modelname}}Config
|
||||
|
||||
@@ -2318,7 +2320,7 @@ class {{cookiecutter.camelcase_modelname}}Encoder({{cookiecutter.camelcase_model
|
||||
|
||||
return custom_forward
|
||||
|
||||
layer_outputs = torch.utils.checkpoint.checkpoint(
|
||||
layer_outputs = torch_custom_checkpointing(
|
||||
create_custom_forward(encoder_layer),
|
||||
hidden_states,
|
||||
attention_mask,
|
||||
@@ -2557,7 +2559,7 @@ class {{cookiecutter.camelcase_modelname}}Decoder({{cookiecutter.camelcase_model
|
||||
|
||||
return custom_forward
|
||||
|
||||
layer_outputs = torch.utils.checkpoint.checkpoint(
|
||||
layer_outputs = torch_custom_checkpointing(
|
||||
create_custom_forward(decoder_layer),
|
||||
hidden_states,
|
||||
attention_mask,
|
||||
|
||||
Reference in New Issue
Block a user