From 80dbbd103c217f422de91a3265bf6d8e8bc414f7 Mon Sep 17 00:00:00 2001 From: Joao Gante Date: Thu, 16 Jan 2025 17:03:20 +0000 Subject: [PATCH] =?UTF-8?q?=F0=9F=A7=B9=20remove=20`generate`-related=20ob?= =?UTF-8?q?jects=20and=20methods=20scheduled=20for=20removal=20in=20v4.48?= =?UTF-8?q?=20(#35677)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * remove things scheduled for removal * make fixup --- src/transformers/__init__.py | 2 -- src/transformers/cache_utils.py | 11 ------- src/transformers/generation/__init__.py | 4 --- src/transformers/generation/logits_process.py | 16 ---------- .../models/gpt_neox/modeling_gpt_neox.py | 32 +------------------ src/transformers/utils/dummy_pt_objects.py | 7 ---- utils/check_docstrings.py | 1 - utils/check_repo.py | 1 - 8 files changed, 1 insertion(+), 73 deletions(-) diff --git a/src/transformers/__init__.py b/src/transformers/__init__.py index 1cf0f88ad6..39a9c56234 100755 --- a/src/transformers/__init__.py +++ b/src/transformers/__init__.py @@ -1377,7 +1377,6 @@ else: "LogitNormalization", "LogitsProcessor", "LogitsProcessorList", - "LogitsWarper", "MaxLengthCriteria", "MaxTimeCriteria", "MinLengthLogitsProcessor", @@ -6460,7 +6459,6 @@ if TYPE_CHECKING: LogitNormalization, LogitsProcessor, LogitsProcessorList, - LogitsWarper, MaxLengthCriteria, MaxTimeCriteria, MinLengthLogitsProcessor, diff --git a/src/transformers/cache_utils.py b/src/transformers/cache_utils.py index ad497581c0..e616adbe67 100644 --- a/src/transformers/cache_utils.py +++ b/src/transformers/cache_utils.py @@ -63,17 +63,6 @@ class Cache(torch.nn.Module): # TODO: deprecate this function in favor of `cache_position` raise NotImplementedError("Make sure to implement `get_seq_length` in a subclass.") - # Deprecate in favor of max-cache-shape because we want to be specifc by what we mean with "max_length" - # Prev some cache objects didn't have "max_length" (SlidingWindowCache or SinkCache) because the cache object technically handles - # infinite amount of tokens. In the codebase what we really need to check is the max capacity of certain cache instances, so - # we change naming to be more explicit - def get_max_length(self) -> Optional[int]: - logger.warning_once( - "`get_max_cache()` is deprecated for all Cache classes. Use `get_max_cache_shape()` instead. " - "Calling `get_max_cache()` will raise error from v4.48" - ) - return self.get_max_cache_shape() - def get_max_cache_shape(self) -> Optional[int]: """Returns the maximum sequence length (i.e. max capacity) of the cache object""" raise NotImplementedError("Make sure to implement `get_max_cache_shape` in a subclass.") diff --git a/src/transformers/generation/__init__.py b/src/transformers/generation/__init__.py index d3eb10c1e6..ea39e8a10b 100644 --- a/src/transformers/generation/__init__.py +++ b/src/transformers/generation/__init__.py @@ -68,7 +68,6 @@ else: "LogitNormalization", "LogitsProcessor", "LogitsProcessorList", - "LogitsWarper", "MinLengthLogitsProcessor", "MinNewTokensLengthLogitsProcessor", "MinPLogitsWarper", @@ -89,7 +88,6 @@ else: "WatermarkLogitsProcessor", ] _import_structure["stopping_criteria"] = [ - "MaxNewTokensCriteria", "MaxLengthCriteria", "MaxTimeCriteria", "ConfidenceCriteria", @@ -230,7 +228,6 @@ if TYPE_CHECKING: LogitNormalization, LogitsProcessor, LogitsProcessorList, - LogitsWarper, MinLengthLogitsProcessor, MinNewTokensLengthLogitsProcessor, MinPLogitsWarper, @@ -254,7 +251,6 @@ if TYPE_CHECKING: ConfidenceCriteria, EosTokenCriteria, MaxLengthCriteria, - MaxNewTokensCriteria, MaxTimeCriteria, StoppingCriteria, StoppingCriteriaList, diff --git a/src/transformers/generation/logits_process.py b/src/transformers/generation/logits_process.py index 39a38f9139..7351abb119 100644 --- a/src/transformers/generation/logits_process.py +++ b/src/transformers/generation/logits_process.py @@ -52,22 +52,6 @@ class LogitsProcessor: ) -class LogitsWarper: - """Abstract base class for all logit warpers that can be applied during generation with multinomial sampling.""" - - def __init__(self): - logger.warning_once( - "`LogitsWarper` is deprecated and will be removed in v4.48. Your class should inherit `LogitsProcessor` " - "instead, which has the same properties and interface." - ) - - @add_start_docstrings(LOGITS_PROCESSOR_INPUTS_DOCSTRING) - def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor) -> torch.FloatTensor: - raise NotImplementedError( - f"{self.__class__} is an abstract class. Only classes inheriting this class can be called." - ) - - class LogitsProcessorList(list): """ This class can be used to create a list of [`LogitsProcessor`] to subsequently process a `scores` input tensor. diff --git a/src/transformers/models/gpt_neox/modeling_gpt_neox.py b/src/transformers/models/gpt_neox/modeling_gpt_neox.py index beed2430b4..e4814ce4e7 100755 --- a/src/transformers/models/gpt_neox/modeling_gpt_neox.py +++ b/src/transformers/models/gpt_neox/modeling_gpt_neox.py @@ -467,28 +467,6 @@ class GPTNeoXAttention(nn.Module): return target_dtype -# TODO Remove in deprecation cycle -class GPTNeoXFlashAttention2(GPTNeoXAttention): - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) - - logger.warning_once( - "The `GPTNeoXFlashAttention2` class is deprecated in favor of simply modifying the `config._attn_implementation`" - "attribute of the `GPTNeoXAttention` class! It will be removed in v4.48" - ) - - -# TODO Remove in deprecation cycle -class GPTNeoXSdpaAttention(GPTNeoXAttention): - def __init__(self, config, layer_idx=None): - super().__init__(config, layer_idx=layer_idx) - - logger.warning_once( - "The `GPTNeoXSdpaAttention` class is deprecated in favor of simply modifying the `config._attn_implementation`" - "attribute of the `GPTNeoXAttention` class! It will be removed in v4.48" - ) - - # Copied from transformers.models.llama.modeling_llama.LlamaRotaryEmbedding with Llama->GPTNeoX class GPTNeoXRotaryEmbedding(nn.Module): def __init__(self, config: GPTNeoXConfig, device=None): @@ -600,14 +578,6 @@ class GPTNeoXMLP(nn.Module): return hidden_states -GPT_NEOX_ATTENTION_CLASSES = { - "eager": GPTNeoXAttention, - "flash_attention_2": GPTNeoXFlashAttention2, - "sdpa": GPTNeoXSdpaAttention, - "flex_attention": GPTNeoXAttention, -} - - class GPTNeoXLayer(nn.Module): def __init__(self, config, layer_idx): super().__init__() @@ -616,7 +586,7 @@ class GPTNeoXLayer(nn.Module): self.post_attention_layernorm = nn.LayerNorm(config.hidden_size, eps=config.layer_norm_eps) self.post_attention_dropout = nn.Dropout(config.hidden_dropout) self.post_mlp_dropout = nn.Dropout(config.hidden_dropout) - self.attention = GPT_NEOX_ATTENTION_CLASSES[config._attn_implementation](config, layer_idx) + self.attention = GPTNeoXAttention(config, layer_idx) self.mlp = GPTNeoXMLP(config) def forward( diff --git a/src/transformers/utils/dummy_pt_objects.py b/src/transformers/utils/dummy_pt_objects.py index bac6220a71..843ff871da 100644 --- a/src/transformers/utils/dummy_pt_objects.py +++ b/src/transformers/utils/dummy_pt_objects.py @@ -352,13 +352,6 @@ class LogitsProcessorList(metaclass=DummyObject): requires_backends(self, ["torch"]) -class LogitsWarper(metaclass=DummyObject): - _backends = ["torch"] - - def __init__(self, *args, **kwargs): - requires_backends(self, ["torch"]) - - class MaxLengthCriteria(metaclass=DummyObject): _backends = ["torch"] diff --git a/utils/check_docstrings.py b/utils/check_docstrings.py index a63ca59690..e588307690 100644 --- a/utils/check_docstrings.py +++ b/utils/check_docstrings.py @@ -70,7 +70,6 @@ OBJECTS_TO_IGNORE = [ # Deprecated "InputExample", "InputFeatures", - "LogitsWarper", # Signature is *args/**kwargs "TFSequenceSummary", "TFBertTokenizer", diff --git a/utils/check_repo.py b/utils/check_repo.py index 7f3e0c66d5..d35bf27420 100644 --- a/utils/check_repo.py +++ b/utils/check_repo.py @@ -946,7 +946,6 @@ DEPRECATED_OBJECTS = [ "LineByLineTextDataset", "LineByLineWithRefDataset", "LineByLineWithSOPTextDataset", - "LogitsWarper", "NerPipeline", "PretrainedBartModel", "PretrainedFSMTModel",