From 80dbbd103c217f422de91a3265bf6d8e8bc414f7 Mon Sep 17 00:00:00 2001
From: Joao Gante <joaofranciscocardosogante@gmail.com>
Date: Thu, 16 Jan 2025 17:03:20 +0000
Subject: [PATCH] =?UTF-8?q?=F0=9F=A7=B9=20remove=20`generate`-related=20ob?=
 =?UTF-8?q?jects=20and=20methods=20scheduled=20for=20removal=20in=20v4.48?=
 =?UTF-8?q?=20(#35677)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

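* remove deprecated objects scheduled for removal in v4.48: `LogitsWarper`,
  the `MaxNewTokensCriteria` export, `Cache.get_max_length`, and the GPTNeoX
  `GPTNeoXFlashAttention2` / `GPTNeoXSdpaAttention` wrappers together with
  `GPT_NEOX_ATTENTION_CLASSES`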

* make fixup
---
 src/transformers/__init__.py                  |  2 --
 src/transformers/cache_utils.py               | 11 -------
 src/transformers/generation/__init__.py       |  4 ---
 src/transformers/generation/logits_process.py | 16 ----------
 .../models/gpt_neox/modeling_gpt_neox.py      | 32 +------------------
 src/transformers/utils/dummy_pt_objects.py    |  7 ----
 utils/check_docstrings.py                     |  1 -
 utils/check_repo.py                           |  1 -
 8 files changed, 1 insertion(+), 73 deletions(-)

diff --git a/src/transformers/__init__.py b/src/transformers/__init__.py
index 1cf0f88ad6..39a9c56234 100755
--- a/src/transformers/__init__.py
+++ b/src/transformers/__init__.py
@@ -1377,7 +1377,6 @@ else:
             "LogitNormalization",
             "LogitsProcessor",
             "LogitsProcessorList",
-            "LogitsWarper",
             "MaxLengthCriteria",
             "MaxTimeCriteria",
             "MinLengthLogitsProcessor",
@@ -6460,7 +6459,6 @@ if TYPE_CHECKING:
             LogitNormalization,
             LogitsProcessor,
             LogitsProcessorList,
-            LogitsWarper,
             MaxLengthCriteria,
             MaxTimeCriteria,
             MinLengthLogitsProcessor,
diff --git a/src/transformers/cache_utils.py b/src/transformers/cache_utils.py
index ad497581c0..e616adbe67 100644
--- a/src/transformers/cache_utils.py
+++ b/src/transformers/cache_utils.py
@@ -63,17 +63,6 @@ class Cache(torch.nn.Module):
         # TODO: deprecate this function in favor of `cache_position`
         raise NotImplementedError("Make sure to implement `get_seq_length` in a subclass.")
 
-    # Deprecate in favor of max-cache-shape because we want to be specifc by what we mean with "max_length"
-    # Prev some cache objects didn't have "max_length" (SlidingWindowCache or SinkCache) because the cache object technically handles
-    # infinite amount of tokens. In the codebase what we really need to check is the max capacity of certain cache instances, so
-    # we change naming to be more explicit
-    def get_max_length(self) -> Optional[int]:
-        logger.warning_once(
-            "`get_max_cache()` is deprecated for all Cache classes. Use `get_max_cache_shape()` instead. "
-            "Calling `get_max_cache()` will raise error from v4.48"
-        )
-        return self.get_max_cache_shape()
-
     def get_max_cache_shape(self) -> Optional[int]:
         """Returns the maximum sequence length (i.e. max capacity) of the cache object"""
         raise NotImplementedError("Make sure to implement `get_max_cache_shape` in a subclass.")
diff --git a/src/transformers/generation/__init__.py b/src/transformers/generation/__init__.py
index d3eb10c1e6..ea39e8a10b 100644
--- a/src/transformers/generation/__init__.py
+++ b/src/transformers/generation/__init__.py
@@ -68,7 +68,6 @@ else:
         "LogitNormalization",
         "LogitsProcessor",
         "LogitsProcessorList",
-        "LogitsWarper",
         "MinLengthLogitsProcessor",
         "MinNewTokensLengthLogitsProcessor",
         "MinPLogitsWarper",
@@ -89,7 +88,6 @@ else:
         "WatermarkLogitsProcessor",
     ]
     _import_structure["stopping_criteria"] = [
-        "MaxNewTokensCriteria",
         "MaxLengthCriteria",
         "MaxTimeCriteria",
         "ConfidenceCriteria",
@@ -230,7 +228,6 @@ if TYPE_CHECKING:
             LogitNormalization,
             LogitsProcessor,
             LogitsProcessorList,
-            LogitsWarper,
             MinLengthLogitsProcessor,
             MinNewTokensLengthLogitsProcessor,
             MinPLogitsWarper,
@@ -254,7 +251,6 @@ if TYPE_CHECKING:
             ConfidenceCriteria,
             EosTokenCriteria,
             MaxLengthCriteria,
-            MaxNewTokensCriteria,
             MaxTimeCriteria,
             StoppingCriteria,
             StoppingCriteriaList,
diff --git a/src/transformers/generation/logits_process.py b/src/transformers/generation/logits_process.py
index 39a38f9139..7351abb119 100644
--- a/src/transformers/generation/logits_process.py
+++ b/src/transformers/generation/logits_process.py
@@ -52,22 +52,6 @@ class LogitsProcessor:
         )
 
 
-class LogitsWarper:
-    """Abstract base class for all logit warpers that can be applied during generation with multinomial sampling."""
-
-    def __init__(self):
-        logger.warning_once(
-            "`LogitsWarper` is deprecated and will be removed in v4.48. Your class should inherit `LogitsProcessor` "
-            "instead, which has the same properties and interface."
-        )
-
-    @add_start_docstrings(LOGITS_PROCESSOR_INPUTS_DOCSTRING)
-    def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor) -> torch.FloatTensor:
-        raise NotImplementedError(
-            f"{self.__class__} is an abstract class. Only classes inheriting this class can be called."
-        )
-
-
 class LogitsProcessorList(list):
     """
     This class can be used to create a list of [`LogitsProcessor`] to subsequently process a `scores` input tensor.
diff --git a/src/transformers/models/gpt_neox/modeling_gpt_neox.py b/src/transformers/models/gpt_neox/modeling_gpt_neox.py
index beed2430b4..e4814ce4e7 100755
--- a/src/transformers/models/gpt_neox/modeling_gpt_neox.py
+++ b/src/transformers/models/gpt_neox/modeling_gpt_neox.py
@@ -467,28 +467,6 @@ class GPTNeoXAttention(nn.Module):
         return target_dtype
 
 
-# TODO Remove in deprecation cycle
-class GPTNeoXFlashAttention2(GPTNeoXAttention):
-    def __init__(self, *args, **kwargs):
-        super().__init__(*args, **kwargs)
-
-        logger.warning_once(
-            "The `GPTNeoXFlashAttention2` class is deprecated in favor of simply modifying the `config._attn_implementation`"
-            "attribute of the `GPTNeoXAttention` class! It will be removed in v4.48"
-        )
-
-
-# TODO Remove in deprecation cycle
-class GPTNeoXSdpaAttention(GPTNeoXAttention):
-    def __init__(self, config, layer_idx=None):
-        super().__init__(config, layer_idx=layer_idx)
-
-        logger.warning_once(
-            "The `GPTNeoXSdpaAttention` class is deprecated in favor of simply modifying the `config._attn_implementation`"
-            "attribute of the `GPTNeoXAttention` class! It will be removed in v4.48"
-        )
-
-
 # Copied from transformers.models.llama.modeling_llama.LlamaRotaryEmbedding with Llama->GPTNeoX
 class GPTNeoXRotaryEmbedding(nn.Module):
     def __init__(self, config: GPTNeoXConfig, device=None):
@@ -600,14 +578,6 @@ class GPTNeoXMLP(nn.Module):
         return hidden_states
 
 
-GPT_NEOX_ATTENTION_CLASSES = {
-    "eager": GPTNeoXAttention,
-    "flash_attention_2": GPTNeoXFlashAttention2,
-    "sdpa": GPTNeoXSdpaAttention,
-    "flex_attention": GPTNeoXAttention,
-}
-
-
 class GPTNeoXLayer(nn.Module):
     def __init__(self, config, layer_idx):
         super().__init__()
@@ -616,7 +586,7 @@ class GPTNeoXLayer(nn.Module):
         self.post_attention_layernorm = nn.LayerNorm(config.hidden_size, eps=config.layer_norm_eps)
         self.post_attention_dropout = nn.Dropout(config.hidden_dropout)
         self.post_mlp_dropout = nn.Dropout(config.hidden_dropout)
-        self.attention = GPT_NEOX_ATTENTION_CLASSES[config._attn_implementation](config, layer_idx)
+        self.attention = GPTNeoXAttention(config, layer_idx)
         self.mlp = GPTNeoXMLP(config)
 
     def forward(
diff --git a/src/transformers/utils/dummy_pt_objects.py b/src/transformers/utils/dummy_pt_objects.py
index bac6220a71..843ff871da 100644
--- a/src/transformers/utils/dummy_pt_objects.py
+++ b/src/transformers/utils/dummy_pt_objects.py
@@ -352,13 +352,6 @@ class LogitsProcessorList(metaclass=DummyObject):
         requires_backends(self, ["torch"])
 
 
-class LogitsWarper(metaclass=DummyObject):
-    _backends = ["torch"]
-
-    def __init__(self, *args, **kwargs):
-        requires_backends(self, ["torch"])
-
-
 class MaxLengthCriteria(metaclass=DummyObject):
     _backends = ["torch"]
 
diff --git a/utils/check_docstrings.py b/utils/check_docstrings.py
index a63ca59690..e588307690 100644
--- a/utils/check_docstrings.py
+++ b/utils/check_docstrings.py
@@ -70,7 +70,6 @@ OBJECTS_TO_IGNORE = [
     # Deprecated
     "InputExample",
     "InputFeatures",
-    "LogitsWarper",
     # Signature is *args/**kwargs
     "TFSequenceSummary",
     "TFBertTokenizer",
diff --git a/utils/check_repo.py b/utils/check_repo.py
index 7f3e0c66d5..d35bf27420 100644
--- a/utils/check_repo.py
+++ b/utils/check_repo.py
@@ -946,7 +946,6 @@ DEPRECATED_OBJECTS = [
     "LineByLineTextDataset",
     "LineByLineWithRefDataset",
     "LineByLineWithSOPTextDataset",
-    "LogitsWarper",
     "NerPipeline",
     "PretrainedBartModel",
     "PretrainedFSMTModel",