Delete deprecated stuff (#38838)

* delete deprecated stuff * fix copies * remove unused tests * fix modernbert and fuyu * Update src/transformers/cache_utils.py Co-authored-by: Joao Gante <joaofranciscocardosogante@gmail.com> * bye bye `seen_tokens` * address comments * update typings * encoder decoder models follow same pattern as whisper * fix copies * why is it set to False? * fix switch transformers * fix encoder decoder models shared weight * fix copies and RAG * remove `next_cache` * fix gptj/git * fix copies * fix copies * style... * another forgotten docstring --------- Co-authored-by: Joao Gante <joaofranciscocardosogante@gmail.com>
2025-07-10 10:18:44 +05:00
parent c6ee0b1da8
commit bc161d5d06
141 changed files with 914 additions and 2164 deletions
--- a/tests/utils/test_modeling_rope_utils.py
+++ b/tests/utils/test_modeling_rope_utils.py
@@ -77,58 +77,6 @@ class RopeTest(unittest.TestCase):
                    self.assertEqual(len(logs.output), 1)
                    self.assertIn(model_specific_kwarg, logs.output[0])

-    def test_default_rope_function_bc(self):
-        config = LlamaConfig()
-        device = torch_device
-
-        rope_kwargs = {
-            "rope_type": "default",
-            "dim": config.hidden_size // config.num_attention_heads,
-            "max_position_embeddings": config.max_position_embeddings,
-            "base": config.rope_theta,
-        }
-
-        rope_fn = ROPE_INIT_FUNCTIONS["default"]
-        config_freqs = rope_fn(config=config, device=device)[0]
-        kwargs_freqs = rope_fn(**rope_kwargs, device=device)[0]
-        torch.testing.assert_close(config_freqs, kwargs_freqs)
-
-    def test_linear_rope_function_bc(self):
-        config = LlamaConfig()
-        config.rope_scaling = {"rope_type": "linear", "factor": 10.0}
-        device = torch_device
-
-        rope_kwargs = {
-            "rope_type": "linear",
-            "dim": config.hidden_size // config.num_attention_heads,
-            "max_position_embeddings": config.max_position_embeddings,
-            "base": config.rope_theta,
-            "factor": 10.0,
-        }
-
-        rope_fn = ROPE_INIT_FUNCTIONS["linear"]
-        config_freqs = rope_fn(config=config, device=device)[0]
-        kwargs_freqs = rope_fn(**rope_kwargs, device=device)[0]
-        torch.testing.assert_close(config_freqs, kwargs_freqs)
-
-    def test_dynamic_rope_function_bc(self):
-        config = LlamaConfig()
-        config.rope_scaling = {"rope_type": "dynamic", "factor": 10.0}
-        device = torch_device
-
-        rope_kwargs = {
-            "rope_type": "dynamic",
-            "dim": config.hidden_size // config.num_attention_heads,
-            "max_position_embeddings": config.max_position_embeddings,
-            "base": config.rope_theta,
-            "factor": 10.0,
-        }
-
-        rope_fn = ROPE_INIT_FUNCTIONS["dynamic"]
-        config_freqs = rope_fn(config=config, device=device)[0]
-        kwargs_freqs = rope_fn(**rope_kwargs, device=device)[0]
-        torch.testing.assert_close(config_freqs, kwargs_freqs)
-
    def test_default_rope_numerically(self):
        # Note: some RoPE scaling methods start off by calling the default RoPE frequencies. If this test fails, then
        # multiple RoPE strategies will fail.