Delete deprecated stuff (#38838)

* delete deprecated stuff

* fix copies

* remove unused tests

* fix modernbert and fuyu

* Update src/transformers/cache_utils.py

Co-authored-by: Joao Gante <joaofranciscocardosogante@gmail.com>

* bye bye `seen_tokens`

* address comments

* update typings

* ecnoder decoder models follow same pattern as whisper

* fix copies

* why is it set to False?

* fix switch transformers

* fix encoder decoder models shared weight

* fix copies and RAG

* remove `next_cache`

* fix gptj/git

* fix copies

* fix copies

* style...

* another forgotten docsrting

---------

Co-authored-by: Joao Gante <joaofranciscocardosogante@gmail.com>
This commit is contained in:
Raushan Turganbay
2025-07-10 10:18:44 +05:00
committed by GitHub
parent c6ee0b1da8
commit bc161d5d06
141 changed files with 914 additions and 2164 deletions

View File

@@ -77,58 +77,6 @@ class RopeTest(unittest.TestCase):
self.assertEqual(len(logs.output), 1)
self.assertIn(model_specific_kwarg, logs.output[0])
def test_default_rope_function_bc(self):
config = LlamaConfig()
device = torch_device
rope_kwargs = {
"rope_type": "default",
"dim": config.hidden_size // config.num_attention_heads,
"max_position_embeddings": config.max_position_embeddings,
"base": config.rope_theta,
}
rope_fn = ROPE_INIT_FUNCTIONS["default"]
config_freqs = rope_fn(config=config, device=device)[0]
kwargs_freqs = rope_fn(**rope_kwargs, device=device)[0]
torch.testing.assert_close(config_freqs, kwargs_freqs)
def test_linear_rope_function_bc(self):
config = LlamaConfig()
config.rope_scaling = {"rope_type": "linear", "factor": 10.0}
device = torch_device
rope_kwargs = {
"rope_type": "linear",
"dim": config.hidden_size // config.num_attention_heads,
"max_position_embeddings": config.max_position_embeddings,
"base": config.rope_theta,
"factor": 10.0,
}
rope_fn = ROPE_INIT_FUNCTIONS["linear"]
config_freqs = rope_fn(config=config, device=device)[0]
kwargs_freqs = rope_fn(**rope_kwargs, device=device)[0]
torch.testing.assert_close(config_freqs, kwargs_freqs)
def test_dynamic_rope_function_bc(self):
config = LlamaConfig()
config.rope_scaling = {"rope_type": "dynamic", "factor": 10.0}
device = torch_device
rope_kwargs = {
"rope_type": "dynamic",
"dim": config.hidden_size // config.num_attention_heads,
"max_position_embeddings": config.max_position_embeddings,
"base": config.rope_theta,
"factor": 10.0,
}
rope_fn = ROPE_INIT_FUNCTIONS["dynamic"]
config_freqs = rope_fn(config=config, device=device)[0]
kwargs_freqs = rope_fn(**rope_kwargs, device=device)[0]
torch.testing.assert_close(config_freqs, kwargs_freqs)
def test_default_rope_numerically(self):
# Note: some RoPE scaling methods start off by calling the default RoPE frequencies. If this test fails, then
# multiple RoPE strategies will fail.