🧹 Remove deprecated RotaryEmbedding parts in the Attention layers (#34858)

* update * style * fix missing args * remove last trace of old rope classes * remove deprecated copied from * fix copies * trigger CIs * post rebase clean-up * reverse mistral * cleanup after dropping commits * Add comment
2024-12-11 11:16:52 +01:00
parent 9094b87dd4
commit d363e71d0e
31 changed files with 151 additions and 748 deletions
--- a/tests/models/stablelm/test_modeling_stablelm.py
+++ b/tests/models/stablelm/test_modeling_stablelm.py
@@ -44,11 +44,7 @@ if is_torch_available():
        StableLmForTokenClassification,
        StableLmModel,
    )
-    from transformers.models.stablelm.modeling_stablelm import (
-        StableLmDynamicNTKScalingRotaryEmbedding,
-        StableLmLinearScalingRotaryEmbedding,
-        StableLmRotaryEmbedding,
-    )
+    from transformers.models.stablelm.modeling_stablelm import StableLmRotaryEmbedding


 # Copied from transformers.tests.models.persimmon.test_modeling_persimmon.PersimmonModelTester with Persimmon -> StableLm
@@ -436,11 +432,12 @@ class StableLmModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterM

        # Sanity check linear RoPE scaling
        # New position "x" should match original position with index "x/scaling_factor"
-        linear_scaling_rope = StableLmLinearScalingRotaryEmbedding(
+        linear_scaling_rope = StableLmRotaryEmbedding(
            head_dim,
            max_position_embeddings=config.max_position_embeddings,
            base=config.rope_theta,
            scaling_factor=scaling_factor,
+            rope_type="linear",
        ).to(torch_device)
        linear_cos_short, linear_sin_short = linear_scaling_rope(x, position_ids_short)
        linear_cos_long, linear_sin_long = linear_scaling_rope(x, position_ids_long)
@@ -454,11 +451,12 @@ class StableLmModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterM
        # Sanity check Dynamic NTK RoPE scaling
        # Scaling should only be observed after a long input is fed. We can observe that the frequencies increase
        # with scaling_factor (or that `inv_freq` decreases)
-        ntk_scaling_rope = StableLmDynamicNTKScalingRotaryEmbedding(
+        ntk_scaling_rope = StableLmRotaryEmbedding(
            head_dim,
            max_position_embeddings=config.max_position_embeddings,
            base=config.rope_theta,
            scaling_factor=scaling_factor,
+            rope_type="dynamic",
        ).to(torch_device)
        ntk_cos_short, ntk_sin_short = ntk_scaling_rope(x, position_ids_short)
        ntk_cos_long, ntk_sin_long = ntk_scaling_rope(x, position_ids_long)