[generate] return Cache object even if passed in a legacy format (#35673)

* generate returns a Cache object by default
* fix tests
* fix test for encoder-decoder models
2025-01-16 17:06:24 +00:00
parent 2818307e93
commit 94af1c0aa2
9 changed files with 36 additions and 156 deletions
--- a/tests/models/aria/test_modeling_aria.py
+++ b/tests/models/aria/test_modeling_aria.py
@@ -268,18 +268,6 @@ class AriaForConditionalGenerationModelTest(ModelTesterMixin, GenerationTesterMi
    def test_sdpa_can_dispatch_on_flash(self):
        pass

-    @unittest.skip(reason="")
-    def test_new_cache_format_0(self):
-        pass
-
-    @unittest.skip(reason="")
-    def test_new_cache_format_1(self):
-        pass
-
-    @unittest.skip(reason="")
-    def test_new_cache_format_2(self):
-        pass
-
    @unittest.skip(reason="Feedforward chunking is not yet supported")
    def test_feed_forward_chunking(self):
        pass
--- a/tests/models/bamba/test_modeling_bamba.py
+++ b/tests/models/bamba/test_modeling_bamba.py
@@ -18,7 +18,6 @@ import inspect
 import unittest

 import pytest
-from parameterized import parameterized

 from transformers import AutoTokenizer, BambaConfig, is_torch_available
 from transformers.testing_utils import (
@@ -395,11 +394,6 @@ class BambaModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMixi
                [self.model_tester.num_attention_heads, encoder_seq_length, encoder_key_length],
            )

-    @unittest.skip(reason="Bamba has its own special cache type")
-    @parameterized.expand([(1, False), (1, True), (4, False)])
-    def test_new_cache_format(self, num_beams, do_sample):
-        pass
-
    def test_batching_equivalence(self):
        # need to disable the tril input mask
        orig = self.model_tester.use_input_mask
--- a/tests/models/cohere2/test_modeling_cohere2.py
+++ b/tests/models/cohere2/test_modeling_cohere2.py
@@ -103,11 +103,6 @@ class Cohere2ModelTest(CohereModelTest, unittest.TestCase):
    def test_dola_decoding_sample(self):
        pass

-    @parameterized.expand([(1, False), (1, True), (4, False)])
-    @unittest.skip("Cohere2 has HybridCache and doesn't support old tuple format at all")
-    def test_new_cache_format(self, num_beams, do_sample):
-        pass
-
    @unittest.skip("Cohere2 has HybridCache and doesn't support continue from past kv")
    def test_generate_continue_from_past_key_values(self):
        pass
--- a/tests/models/gemma2/test_modeling_gemma2.py
+++ b/tests/models/gemma2/test_modeling_gemma2.py
@@ -117,11 +117,6 @@ class Gemma2ModelTest(GemmaModelTest, unittest.TestCase):
    def test_dola_decoding_sample(self):
        pass

-    @parameterized.expand([(1, False), (1, True), (4, False)])
-    @unittest.skip("Gemma2 has HybridCache and doesn't support old tuple format at all")
-    def test_new_cache_format(self, num_beams, do_sample):
-        pass
-
    @unittest.skip("Gemma2 has HybridCache and doesn't support continue from past kv")
    def test_generate_continue_from_past_key_values(self):
        pass
--- a/tests/models/jamba/test_modeling_jamba.py
+++ b/tests/models/jamba/test_modeling_jamba.py
@@ -19,7 +19,6 @@ import tempfile
 import unittest

 import pytest
-from parameterized import parameterized

 from transformers import AutoTokenizer, JambaConfig, is_torch_available
 from transformers.testing_utils import (
@@ -550,11 +549,6 @@ class JambaModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMixi
        """
        self.skipTest(reason="Jamba flash attention does not support right padding")

-    @unittest.skip(reason="Jamba has its own special cache type")
-    @parameterized.expand([(1, False), (1, True), (4, False)])
-    def test_new_cache_format(self, num_beams, do_sample):
-        pass
-

@require_torch
 class JambaModelIntegrationTest(unittest.TestCase):
--- a/tests/models/jetmoe/test_modeling_jetmoe.py
+++ b/tests/models/jetmoe/test_modeling_jetmoe.py
@@ -18,7 +18,6 @@ import gc
 import unittest

 import pytest
-from parameterized import parameterized

 from transformers import AutoTokenizer, JetMoeConfig, is_torch_available
 from transformers.testing_utils import (
@@ -299,10 +298,6 @@ class JetMoeModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMix
    test_disk_offload_bin = False
    test_disk_offload_safetensors = False

-    @parameterized.expand([(1, False), (1, True), (4, False)])
-    def test_new_cache_format(self, num_beams, do_sample):
-        pass
-
    def setUp(self):
        self.model_tester = JetMoeModelTester(self)
        self.config_tester = ConfigTester(
--- a/tests/models/zamba/test_modeling_zamba.py
+++ b/tests/models/zamba/test_modeling_zamba.py
@@ -19,7 +19,6 @@ import tempfile
 import unittest

 import pytest
-from parameterized import parameterized

 from transformers import AutoTokenizer, ZambaConfig, is_torch_available
 from transformers.testing_utils import (
@@ -551,11 +550,6 @@ class ZambaModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMixi
        """
        self.skipTest(reason="Zamba flash attention does not support right padding")

-    @unittest.skip(reason="Zamba has its own special cache type")
-    @parameterized.expand([(1, False), (1, True), (4, False)])
-    def test_new_cache_format(self, num_beams, do_sample):
-        pass
-

@require_torch
 class ZambaModelIntegrationTest(unittest.TestCase):