Cache: models return input cache type (#30716)

2024-05-08 18:26:34 +01:00
parent 71c1985069
commit f26e407370
11 changed files with 30 additions and 70 deletions
--- a/tests/models/olmo/test_modeling_olmo.py
+++ b/tests/models/olmo/test_modeling_olmo.py
@@ -353,11 +353,6 @@ class OlmoModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMixin
        # The output should be different for long inputs
        self.assertFalse(torch.allclose(original_long_output, scaled_long_output, atol=1e-5))

-    @unittest.skip("TODO @gante fix this for OLMo")
-    @parameterized.expand([(1, False), (1, True), (4, False)])
-    def test_new_cache_format(self, num_beams, do_sample):
-        pass
-

@require_torch
 class OlmoIntegrationTest(unittest.TestCase):