Cache: models return input cache type (#30716)

This commit is contained in:
Joao Gante
2024-05-08 18:26:34 +01:00
committed by GitHub
parent 71c1985069
commit f26e407370
11 changed files with 30 additions and 70 deletions

View File

@@ -353,11 +353,6 @@ class OlmoModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMixin
# The output should be different for long inputs
self.assertFalse(torch.allclose(original_long_output, scaled_long_output, atol=1e-5))
@unittest.skip("TODO @gante fix this for OLMo")
@parameterized.expand([(1, False), (1, True), (4, False)])
def test_new_cache_format(self, num_beams, do_sample):
    """Placeholder for the new-cache-format test; unconditionally skipped.

    The body is intentionally empty. The skip reason above records that
    this test still has to be fixed for OLMo. Each tuple handed to
    ``parameterized.expand`` lines up with ``(num_beams, do_sample)``.
    """
@require_torch
class OlmoIntegrationTest(unittest.TestCase):