Cache: models return input cache type (#30716)
This commit is contained in:
@@ -591,11 +591,6 @@ class LlamaModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMixi
|
||||
msg=f"\n{tokenizer.batch_decode(res_eager)} \nvs\n{tokenizer.batch_decode(res_sdpa)}",
|
||||
)
|
||||
|
||||
@unittest.skip("TODO @gante fix this for Llama")
|
||||
@parameterized.expand([(1, False), (1, True), (4, False)])
|
||||
def test_new_cache_format(self, num_beams, do_sample):
|
||||
pass
|
||||
|
||||
|
||||
@require_torch_gpu
|
||||
class LlamaIntegrationTest(unittest.TestCase):
|
||||
|
||||
Reference in New Issue
Block a user