[Quantization] Switch to optimum-quanto (#31732)

* switch to optimum-quanto rebase squash

* fix import check

* again

* test try-except

* style
This commit is contained in:
Marc Sun
2024-10-02 15:14:34 +02:00
committed by GitHub
parent b7474f211d
commit cac4a4876b
10 changed files with 121 additions and 55 deletions

View File

@@ -29,7 +29,7 @@ from transformers.testing_utils import (
is_flaky,
require_accelerate,
require_auto_gptq,
require_quanto,
require_optimum_quanto,
require_torch,
require_torch_gpu,
require_torch_multi_accelerator,
@@ -1941,7 +1941,7 @@ class GenerationTesterMixin:
self.assertTrue(len(results.past_key_values.key_cache) == num_hidden_layers)
self.assertTrue(results.past_key_values.key_cache[0].shape == cache_shape)
@require_quanto
@require_optimum_quanto
@pytest.mark.generate
def test_generate_with_quant_cache(self):
for model_class in self.all_generative_model_classes: