[Quantization] Switch to optimum-quanto (#31732)

* switch to optimum-quanto rebase squash * fix import check * again * test try-except * style
2024-10-02 15:14:34 +02:00
parent b7474f211d
commit cac4a4876b
10 changed files with 121 additions and 55 deletions
--- a/tests/generation/test_utils.py
+++ b/tests/generation/test_utils.py
@@ -29,7 +29,7 @@ from transformers.testing_utils import (
    is_flaky,
    require_accelerate,
    require_auto_gptq,
-    require_quanto,
+    require_optimum_quanto,
    require_torch,
    require_torch_gpu,
    require_torch_multi_accelerator,
@@ -1941,7 +1941,7 @@ class GenerationTesterMixin:
            self.assertTrue(len(results.past_key_values.key_cache) == num_hidden_layers)
            self.assertTrue(results.past_key_values.key_cache[0].shape == cache_shape)

-    @require_quanto
+    @require_optimum_quanto
    @pytest.mark.generate
    def test_generate_with_quant_cache(self):
        for model_class in self.all_generative_model_classes: