Cache: use batch_size instead of max_batch_size (#32657)

* more precise name
* better docstrings
* Update src/transformers/cache_utils.py

Co-authored-by: Arthur <48595927+ArthurZucker@users.noreply.github.com>

---------

Co-authored-by: Arthur <48595927+ArthurZucker@users.noreply.github.com>
2024-08-16 11:48:45 +01:00
parent 8f9fa3b081
commit cf32ee1753
9 changed files with 112 additions and 54 deletions
--- a/tests/quantization/aqlm_integration/test_aqlm.py
+++ b/tests/quantization/aqlm_integration/test_aqlm.py
@@ -216,7 +216,7 @@ class AqlmTest(unittest.TestCase):
        # Setup static KV cache for generation
        past_key_values = StaticCache(
            config=self.quantized_model.config,
-            max_batch_size=1,
+            batch_size=1,
            max_cache_len=seq_length + self.max_new_tokens + 1,
            device=torch_device,
            dtype=self.quantized_model.config._pre_quantization_dtype,