From fc37f38915372c15992b540dfcbbe00a916d4fc6 Mon Sep 17 00:00:00 2001 From: Andrei Panferov Date: Wed, 21 Feb 2024 19:09:36 +0300 Subject: [PATCH] Add training version check for AQLM quantizer. (#29142) * training version check * warn old aqlm * aqlm 1.0.2 real * docs --- docs/source/en/quantization.md | 6 +++++- src/transformers/quantizers/quantizer_aqlm.py | 12 +++++++++++- 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/docs/source/en/quantization.md b/docs/source/en/quantization.md index 29ee188852..b8c09d4497 100644 --- a/docs/source/en/quantization.md +++ b/docs/source/en/quantization.md @@ -39,10 +39,14 @@ Inference support for AQLM is realised in the `aqlm` library. Make sure to insta pip install aqlm[gpu,cpu] ``` -The library provides efficient kernels for both GPU and CPU inference. +The library provides efficient kernels for both GPU and CPU inference and training. The instructions on how to quantize models yourself, as well as all the relevant code can be found in the corresponding GitHub [repository](https://github.com/Vahe1994/AQLM). +### PEFT + +Starting with version `aqlm 1.0.2`, AQLM supports Parameter-Efficient Fine-Tuning in the form of [LoRA](https://huggingface.co/docs/peft/package_reference/lora) integrated into the [PEFT](https://huggingface.co/blog/peft) library. + ### AQLM configurations AQLM quantization setpus vary mainly on the number of codebooks used as well as codebook sizes in bits. The most popular setups, as well as inference kernels they support are: diff --git a/src/transformers/quantizers/quantizer_aqlm.py b/src/transformers/quantizers/quantizer_aqlm.py index b8038942ef..c2366305b6 100644 --- a/src/transformers/quantizers/quantizer_aqlm.py +++ b/src/transformers/quantizers/quantizer_aqlm.py @@ -11,8 +11,11 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
+import importlib.metadata
 from typing import TYPE_CHECKING, Optional
 
+from packaging import version
+
 from .base import HfQuantizer
 
 
@@ -81,7 +84,14 @@ class AqlmHfQuantizer(HfQuantizer):
 
     @property
     def is_trainable(self, model: Optional["PreTrainedModel"] = None):
-        return False
+        installed_aqlm = importlib.metadata.version("aqlm")
+        if version.parse(installed_aqlm) >= version.parse("1.0.2"):
+            return True
+        # aqlm < 1.0.2 doesn't support training: tell the user how to upgrade (quoted so the shell doesn't treat `>` as a redirect).
+        logger.warning(
+            f"Currently installed `aqlm` version ({installed_aqlm}) doesn't support training. If you wish to train a quantized model, please update `aqlm` with `pip install \"aqlm>=1.0.2\"`"
+        )
+        return False
 
     @property
     def is_serializable(self):