From 9b3bf4a2065811c3845cd8d456f5b6f10b9906fa Mon Sep 17 00:00:00 2001 From: Mohamed Mekkouri <93391238+MekkCyber@users.noreply.github.com> Date: Thu, 24 Apr 2025 11:10:27 +0200 Subject: [PATCH] Fix torchao doc examples (#37697) fix Co-authored-by: Marc Sun <57196510+SunMarc@users.noreply.github.com> --- docs/source/en/quantization/torchao.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/source/en/quantization/torchao.md b/docs/source/en/quantization/torchao.md index f3153d9f22..42fed458f7 100644 --- a/docs/source/en/quantization/torchao.md +++ b/docs/source/en/quantization/torchao.md @@ -149,7 +149,7 @@ print(tokenizer.decode(output[0], skip_special_tokens=True)) ```py import torch from transformers import TorchAoConfig, AutoModelForCausalLM, AutoTokenizer -from torchao.quantization import Int8WeightOnlyConfig +from torchao.quantization import Int8DynamicActivationInt8WeightConfig quant_config = Int8DynamicActivationInt8WeightConfig() # or int8 weight only quantization @@ -179,7 +179,7 @@ print(tokenizer.decode(output[0], skip_special_tokens=True)) ```py import torch from transformers import TorchAoConfig, AutoModelForCausalLM, AutoTokenizer -from torchao.quantization import Int4WeightOnlyConfig +from torchao.quantization import GemliteUIntXWeightOnlyConfig # For batch size N, we recommend gemlite, which may require autotuning # default is 4 bit, 8 bit is also supported by passing `bit_width=8` @@ -216,7 +216,7 @@ print(tokenizer.decode(output[0], skip_special_tokens=True)) ```py import torch from transformers import TorchAoConfig, AutoModelForCausalLM, AutoTokenizer -from torchao.quantization import Int8WeightOnlyConfig +from torchao.quantization import Int8DynamicActivationInt8WeightConfig quant_config = Int8DynamicActivationInt8WeightConfig() # quant_config = Int8WeightOnlyConfig()