From 4705b04c74d5576e3256e515b587a59cadec96a9 Mon Sep 17 00:00:00 2001
From: Qubitium-ModelCloud
Date: Mon, 31 Mar 2025 16:53:48 +0800
Subject: [PATCH] Fix 4090/ada not detected as having FP8 support (#37067)

fix 4090/ada not detected as having FP8 support

Signed-off-by: Qubitium
Co-authored-by: Marc Sun <57196510+SunMarc@users.noreply.github.com>
Co-authored-by: Mohamed Mekkouri <93391238+MekkCyber@users.noreply.github.com>
---
 src/transformers/quantizers/quantizer_finegrained_fp8.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/src/transformers/quantizers/quantizer_finegrained_fp8.py b/src/transformers/quantizers/quantizer_finegrained_fp8.py
index ac6b735512..16ce7f6a9e 100644
--- a/src/transformers/quantizers/quantizer_finegrained_fp8.py
+++ b/src/transformers/quantizers/quantizer_finegrained_fp8.py
@@ -52,9 +52,10 @@ class FineGrainedFP8HfQuantizer(HfQuantizer):
 
         compute_capability = torch.cuda.get_device_capability()
         major, minor = compute_capability
-        if major < 9:
+        if (major < 8) or (major == 8 and minor < 9):
             raise ValueError(
-                "FP8 quantized models is only supported on GPUs with compute capability >= 9.0 (e.g H100)"
+                "FP8 quantized models is only supported on GPUs with compute capability >= 8.9 (e.g 4090/H100)"
+                f", actual = `{major}.{minor}`"
             )
 
         device_map = kwargs.get("device_map", None)