Compare commits

..

3 Commits

Author SHA1 Message Date
Arthur
d79b2d981f v4.55.4
Some checks failed
Release - Conda / build_and_package (push) Has been cancelled
Secret Leaks / trufflehog (push) Has been cancelled
2025-08-22 14:39:20 +02:00
Arthur
90792b730a Revert "Fix GPT-OSS swiglu_limit not passed in for MXFP4 #40197"
The cherry-picked commit does not match the changes nor the PR
This reverts commit e75d67ec39.
2025-08-22 11:21:18 +02:00
Daniel Han
a03df6acd4 Fix GPT-OSS swiglu_limit not passed in for MXFP4 (#40197)
Add swiglu_limit = 7.0
2025-08-22 11:20:23 +02:00
42 changed files with 43 additions and 52 deletions

View File

@@ -15,7 +15,7 @@
# /// script
# dependencies = [
-# "transformers==4.55.3",
+# "transformers==4.55.4",
# "datasets[audio]>=1.14.0",
# "evaluate",
# "librosa",

View File

@@ -15,7 +15,7 @@
# /// script
# dependencies = [
-# "transformers==4.55.3",
+# "transformers==4.55.4",
# "torch>=1.5.0",
# "torchvision>=0.6.0",
# "datasets>=1.8.0",

View File

@@ -14,7 +14,7 @@
# /// script
# dependencies = [
-# "transformers==4.55.3",
+# "transformers==4.55.4",
# "accelerate>=0.12.0",
# "torch>=1.5.0",
# "torchvision>=0.6.0",

View File

@@ -14,7 +14,7 @@
# /// script
# dependencies = [
-# "transformers==4.55.3",
+# "transformers==4.55.4",
# "accelerate>=0.12.0",
# "torch>=1.5.0",
# "torchvision>=0.6.0",

View File

@@ -14,7 +14,7 @@
# /// script
# dependencies = [
-# "transformers==4.55.3",
+# "transformers==4.55.4",
# "torch>=1.5.0",
# "torchvision>=0.6.0",
# "datasets>=1.8.0",

View File

@@ -14,7 +14,7 @@
# /// script
# dependencies = [
-# "transformers==4.55.3",
+# "transformers==4.55.4",
# "torch>=1.5.0",
# "torchvision>=0.6.0",
# "datasets>=1.8.0",

View File

@@ -14,7 +14,7 @@
# /// script
# dependencies = [
-# "transformers==4.55.3",
+# "transformers==4.55.4",
# "torch>=1.5.0",
# "torchvision>=0.6.0",
# "datasets>=1.8.0",

View File

@@ -14,7 +14,7 @@
# /// script
# dependencies = [
-# "transformers==4.55.3",
+# "transformers==4.55.4",
# "albumentations >= 1.4.16",
# "timm",
# "datasets",

View File

@@ -14,7 +14,7 @@
# /// script
# dependencies = [
-# "transformers==4.55.3",
+# "transformers==4.55.4",
# "albumentations >= 1.4.16",
# "timm",
# "datasets",

View File

@@ -15,7 +15,7 @@
# /// script
# dependencies = [
-# "transformers==4.55.3",
+# "transformers==4.55.4",
# "albumentations >= 1.4.16",
# "accelerate >= 0.12.0",
# "torch >= 1.3",

View File

@@ -15,7 +15,7 @@
# /// script
# dependencies = [
-# "transformers==4.55.3",
+# "transformers==4.55.4",
# "albumentations >= 1.4.16",
# "accelerate >= 0.12.0",
# "torch >= 1.3",

View File

@@ -15,7 +15,7 @@
# /// script
# dependencies = [
-# "transformers==4.55.3",
+# "transformers==4.55.4",
# "albumentations >= 1.4.16",
# "accelerate >= 0.12.0",
# "torch >= 1.3",

View File

@@ -15,7 +15,7 @@
# /// script
# dependencies = [
-# "transformers==4.55.3",
+# "transformers==4.55.4",
# "albumentations >= 1.4.16",
# "accelerate >= 0.12.0",
# "torch >= 1.3",

View File

@@ -15,7 +15,7 @@
# /// script
# dependencies = [
-# "transformers==4.55.3",
+# "transformers==4.55.4",
# "albumentations >= 1.4.16",
# "accelerate >= 0.12.0",
# "torch >= 1.3",

View File

@@ -15,7 +15,7 @@
# /// script
# dependencies = [
-# "transformers==4.55.3",
+# "transformers==4.55.4",
# "albumentations >= 1.4.16",
# "accelerate >= 0.12.0",
# "torch >= 1.3",

View File

@@ -15,7 +15,7 @@
# /// script
# dependencies = [
-# "transformers==4.55.3",
+# "transformers==4.55.4",
# "albumentations >= 1.4.16",
# "accelerate >= 0.12.0",
# "torch >= 1.3",

View File

@@ -15,7 +15,7 @@
# /// script
# dependencies = [
-# "transformers==4.55.3",
+# "transformers==4.55.4",
# "accelerate >= 0.12.0",
# "sentencepiece != 0.1.92",
# "protobuf",

View File

@@ -15,7 +15,7 @@
# /// script
# dependencies = [
-# "transformers==4.55.3",
+# "transformers==4.55.4",
# "accelerate >= 0.12.0",
# "sentencepiece != 0.1.92",
# "protobuf",

View File

@@ -14,7 +14,7 @@
# /// script
# dependencies = [
-# "transformers==4.55.3",
+# "transformers==4.55.4",
# "albumentations >= 1.4.16",
# "timm",
# "datasets>=4.0",

View File

@@ -14,7 +14,7 @@
# /// script
# dependencies = [
-# "transformers==4.55.3",
+# "transformers==4.55.4",
# "albumentations >= 1.4.16",
# "timm",
# "datasets>=4.0",

View File

@@ -14,7 +14,7 @@
# /// script
# dependencies = [
-# "transformers==4.55.3",
+# "transformers==4.55.4",
# "datasets >= 2.0.0",
# "torch >= 1.3",
# "accelerate",

View File

@@ -14,7 +14,7 @@
# /// script
# dependencies = [
-# "transformers==4.55.3",
+# "transformers==4.55.4",
# "datasets >= 2.0.0",
# "torch >= 1.3",
# "accelerate",

View File

@@ -14,7 +14,7 @@
# /// script
# dependencies = [
-# "transformers==4.55.3",
+# "transformers==4.55.4",
# "datasets[audio] >= 1.12.0",
# "torch >= 1.5",
# "torchaudio",

View File

@@ -15,7 +15,7 @@
# /// script
# dependencies = [
-# "transformers==4.55.3",
+# "transformers==4.55.4",
# "datasets[audio] >= 1.18.0",
# "torch >= 1.5",
# "torchaudio",

View File

@@ -15,7 +15,7 @@
# /// script
# dependencies = [
-# "transformers==4.55.3",
+# "transformers==4.55.4",
# "datasets[audio] >= 1.18.0",
# "torch >= 1.5",
# "torchaudio",

View File

@@ -15,7 +15,7 @@
# /// script
# dependencies = [
-# "transformers==4.55.3",
+# "transformers==4.55.4",
# "datasets[audio] >= 1.18.0",
# "torch >= 1.5",
# "torchaudio",

View File

@@ -15,7 +15,7 @@
# /// script
# dependencies = [
-# "transformers==4.55.3",
+# "transformers==4.55.4",
# "accelerate >= 0.12.0",
# "datasets >= 1.8.0",
# "sentencepiece != 0.1.92",

View File

@@ -15,7 +15,7 @@
# /// script
# dependencies = [
-# "transformers==4.55.3",
+# "transformers==4.55.4",
# "accelerate >= 0.12.0",
# "datasets >= 1.8.0",
# "sentencepiece != 0.1.92",

View File

@@ -15,7 +15,7 @@
# /// script
# dependencies = [
-# "transformers==4.55.3",
+# "transformers==4.55.4",
# "accelerate >= 0.12.0",
# "datasets >= 1.8.0",
# "sentencepiece != 0.1.92",

View File

@@ -15,7 +15,7 @@
# /// script
# dependencies = [
-# "transformers==4.55.3",
+# "transformers==4.55.4",
# "accelerate >= 0.12.0",
# "datasets >= 1.8.0",
# "sentencepiece != 0.1.92",

View File

@@ -14,7 +14,7 @@
# /// script
# dependencies = [
-# "transformers==4.55.3",
+# "transformers==4.55.4",
# "accelerate >= 0.12.0",
# "datasets >= 1.8.0",
# "sentencepiece != 0.1.92",

View File

@@ -16,7 +16,7 @@
# /// script
# dependencies = [
-# "transformers==4.55.3",
+# "transformers==4.55.4",
# "accelerate >= 0.12.0",
# "datasets >= 1.8.0",
# "sentencepiece != 0.1.92",

View File

@@ -16,7 +16,7 @@
# /// script
# dependencies = [
-# "transformers==4.55.3",
+# "transformers==4.55.4",
# "accelerate >= 0.21.0",
# "sentencepiece != 0.1.92",
# "protobuf",

View File

@@ -15,7 +15,7 @@
# /// script
# dependencies = [
-# "transformers==4.55.3",
+# "transformers==4.55.4",
# "accelerate >= 0.21.0",
# "sentencepiece != 0.1.92",
# "protobuf",

View File

@@ -15,7 +15,7 @@
# /// script
# dependencies = [
-# "transformers==4.55.3",
+# "transformers==4.55.4",
# "accelerate >= 0.12.0",
# "seqeval",
# "datasets >= 1.8.0",

View File

@@ -15,7 +15,7 @@
# /// script
# dependencies = [
-# "transformers==4.55.3",
+# "transformers==4.55.4",
# "accelerate >= 0.12.0",
# "seqeval",
# "datasets >= 1.8.0",

View File

@@ -15,7 +15,7 @@
# /// script
# dependencies = [
-# "transformers==4.55.3",
+# "transformers==4.55.4",
# "accelerate >= 0.12.0",
# "datasets >= 1.8.0",
# "sentencepiece != 0.1.92",

View File

@@ -15,7 +15,7 @@
# /// script
# dependencies = [
-# "transformers==4.55.3",
+# "transformers==4.55.4",
# "accelerate >= 0.12.0",
# "datasets >= 1.8.0",
# "sentencepiece != 0.1.92",

View File

@@ -463,7 +463,7 @@ install_requires = [
setup(
name="transformers",
-version="4.55.3", # expected format is one of x.y.z.dev0, or x.y.z.rc1 or x.y.z (no to dashes, yes to dots)
+version="4.55.4", # expected format is one of x.y.z.dev0, or x.y.z.rc1 or x.y.z (no to dashes, yes to dots)
author="The Hugging Face team (past and future) with the help of all our contributors (https://github.com/huggingface/transformers/graphs/contributors)",
author_email="transformers@huggingface.co",
description="State-of-the-art Machine Learning for JAX, PyTorch and TensorFlow",

View File

@@ -18,7 +18,7 @@
# to defer the actual importing for when the objects are requested. This way `import transformers` provides the names
# in the namespace without actually importing anything (and especially none of the backends).
-__version__ = "4.55.3"
+__version__ = "4.55.4"
from pathlib import Path
from typing import TYPE_CHECKING

View File

@@ -172,7 +172,7 @@ class Mxfp4GptOssExperts(nn.Module):
torch.zeros(self.num_experts, self.hidden_size, dtype=torch.float32), requires_grad=False
)
self.alpha = 1.702
self.limit = getattr(config, "swiglu_limit", 7.0)
self.gate_up_proj_precision_config = None
self.down_proj_precision_config = None
@@ -185,7 +185,7 @@ class Mxfp4GptOssExperts(nn.Module):
swiglu_fn = triton_kernels_hub.swiglu.swiglu_fn
with torch.cuda.device(hidden_states.device):
-act = FusedActivation(FnSpecs("swiglu", swiglu_fn, ("alpha", "limit")), (self.alpha, None), 2)
+act = FusedActivation(FnSpecs("swiglu", swiglu_fn, ("alpha", "limit")), (self.alpha, self.limit), 2)
intermediate_cache1 = matmul_ogs(
hidden_states,

View File

@@ -61,16 +61,7 @@ class Mxfp4HfQuantizer(HfQuantizer):
return
if not torch.cuda.is_available():
raise RuntimeError("Using MXFP4 quantized models requires a GPU")
if not is_accelerate_available():
raise ImportError("Using mxfp4 requires Accelerate: `pip install accelerate`")
compute_capability = torch.cuda.get_device_capability()
major, minor = compute_capability
if not is_triton_available("3.4.0") or not is_triton_kernels_availalble():
if self.pre_quantized and not self.quantization_config.dequantize:
if self.pre_quantized:
logger.warning_once(
"Using MXFP4 quantized models requires a GPU, we will default to dequantizing the model to bf16"
)