Compare commits
3 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| | d79b2d981f | | |
| | 90792b730a | | |
| | a03df6acd4 | | |
@@ -15,7 +15,7 @@
|
||||
|
||||
# /// script
|
||||
# dependencies = [
|
||||
# "transformers==4.55.3",
|
||||
# "transformers==4.55.4",
|
||||
# "datasets[audio]>=1.14.0",
|
||||
# "evaluate",
|
||||
# "librosa",
|
||||
|
||||
@@ -15,7 +15,7 @@
|
||||
|
||||
# /// script
|
||||
# dependencies = [
|
||||
# "transformers==4.55.3",
|
||||
# "transformers==4.55.4",
|
||||
# "torch>=1.5.0",
|
||||
# "torchvision>=0.6.0",
|
||||
# "datasets>=1.8.0",
|
||||
|
||||
@@ -14,7 +14,7 @@
|
||||
|
||||
# /// script
|
||||
# dependencies = [
|
||||
# "transformers==4.55.3",
|
||||
# "transformers==4.55.4",
|
||||
# "accelerate>=0.12.0",
|
||||
# "torch>=1.5.0",
|
||||
# "torchvision>=0.6.0",
|
||||
|
||||
@@ -14,7 +14,7 @@
|
||||
|
||||
# /// script
|
||||
# dependencies = [
|
||||
# "transformers==4.55.3",
|
||||
# "transformers==4.55.4",
|
||||
# "accelerate>=0.12.0",
|
||||
# "torch>=1.5.0",
|
||||
# "torchvision>=0.6.0",
|
||||
|
||||
@@ -14,7 +14,7 @@
|
||||
|
||||
# /// script
|
||||
# dependencies = [
|
||||
# "transformers==4.55.3",
|
||||
# "transformers==4.55.4",
|
||||
# "torch>=1.5.0",
|
||||
# "torchvision>=0.6.0",
|
||||
# "datasets>=1.8.0",
|
||||
|
||||
@@ -14,7 +14,7 @@
|
||||
|
||||
# /// script
|
||||
# dependencies = [
|
||||
# "transformers==4.55.3",
|
||||
# "transformers==4.55.4",
|
||||
# "torch>=1.5.0",
|
||||
# "torchvision>=0.6.0",
|
||||
# "datasets>=1.8.0",
|
||||
|
||||
@@ -14,7 +14,7 @@
|
||||
|
||||
# /// script
|
||||
# dependencies = [
|
||||
# "transformers==4.55.3",
|
||||
# "transformers==4.55.4",
|
||||
# "torch>=1.5.0",
|
||||
# "torchvision>=0.6.0",
|
||||
# "datasets>=1.8.0",
|
||||
|
||||
@@ -14,7 +14,7 @@
|
||||
|
||||
# /// script
|
||||
# dependencies = [
|
||||
# "transformers==4.55.3",
|
||||
# "transformers==4.55.4",
|
||||
# "albumentations >= 1.4.16",
|
||||
# "timm",
|
||||
# "datasets",
|
||||
|
||||
@@ -14,7 +14,7 @@
|
||||
|
||||
# /// script
|
||||
# dependencies = [
|
||||
# "transformers==4.55.3",
|
||||
# "transformers==4.55.4",
|
||||
# "albumentations >= 1.4.16",
|
||||
# "timm",
|
||||
# "datasets",
|
||||
|
||||
@@ -15,7 +15,7 @@
|
||||
|
||||
# /// script
|
||||
# dependencies = [
|
||||
# "transformers==4.55.3",
|
||||
# "transformers==4.55.4",
|
||||
# "albumentations >= 1.4.16",
|
||||
# "accelerate >= 0.12.0",
|
||||
# "torch >= 1.3",
|
||||
|
||||
@@ -15,7 +15,7 @@
|
||||
|
||||
# /// script
|
||||
# dependencies = [
|
||||
# "transformers==4.55.3",
|
||||
# "transformers==4.55.4",
|
||||
# "albumentations >= 1.4.16",
|
||||
# "accelerate >= 0.12.0",
|
||||
# "torch >= 1.3",
|
||||
|
||||
@@ -15,7 +15,7 @@
|
||||
|
||||
# /// script
|
||||
# dependencies = [
|
||||
# "transformers==4.55.3",
|
||||
# "transformers==4.55.4",
|
||||
# "albumentations >= 1.4.16",
|
||||
# "accelerate >= 0.12.0",
|
||||
# "torch >= 1.3",
|
||||
|
||||
@@ -15,7 +15,7 @@
|
||||
|
||||
# /// script
|
||||
# dependencies = [
|
||||
# "transformers==4.55.3",
|
||||
# "transformers==4.55.4",
|
||||
# "albumentations >= 1.4.16",
|
||||
# "accelerate >= 0.12.0",
|
||||
# "torch >= 1.3",
|
||||
|
||||
@@ -15,7 +15,7 @@
|
||||
|
||||
# /// script
|
||||
# dependencies = [
|
||||
# "transformers==4.55.3",
|
||||
# "transformers==4.55.4",
|
||||
# "albumentations >= 1.4.16",
|
||||
# "accelerate >= 0.12.0",
|
||||
# "torch >= 1.3",
|
||||
|
||||
@@ -15,7 +15,7 @@
|
||||
|
||||
# /// script
|
||||
# dependencies = [
|
||||
# "transformers==4.55.3",
|
||||
# "transformers==4.55.4",
|
||||
# "albumentations >= 1.4.16",
|
||||
# "accelerate >= 0.12.0",
|
||||
# "torch >= 1.3",
|
||||
|
||||
@@ -15,7 +15,7 @@
|
||||
|
||||
# /// script
|
||||
# dependencies = [
|
||||
# "transformers==4.55.3",
|
||||
# "transformers==4.55.4",
|
||||
# "albumentations >= 1.4.16",
|
||||
# "accelerate >= 0.12.0",
|
||||
# "torch >= 1.3",
|
||||
|
||||
@@ -15,7 +15,7 @@
|
||||
|
||||
# /// script
|
||||
# dependencies = [
|
||||
# "transformers==4.55.3",
|
||||
# "transformers==4.55.4",
|
||||
# "accelerate >= 0.12.0",
|
||||
# "sentencepiece != 0.1.92",
|
||||
# "protobuf",
|
||||
|
||||
@@ -15,7 +15,7 @@
|
||||
|
||||
# /// script
|
||||
# dependencies = [
|
||||
# "transformers==4.55.3",
|
||||
# "transformers==4.55.4",
|
||||
# "accelerate >= 0.12.0",
|
||||
# "sentencepiece != 0.1.92",
|
||||
# "protobuf",
|
||||
|
||||
@@ -14,7 +14,7 @@
|
||||
|
||||
# /// script
|
||||
# dependencies = [
|
||||
# "transformers==4.55.3",
|
||||
# "transformers==4.55.4",
|
||||
# "albumentations >= 1.4.16",
|
||||
# "timm",
|
||||
# "datasets>=4.0",
|
||||
|
||||
@@ -14,7 +14,7 @@
|
||||
|
||||
# /// script
|
||||
# dependencies = [
|
||||
# "transformers==4.55.3",
|
||||
# "transformers==4.55.4",
|
||||
# "albumentations >= 1.4.16",
|
||||
# "timm",
|
||||
# "datasets>=4.0",
|
||||
|
||||
@@ -14,7 +14,7 @@
|
||||
|
||||
# /// script
|
||||
# dependencies = [
|
||||
# "transformers==4.55.3",
|
||||
# "transformers==4.55.4",
|
||||
# "datasets >= 2.0.0",
|
||||
# "torch >= 1.3",
|
||||
# "accelerate",
|
||||
|
||||
@@ -14,7 +14,7 @@
|
||||
|
||||
# /// script
|
||||
# dependencies = [
|
||||
# "transformers==4.55.3",
|
||||
# "transformers==4.55.4",
|
||||
# "datasets >= 2.0.0",
|
||||
# "torch >= 1.3",
|
||||
# "accelerate",
|
||||
|
||||
@@ -14,7 +14,7 @@
|
||||
|
||||
# /// script
|
||||
# dependencies = [
|
||||
# "transformers==4.55.3",
|
||||
# "transformers==4.55.4",
|
||||
# "datasets[audio] >= 1.12.0",
|
||||
# "torch >= 1.5",
|
||||
# "torchaudio",
|
||||
|
||||
@@ -15,7 +15,7 @@
|
||||
|
||||
# /// script
|
||||
# dependencies = [
|
||||
# "transformers==4.55.3",
|
||||
# "transformers==4.55.4",
|
||||
# "datasets[audio] >= 1.18.0",
|
||||
# "torch >= 1.5",
|
||||
# "torchaudio",
|
||||
|
||||
@@ -15,7 +15,7 @@
|
||||
|
||||
# /// script
|
||||
# dependencies = [
|
||||
# "transformers==4.55.3",
|
||||
# "transformers==4.55.4",
|
||||
# "datasets[audio] >= 1.18.0",
|
||||
# "torch >= 1.5",
|
||||
# "torchaudio",
|
||||
|
||||
@@ -15,7 +15,7 @@
|
||||
|
||||
# /// script
|
||||
# dependencies = [
|
||||
# "transformers==4.55.3",
|
||||
# "transformers==4.55.4",
|
||||
# "datasets[audio] >= 1.18.0",
|
||||
# "torch >= 1.5",
|
||||
# "torchaudio",
|
||||
|
||||
@@ -15,7 +15,7 @@
|
||||
|
||||
# /// script
|
||||
# dependencies = [
|
||||
# "transformers==4.55.3",
|
||||
# "transformers==4.55.4",
|
||||
# "accelerate >= 0.12.0",
|
||||
# "datasets >= 1.8.0",
|
||||
# "sentencepiece != 0.1.92",
|
||||
|
||||
@@ -15,7 +15,7 @@
|
||||
|
||||
# /// script
|
||||
# dependencies = [
|
||||
# "transformers==4.55.3",
|
||||
# "transformers==4.55.4",
|
||||
# "accelerate >= 0.12.0",
|
||||
# "datasets >= 1.8.0",
|
||||
# "sentencepiece != 0.1.92",
|
||||
|
||||
@@ -15,7 +15,7 @@
|
||||
|
||||
# /// script
|
||||
# dependencies = [
|
||||
# "transformers==4.55.3",
|
||||
# "transformers==4.55.4",
|
||||
# "accelerate >= 0.12.0",
|
||||
# "datasets >= 1.8.0",
|
||||
# "sentencepiece != 0.1.92",
|
||||
|
||||
@@ -15,7 +15,7 @@
|
||||
|
||||
# /// script
|
||||
# dependencies = [
|
||||
# "transformers==4.55.3",
|
||||
# "transformers==4.55.4",
|
||||
# "accelerate >= 0.12.0",
|
||||
# "datasets >= 1.8.0",
|
||||
# "sentencepiece != 0.1.92",
|
||||
|
||||
@@ -14,7 +14,7 @@
|
||||
|
||||
# /// script
|
||||
# dependencies = [
|
||||
# "transformers==4.55.3",
|
||||
# "transformers==4.55.4",
|
||||
# "accelerate >= 0.12.0",
|
||||
# "datasets >= 1.8.0",
|
||||
# "sentencepiece != 0.1.92",
|
||||
|
||||
@@ -16,7 +16,7 @@
|
||||
|
||||
# /// script
|
||||
# dependencies = [
|
||||
# "transformers==4.55.3",
|
||||
# "transformers==4.55.4",
|
||||
# "accelerate >= 0.12.0",
|
||||
# "datasets >= 1.8.0",
|
||||
# "sentencepiece != 0.1.92",
|
||||
|
||||
@@ -16,7 +16,7 @@
|
||||
|
||||
# /// script
|
||||
# dependencies = [
|
||||
# "transformers==4.55.3",
|
||||
# "transformers==4.55.4",
|
||||
# "accelerate >= 0.21.0",
|
||||
# "sentencepiece != 0.1.92",
|
||||
# "protobuf",
|
||||
|
||||
@@ -15,7 +15,7 @@
|
||||
|
||||
# /// script
|
||||
# dependencies = [
|
||||
# "transformers==4.55.3",
|
||||
# "transformers==4.55.4",
|
||||
# "accelerate >= 0.21.0",
|
||||
# "sentencepiece != 0.1.92",
|
||||
# "protobuf",
|
||||
|
||||
@@ -15,7 +15,7 @@
|
||||
|
||||
# /// script
|
||||
# dependencies = [
|
||||
# "transformers==4.55.3",
|
||||
# "transformers==4.55.4",
|
||||
# "accelerate >= 0.12.0",
|
||||
# "seqeval",
|
||||
# "datasets >= 1.8.0",
|
||||
|
||||
@@ -15,7 +15,7 @@
|
||||
|
||||
# /// script
|
||||
# dependencies = [
|
||||
# "transformers==4.55.3",
|
||||
# "transformers==4.55.4",
|
||||
# "accelerate >= 0.12.0",
|
||||
# "seqeval",
|
||||
# "datasets >= 1.8.0",
|
||||
|
||||
@@ -15,7 +15,7 @@
|
||||
|
||||
# /// script
|
||||
# dependencies = [
|
||||
# "transformers==4.55.3",
|
||||
# "transformers==4.55.4",
|
||||
# "accelerate >= 0.12.0",
|
||||
# "datasets >= 1.8.0",
|
||||
# "sentencepiece != 0.1.92",
|
||||
|
||||
@@ -15,7 +15,7 @@
|
||||
|
||||
# /// script
|
||||
# dependencies = [
|
||||
# "transformers==4.55.3",
|
||||
# "transformers==4.55.4",
|
||||
# "accelerate >= 0.12.0",
|
||||
# "datasets >= 1.8.0",
|
||||
# "sentencepiece != 0.1.92",
|
||||
|
||||
2
setup.py
2
setup.py
@@ -463,7 +463,7 @@ install_requires = [
|
||||
|
||||
setup(
|
||||
name="transformers",
|
||||
version="4.55.3", # expected format is one of x.y.z.dev0, or x.y.z.rc1 or x.y.z (no to dashes, yes to dots)
|
||||
version="4.55.4", # expected format is one of x.y.z.dev0, or x.y.z.rc1 or x.y.z (no to dashes, yes to dots)
|
||||
author="The Hugging Face team (past and future) with the help of all our contributors (https://github.com/huggingface/transformers/graphs/contributors)",
|
||||
author_email="transformers@huggingface.co",
|
||||
description="State-of-the-art Machine Learning for JAX, PyTorch and TensorFlow",
|
||||
|
||||
@@ -18,7 +18,7 @@
|
||||
# to defer the actual importing for when the objects are requested. This way `import transformers` provides the names
|
||||
# in the namespace without actually importing anything (and especially none of the backends).
|
||||
|
||||
__version__ = "4.55.3"
|
||||
__version__ = "4.55.4"
|
||||
|
||||
from pathlib import Path
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
@@ -172,7 +172,7 @@ class Mxfp4GptOssExperts(nn.Module):
|
||||
torch.zeros(self.num_experts, self.hidden_size, dtype=torch.float32), requires_grad=False
|
||||
)
|
||||
self.alpha = 1.702
|
||||
|
||||
self.limit = getattr(config, "swiglu_limit", 7.0)
|
||||
self.gate_up_proj_precision_config = None
|
||||
self.down_proj_precision_config = None
|
||||
|
||||
@@ -185,7 +185,7 @@ class Mxfp4GptOssExperts(nn.Module):
|
||||
swiglu_fn = triton_kernels_hub.swiglu.swiglu_fn
|
||||
|
||||
with torch.cuda.device(hidden_states.device):
|
||||
act = FusedActivation(FnSpecs("swiglu", swiglu_fn, ("alpha", "limit")), (self.alpha, None), 2)
|
||||
act = FusedActivation(FnSpecs("swiglu", swiglu_fn, ("alpha", "limit")), (self.alpha, self.limit), 2)
|
||||
|
||||
intermediate_cache1 = matmul_ogs(
|
||||
hidden_states,
|
||||
|
||||
@@ -61,16 +61,7 @@ class Mxfp4HfQuantizer(HfQuantizer):
|
||||
return
|
||||
|
||||
if not torch.cuda.is_available():
|
||||
raise RuntimeError("Using MXFP4 quantized models requires a GPU")
|
||||
|
||||
if not is_accelerate_available():
|
||||
raise ImportError("Using mxfp4 requires Accelerate: `pip install accelerate`")
|
||||
|
||||
compute_capability = torch.cuda.get_device_capability()
|
||||
major, minor = compute_capability
|
||||
|
||||
if not is_triton_available("3.4.0") or not is_triton_kernels_availalble():
|
||||
if self.pre_quantized and not self.quantization_config.dequantize:
|
||||
if self.pre_quantized:
|
||||
logger.warning_once(
|
||||
"Using MXFP4 quantized models requires a GPU, we will default to dequantizing the model to bf16"
|
||||
)
|
||||
|
||||
Reference in New Issue
Block a user