v4.44.2

Fix regression on `Processor.save_pretrained` caused by #31691 (#32921)
fix save_pretrained
2024-08-22 18:46:28 +02:00 · 2024-08-22 18:46:09 +02:00 · 2024-08-22 18:46:03 +02:00 · 2024-08-22 18:46:02 +02:00
4 changed files with 5 additions and 4 deletions
--- a/setup.py
+++ b/setup.py
@@ -430,7 +430,7 @@ install_requires = [

 setup(
    name="transformers",
-    version="4.44.1",  # expected format is one of x.y.z.dev0, or x.y.z.rc1 or x.y.z (no to dashes, yes to dots)
+    version="4.44.2",  # expected format is one of x.y.z.dev0, or x.y.z.rc1 or x.y.z (no to dashes, yes to dots)
    author="The Hugging Face team (past and future) with the help of all our contributors (https://github.com/huggingface/transformers/graphs/contributors)",
    author_email="transformers@huggingface.co",
    description="State-of-the-art Machine Learning for JAX, PyTorch and TensorFlow",
--- a/src/transformers/__init__.py
+++ b/src/transformers/__init__.py
@@ -18,7 +18,7 @@
 # to defer the actual importing for when the objects are requested. This way `import transformers` provides the names
 # in the namespace without actually importing anything (and especially none of the backends).

-__version__ = "4.44.1"
+__version__ = "4.44.2"

 from typing import TYPE_CHECKING

--- a/src/transformers/models/jamba/modeling_jamba.py
+++ b/src/transformers/models/jamba/modeling_jamba.py
@@ -210,6 +210,7 @@ class HybridMambaAttentionDynamicCache(DynamicCache):
    """

    def __init__(self, config, batch_size, dtype=torch.float16, device=None):
+        super().__init__()
        self.dtype = dtype
        self.layers_block_type = config.layers_block_type
        self.has_previous_state = False  # only used by mamba
@@ -629,7 +630,7 @@ class JambaMambaMixer(nn.Module):

        # S4D real initialization. These are not discretized!
        # The core is to load them, compute the discrete states, then write the updated state. Keeps the memory bounded
-        A = torch.arange(1, self.ssm_state_size + 1, dtype=torch.float32)[None, :]
+        A = torch.arange(1, self.ssm_state_size + 1)[None, :]
        A = A.expand(self.intermediate_size, -1).contiguous()

        self.A_log = nn.Parameter(torch.log(A))
--- a/src/transformers/processing_utils.py
+++ b/src/transformers/processing_utils.py
@@ -522,7 +522,7 @@ class ProcessorMixin(PushToHubMixin):
                token=kwargs.get("token"),
            )

-        if set(self.to_dict().keys()) == {"processor_class"}:
+        if set(processor_dict.keys()) == {"processor_class"}:
            return []
        return [output_processor_file]
Author	SHA1	Message	Date
Arthur Zucker	174890280b	v4.44.2 [Some checks failed: Release - Conda / build_and_package (push) has been cancelled; Secret Leaks / trufflehog (push) has been cancelled]	2024-08-22 18:46:28 +02:00
Franz Louis Cesista	68451445d2	Fix regression on `Processor.save_pretrained` caused by #31691 (#32921): fix save_pretrained	2024-08-22 18:46:09 +02:00
Gal Cohen (galco)	3d8cba81a0	fix: no need to dtype A in jamba (#32924). Co-authored-by: Gal Cohen <galc@ai21.com>	2024-08-22 18:46:03 +02:00
Gal Cohen (galco)	c1df7f885f	fix: jamba cache fails to use torch.nn.module (#32894). Co-authored-by: Gal Cohen <galc@ai21.com>	2024-08-22 18:46:02 +02:00