Compare commits

...

4 Commits

Author SHA1 Message Date
Arthur Zucker
174890280b v4.44.2
Some checks failed
Release - Conda / build_and_package (push) Has been cancelled
Secret Leaks / trufflehog (push) Has been cancelled
2024-08-22 18:46:28 +02:00
Franz Louis Cesista
68451445d2 Fix regression on Processor.save_pretrained caused by #31691 (#32921)
fix save_pretrained
2024-08-22 18:46:09 +02:00
Gal Cohen (galco)
3d8cba81a0 fix: no need to dtype A in jamba (#32924)
Co-authored-by: Gal Cohen <galc@ai21.com>
2024-08-22 18:46:03 +02:00
Gal Cohen (galco)
c1df7f885f fix: jamba cache fails to use torch.nn.module (#32894)
Co-authored-by: Gal Cohen <galc@ai21.com>
2024-08-22 18:46:02 +02:00
4 changed files with 5 additions and 4 deletions

View File

@@ -430,7 +430,7 @@ install_requires = [
setup(
name="transformers",
- version="4.44.1", # expected format is one of x.y.z.dev0, or x.y.z.rc1 or x.y.z (no to dashes, yes to dots)
+ version="4.44.2", # expected format is one of x.y.z.dev0, or x.y.z.rc1 or x.y.z (no to dashes, yes to dots)
author="The Hugging Face team (past and future) with the help of all our contributors (https://github.com/huggingface/transformers/graphs/contributors)",
author_email="transformers@huggingface.co",
description="State-of-the-art Machine Learning for JAX, PyTorch and TensorFlow",

View File

@@ -18,7 +18,7 @@
# to defer the actual importing for when the objects are requested. This way `import transformers` provides the names
# in the namespace without actually importing anything (and especially none of the backends).
- __version__ = "4.44.1"
+ __version__ = "4.44.2"
from typing import TYPE_CHECKING

View File

@@ -210,6 +210,7 @@ class HybridMambaAttentionDynamicCache(DynamicCache):
"""
def __init__(self, config, batch_size, dtype=torch.float16, device=None):
super().__init__()
+ self.dtype = dtype
self.layers_block_type = config.layers_block_type
self.has_previous_state = False # only used by mamba
@@ -629,7 +630,7 @@ class JambaMambaMixer(nn.Module):
# S4D real initialization. These are not discretized!
# The core is to load them, compute the discrete states, then write the updated state. Keeps the memory bounded
- A = torch.arange(1, self.ssm_state_size + 1, dtype=torch.float32)[None, :]
+ A = torch.arange(1, self.ssm_state_size + 1)[None, :]
A = A.expand(self.intermediate_size, -1).contiguous()
self.A_log = nn.Parameter(torch.log(A))

View File

@@ -522,7 +522,7 @@ class ProcessorMixin(PushToHubMixin):
token=kwargs.get("token"),
)
- if set(self.to_dict().keys()) == {"processor_class"}:
+ if set(processor_dict.keys()) == {"processor_class"}:
return []
return [output_processor_file]