Compare commits
4 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
174890280b | ||
|
|
68451445d2 | ||
|
|
3d8cba81a0 | ||
|
|
c1df7f885f |
2
setup.py
2
setup.py
@@ -430,7 +430,7 @@ install_requires = [
|
||||
|
||||
setup(
|
||||
name="transformers",
|
||||
version="4.44.1", # expected format is one of x.y.z.dev0, or x.y.z.rc1 or x.y.z (no to dashes, yes to dots)
|
||||
version="4.44.2", # expected format is one of x.y.z.dev0, or x.y.z.rc1 or x.y.z (no to dashes, yes to dots)
|
||||
author="The Hugging Face team (past and future) with the help of all our contributors (https://github.com/huggingface/transformers/graphs/contributors)",
|
||||
author_email="transformers@huggingface.co",
|
||||
description="State-of-the-art Machine Learning for JAX, PyTorch and TensorFlow",
|
||||
|
||||
@@ -18,7 +18,7 @@
|
||||
# to defer the actual importing for when the objects are requested. This way `import transformers` provides the names
|
||||
# in the namespace without actually importing anything (and especially none of the backends).
|
||||
|
||||
__version__ = "4.44.1"
|
||||
__version__ = "4.44.2"
|
||||
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
|
||||
@@ -210,6 +210,7 @@ class HybridMambaAttentionDynamicCache(DynamicCache):
|
||||
"""
|
||||
|
||||
def __init__(self, config, batch_size, dtype=torch.float16, device=None):
|
||||
super().__init__()
|
||||
self.dtype = dtype
|
||||
self.layers_block_type = config.layers_block_type
|
||||
self.has_previous_state = False # only used by mamba
|
||||
@@ -629,7 +630,7 @@ class JambaMambaMixer(nn.Module):
|
||||
|
||||
# S4D real initialization. These are not discretized!
|
||||
# The core is to load them, compute the discrete states, then write the updated state. Keeps the memory bounded
|
||||
A = torch.arange(1, self.ssm_state_size + 1, dtype=torch.float32)[None, :]
|
||||
A = torch.arange(1, self.ssm_state_size + 1)[None, :]
|
||||
A = A.expand(self.intermediate_size, -1).contiguous()
|
||||
|
||||
self.A_log = nn.Parameter(torch.log(A))
|
||||
|
||||
@@ -522,7 +522,7 @@ class ProcessorMixin(PushToHubMixin):
|
||||
token=kwargs.get("token"),
|
||||
)
|
||||
|
||||
if set(self.to_dict().keys()) == {"processor_class"}:
|
||||
if set(processor_dict.keys()) == {"processor_class"}:
|
||||
return []
|
||||
return [output_processor_file]
|
||||
|
||||
|
||||
Reference in New Issue
Block a user