Compare commits
2 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
4e9f6fc67c | ||
|
|
4277b3dd46 |
2
setup.py
2
setup.py
@@ -418,7 +418,7 @@ install_requires = [
|
||||
|
||||
setup(
|
||||
name="transformers",
|
||||
version="4.27.3", # expected format is one of x.y.z.dev0, or x.y.z.rc1 or x.y.z (no to dashes, yes to dots)
|
||||
version="4.27.4", # expected format is one of x.y.z.dev0, or x.y.z.rc1 or x.y.z (no to dashes, yes to dots)
|
||||
author="The Hugging Face team (past and future) with the help of all our contributors (https://github.com/huggingface/transformers/graphs/contributors)",
|
||||
author_email="transformers@huggingface.co",
|
||||
description="State-of-the-art Machine Learning for JAX, PyTorch and TensorFlow",
|
||||
|
||||
@@ -18,7 +18,7 @@
|
||||
# to defer the actual importing for when the objects are requested. This way `import transformers` provides the names
|
||||
# in the namespace without actually importing anything (and especially none of the backends).
|
||||
|
||||
__version__ = "4.27.3"
|
||||
__version__ = "4.27.4"
|
||||
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
|
||||
@@ -172,7 +172,8 @@ class MultiHeadAttention(nn.Module):
|
||||
k, v = cache[self.layer_id]
|
||||
cache[self.layer_id] = (k, v)
|
||||
|
||||
scores = torch.matmul(q, k.transpose(2, 3)) / math.sqrt(dim_per_head) # (bs, n_heads, qlen, klen)
|
||||
q = q / math.sqrt(dim_per_head) # (bs, n_heads, qlen, dim_per_head)
|
||||
scores = torch.matmul(q, k.transpose(2, 3)) # (bs, n_heads, qlen, klen)
|
||||
mask = (mask == 0).view(mask_reshape).expand_as(scores) # (bs, n_heads, qlen, klen)
|
||||
scores.masked_fill_(mask, torch.finfo(scores.dtype).min) # (bs, n_heads, qlen, klen)
|
||||
|
||||
|
||||
@@ -176,7 +176,8 @@ class MultiHeadAttention(nn.Module):
|
||||
k, v = cache[self.layer_id]
|
||||
cache[self.layer_id] = (k, v)
|
||||
|
||||
scores = torch.matmul(q, k.transpose(2, 3)) / math.sqrt(dim_per_head) # (bs, n_heads, qlen, klen)
|
||||
q = q / math.sqrt(dim_per_head) # (bs, n_heads, qlen, dim_per_head)
|
||||
scores = torch.matmul(q, k.transpose(2, 3)) # (bs, n_heads, qlen, klen)
|
||||
mask = (mask == 0).view(mask_reshape).expand_as(scores) # (bs, n_heads, qlen, klen)
|
||||
scores.masked_fill_(mask, torch.finfo(scores.dtype).min) # (bs, n_heads, qlen, klen)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user