Avoid all-zero attention mask used in testing (#26469)
fix

Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
This commit is contained in:
@@ -2960,7 +2960,8 @@ def ids_tensor(shape, vocab_size, rng=None, name=None):
|
|||||||
def random_attention_mask(shape, rng=None, name=None):
|
def random_attention_mask(shape, rng=None, name=None):
|
||||||
attn_mask = ids_tensor(shape, vocab_size=2, rng=None, name=None)
|
attn_mask = ids_tensor(shape, vocab_size=2, rng=None, name=None)
|
||||||
# make sure that at least one token is attended to for each batch
|
# make sure that at least one token is attended to for each batch
|
||||||
attn_mask[:, -1] = 1
|
# we choose the 1st token so this property of `at least one being non-zero` still holds after applying causal mask
|
||||||
|
attn_mask[:, 0] = 1
|
||||||
return attn_mask
|
return attn_mask
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user