From 391177441b133645c02181b57370ab12f71b88c4 Mon Sep 17 00:00:00 2001 From: Yih-Dar <2521628+ydshieh@users.noreply.github.com> Date: Fri, 29 Sep 2023 11:06:06 +0200 Subject: [PATCH] Avoid all-zero attention mask used in testing (#26469) fix Co-authored-by: ydshieh --- tests/test_modeling_common.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/test_modeling_common.py b/tests/test_modeling_common.py index 8c2a277b4b..2789fe32c1 100755 --- a/tests/test_modeling_common.py +++ b/tests/test_modeling_common.py @@ -2960,7 +2960,8 @@ def ids_tensor(shape, vocab_size, rng=None, name=None): def random_attention_mask(shape, rng=None, name=None): attn_mask = ids_tensor(shape, vocab_size=2, rng=None, name=None) # make sure that at least one token is attended to for each batch - attn_mask[:, -1] = 1 + # we choose the 1st token so this property of `at least one being non-zero` still holds after applying causal mask + attn_mask[:, 0] = 1 return attn_mask