Fix bigbird random attention (#21023)

* switch np.random.permutation to jax.random.permutation

* remove comments

* remove leftover comment

* skip similarity tests

* modify indices_prng_key usage, add deterministic behaviour

* update style

* remove unused import

* remove copy statement since classes are not identical

* remove numpy import

* revert removing copied from statements

* make style from copied

* remove copied from statement

* update copied from statement to include only np.ndarray

* add deterministic args, unittest.skip equivalence tests
This commit is contained in:
Bartosz Szmelczynski
2023-04-27 19:52:28 +02:00
committed by GitHub
parent 27b66bea01
commit 88399476c3
3 changed files with 135 additions and 58 deletions

View File

@@ -14,10 +14,8 @@
import unittest
import numpy as np
from transformers import BigBirdConfig, is_flax_available
from transformers.testing_utils import require_flax, slow
from transformers.testing_utils import is_pt_flax_cross_test, require_flax, slow
from ...test_modeling_flax_common import FlaxModelTesterMixin, ids_tensor, random_attention_mask
@@ -129,7 +127,11 @@ class FlaxBigBirdModelTester(unittest.TestCase):
def prepare_config_and_inputs_for_common(self):
config_and_inputs = self.prepare_config_and_inputs()
config, input_ids, token_type_ids, attention_mask = config_and_inputs
inputs_dict = {"input_ids": input_ids, "token_type_ids": token_type_ids, "attention_mask": attention_mask}
inputs_dict = {
"input_ids": input_ids,
"token_type_ids": token_type_ids,
"attention_mask": attention_mask,
}
return config, inputs_dict
@@ -180,8 +182,7 @@ class FlaxBigBirdModelTest(FlaxModelTesterMixin, unittest.TestCase):
def test_model_from_pretrained(self):
for model_class_name in self.all_model_classes:
model = model_class_name.from_pretrained("google/bigbird-roberta-base")
outputs = model(np.ones((1, 1)))
self.assertIsNotNone(outputs)
self.assertIsNotNone(model)
def test_attention_outputs(self):
if self.test_attn_probs:
@@ -220,3 +221,17 @@ class FlaxBigBirdModelTest(FlaxModelTesterMixin, unittest.TestCase):
return
else:
super().check_pt_flax_outputs(fx_outputs, pt_outputs, model_class, tol, name, attributes)
@is_pt_flax_cross_test
@unittest.skip(
reason="Current Pytorch implementation has bug with random attention -> it always uses it not matter if we are in eval/train mode"
)
def test_equivalence_flax_to_pt(self):
pass
@is_pt_flax_cross_test
@unittest.skip(
reason="Current Pytorch implementation has bug with random attention -> it always uses it not matter if we are in eval/train mode"
)
def test_equivalence_pt_to_flax(self):
pass