Fix bigbird random attention (#21023)
* switch np.random.permutation to jax.random.permuation * remove comments * remove leftover comment * skip similarity tests * modify indices_prng_key usage, add deterministic behaviour * update style * remove unused import * remove copy statement since classes are not identical * remove numpy import * revert removing copied from statements * make style from copied * remove copied from statement * update copied from statement to include only np.ndarry * add deterministic args, unittestskip equivalence tests
This commit is contained in:
committed by
GitHub
parent
27b66bea01
commit
88399476c3
@@ -14,10 +14,8 @@
|
||||
|
||||
import unittest
|
||||
|
||||
import numpy as np
|
||||
|
||||
from transformers import BigBirdConfig, is_flax_available
|
||||
from transformers.testing_utils import require_flax, slow
|
||||
from transformers.testing_utils import is_pt_flax_cross_test, require_flax, slow
|
||||
|
||||
from ...test_modeling_flax_common import FlaxModelTesterMixin, ids_tensor, random_attention_mask
|
||||
|
||||
@@ -129,7 +127,11 @@ class FlaxBigBirdModelTester(unittest.TestCase):
|
||||
def prepare_config_and_inputs_for_common(self):
|
||||
config_and_inputs = self.prepare_config_and_inputs()
|
||||
config, input_ids, token_type_ids, attention_mask = config_and_inputs
|
||||
inputs_dict = {"input_ids": input_ids, "token_type_ids": token_type_ids, "attention_mask": attention_mask}
|
||||
inputs_dict = {
|
||||
"input_ids": input_ids,
|
||||
"token_type_ids": token_type_ids,
|
||||
"attention_mask": attention_mask,
|
||||
}
|
||||
return config, inputs_dict
|
||||
|
||||
|
||||
@@ -180,8 +182,7 @@ class FlaxBigBirdModelTest(FlaxModelTesterMixin, unittest.TestCase):
|
||||
def test_model_from_pretrained(self):
|
||||
for model_class_name in self.all_model_classes:
|
||||
model = model_class_name.from_pretrained("google/bigbird-roberta-base")
|
||||
outputs = model(np.ones((1, 1)))
|
||||
self.assertIsNotNone(outputs)
|
||||
self.assertIsNotNone(model)
|
||||
|
||||
def test_attention_outputs(self):
|
||||
if self.test_attn_probs:
|
||||
@@ -220,3 +221,17 @@ class FlaxBigBirdModelTest(FlaxModelTesterMixin, unittest.TestCase):
|
||||
return
|
||||
else:
|
||||
super().check_pt_flax_outputs(fx_outputs, pt_outputs, model_class, tol, name, attributes)
|
||||
|
||||
@is_pt_flax_cross_test
|
||||
@unittest.skip(
|
||||
reason="Current Pytorch implementation has bug with random attention -> it always uses it not matter if we are in eval/train mode"
|
||||
)
|
||||
def test_equivalence_flax_to_pt(self):
|
||||
pass
|
||||
|
||||
@is_pt_flax_cross_test
|
||||
@unittest.skip(
|
||||
reason="Current Pytorch implementation has bug with random attention -> it always uses it not matter if we are in eval/train mode"
|
||||
)
|
||||
def test_equivalence_pt_to_flax(self):
|
||||
pass
|
||||
|
||||
@@ -158,7 +158,7 @@ class FlaxModelTesterMixin:
|
||||
if "ForMultipleChoice" in model_class.__name__:
|
||||
inputs_dict = {
|
||||
k: jnp.broadcast_to(v[:, None], (v.shape[0], self.model_tester.num_choices, v.shape[-1]))
|
||||
if isinstance(v, (jnp.ndarray, np.ndarray))
|
||||
if isinstance(v, (jnp.ndarray, np.ndarray)) and k != "indices_prng_key"
|
||||
else v
|
||||
for k, v in inputs_dict.items()
|
||||
}
|
||||
@@ -629,7 +629,6 @@ class FlaxModelTesterMixin:
|
||||
def test_hidden_states_output(self):
|
||||
def check_hidden_states_output(inputs_dict, config, model_class):
|
||||
model = model_class(config)
|
||||
|
||||
outputs = model(**self._prepare_for_class(inputs_dict, model_class))
|
||||
hidden_states = outputs.encoder_hidden_states if config.is_encoder_decoder else outputs.hidden_states
|
||||
|
||||
|
||||
Reference in New Issue
Block a user