From 6bed0647febc745c688d3fb0ccb88bfcf3b05e97 Mon Sep 17 00:00:00 2001 From: Yih-Dar <2521628+ydshieh@users.noreply.github.com> Date: Wed, 13 Apr 2022 17:19:52 +0200 Subject: [PATCH] Reduce Funnel PT/TF diff (#16744) * Make Funnel Test less flaky Co-authored-by: ydshieh --- src/transformers/models/funnel/modeling_tf_funnel.py | 4 ++-- tests/funnel/test_modeling_funnel.py | 3 +++ 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/src/transformers/models/funnel/modeling_tf_funnel.py b/src/transformers/models/funnel/modeling_tf_funnel.py index c1ddef0ad9..d68b52c7bc 100644 --- a/src/transformers/models/funnel/modeling_tf_funnel.py +++ b/src/transformers/models/funnel/modeling_tf_funnel.py @@ -84,7 +84,7 @@ class TFFunnelEmbeddings(tf.keras.layers.Layer): self.vocab_size = config.vocab_size self.hidden_size = config.hidden_size - self.initializer_range = config.initializer_range + self.initializer_std = 1.0 if config.initializer_std is None else config.initializer_std self.LayerNorm = tf.keras.layers.LayerNormalization(epsilon=config.layer_norm_eps, name="layer_norm") self.dropout = tf.keras.layers.Dropout(rate=config.hidden_dropout) @@ -94,7 +94,7 @@ class TFFunnelEmbeddings(tf.keras.layers.Layer): self.weight = self.add_weight( name="weight", shape=[self.vocab_size, self.hidden_size], - initializer=get_initializer(initializer_range=self.initializer_range), + initializer=get_initializer(initializer_range=self.initializer_std), ) super().build(input_shape) diff --git a/tests/funnel/test_modeling_funnel.py b/tests/funnel/test_modeling_funnel.py index 481593ac23..73f5ec4b17 100644 --- a/tests/funnel/test_modeling_funnel.py +++ b/tests/funnel/test_modeling_funnel.py @@ -65,6 +65,7 @@ class FunnelModelTester: activation_dropout=0.0, max_position_embeddings=512, type_vocab_size=3, + initializer_std=0.02, # Set to a smaller value, so we can keep the small error threshold (1e-5) in the test num_labels=3, num_choices=4, scope=None, @@ -94,6 +95,7 @@ class FunnelModelTester: self.num_labels = num_labels self.num_choices = num_choices self.scope = scope + self.initializer_std = initializer_std # Used in the tests to check the size of the first attention layer self.num_attention_heads = n_head @@ -154,6 +156,7 @@ class FunnelModelTester: activation_dropout=self.activation_dropout, max_position_embeddings=self.max_position_embeddings, type_vocab_size=self.type_vocab_size, + initializer_std=self.initializer_std, ) def create_and_check_model(