Gelu10 (#15676)
* Add GeLU10 (a clipped version of GeLU) to transformers to improve quantization performance. * Add unit tests. * Import tensorflow after the `is_tf_available` check. * Fix wrong TensorFlow function: replace `tf.tensor` with `tf.constant`. * Style. * Use `tf.math.max`. * Fix TF tests. * Style. * style style style style style style * style style style style style style * Address @sgugger comments. * Fix wrong operator for raising ValueError in ClippedGELUActivation.
This commit is contained in:
@@ -32,6 +32,19 @@ class TestActivations(unittest.TestCase):
|
||||
self.assertTrue(torch.allclose(gelu_python(x), torch_builtin(x)))
|
||||
self.assertFalse(torch.allclose(gelu_python(x), gelu_new(x)))
|
||||
|
||||
def test_gelu_10(self):
    """Check the "gelu_10" activation against the built-in "gelu" activation.

    Two properties are asserted on a small fixed input:

    * the largest "gelu_10" output is exactly 10.0;
    * wherever the "gelu_10" output is below 10.0, it matches the "gelu"
      output (compared with ``torch.allclose``).
    """
    x = torch.tensor([-100, -1, -0.1, 0, 0.1, 1.0, 100])
    torch_builtin = get_activation("gelu")
    gelu10 = get_activation("gelu_10")

    y_gelu = torch_builtin(x)
    y_gelu_10 = gelu10(x)

    # 1 where the gelu_10 output stayed below 10.0, 0 elsewhere. Multiplying
    # both outputs by this mask zeroes the remaining entries, so only the
    # below-10.0 positions take part in the comparison.
    clipped_mask = torch.where(y_gelu_10 < 10.0, 1, 0)

    # assertEqual (rather than assertTrue(a == b)) reports both values when
    # the check fails.
    self.assertEqual(torch.max(y_gelu_10).item(), 10.0)
    self.assertTrue(torch.allclose(y_gelu * clipped_mask, y_gelu_10 * clipped_mask))
|
||||
|
||||
def test_get_activation(self):
|
||||
get_activation("swish")
|
||||
get_activation("silu")
|
||||
@@ -40,6 +53,7 @@ class TestActivations(unittest.TestCase):
|
||||
get_activation("gelu_new")
|
||||
get_activation("gelu_fast")
|
||||
get_activation("gelu_python")
|
||||
get_activation("gelu_10")
|
||||
get_activation("quick_gelu")
|
||||
get_activation("mish")
|
||||
get_activation("linear")
|
||||
|
||||
@@ -14,16 +14,33 @@
|
||||
|
||||
import unittest
|
||||
|
||||
import numpy as np
|
||||
|
||||
from transformers import is_tf_available
|
||||
from transformers.testing_utils import require_tf
|
||||
|
||||
|
||||
if is_tf_available():
|
||||
import tensorflow as tf
|
||||
|
||||
from transformers.activations_tf import get_tf_activation
|
||||
|
||||
|
||||
@require_tf
|
||||
class TestTFActivations(unittest.TestCase):
|
||||
def test_gelu_10(self):
    """Compare the TF "gelu_10" activation with the TF "gelu" activation.

    Asserts that the maximum "gelu_10" output on a fixed input is exactly
    10.0, and that the two activations agree (``np.allclose``) at every
    position where the "gelu_10" output is below 10.0.
    """
    inputs = tf.constant([-100, -1.0, -0.1, 0, 0.1, 1.0, 100.0])

    reference_out = get_tf_activation("gelu")(inputs)
    clipped_out = get_tf_activation("gelu_10")(inputs)

    # 1.0 where the gelu_10 output is below 10.0, 0.0 elsewhere; used to
    # restrict the comparison to the below-10.0 positions.
    below_ceiling = tf.where(clipped_out < 10.0, 1.0, 0.0)

    largest = tf.math.reduce_max(clipped_out).numpy().item()
    self.assertEqual(largest, 10.0)

    masked_reference = reference_out * below_ceiling
    masked_clipped = clipped_out * below_ceiling
    self.assertTrue(np.allclose(masked_reference, masked_clipped))
|
||||
|
||||
def test_get_activation(self):
|
||||
get_tf_activation("swish")
|
||||
get_tf_activation("silu")
|
||||
@@ -32,6 +49,7 @@ class TestTFActivations(unittest.TestCase):
|
||||
get_tf_activation("tanh")
|
||||
get_tf_activation("gelu_new")
|
||||
get_tf_activation("gelu_fast")
|
||||
get_tf_activation("gelu_10")
|
||||
get_tf_activation("mish")
|
||||
get_tf_activation("quick_gelu")
|
||||
get_tf_activation("glu")
|
||||
|
||||
Reference in New Issue
Block a user