* Add GeLU10 (a clipped version of GeLU) to transformers to improve quantization performance.

* Add unittests.

* Import tensorflow after `is_tf_available` check.

* Fix incorrect TensorFlow function call: replace `tf.tensor` with `tf.constant`.

* style.

* use `tf.math.max`

* Fix tf tests.

* style.

* style style style style style style

* style style style style style style

* Address @sgugger comments.

* Fix wrong operator for raising ValueError for ClippedGELUActivation.
This commit is contained in:
Funtowicz Morgan
2022-02-22 18:21:16 +01:00
committed by GitHub
parent 2c3fcc647a
commit 32295b15a1
4 changed files with 73 additions and 0 deletions

View File

@@ -32,6 +32,19 @@ class TestActivations(unittest.TestCase):
self.assertTrue(torch.allclose(gelu_python(x), torch_builtin(x)))
self.assertFalse(torch.allclose(gelu_python(x), gelu_new(x)))
def test_gelu_10(self):
    """Check the "gelu_10" activation against the "gelu" activation.

    Two properties are verified on a fixed input vector:
    the largest "gelu_10" output is exactly 10.0, and wherever the
    "gelu_10" output is below 10.0 it matches the "gelu" output.
    """
    x = torch.tensor([-100, -1, -0.1, 0, 0.1, 1.0, 100])
    torch_builtin = get_activation("gelu")
    gelu10 = get_activation("gelu_10")

    y_gelu = torch_builtin(x)
    y_gelu_10 = gelu10(x)

    # 1 where the gelu_10 output is strictly below 10.0, 0 elsewhere, so
    # multiplying by the mask zeroes out the positions excluded from the
    # comparison below.
    clipped_mask = torch.where(y_gelu_10 < 10.0, 1, 0)

    # assertEqual (rather than assertTrue(a == b)) reports both values when
    # the check fails.
    self.assertEqual(torch.max(y_gelu_10).item(), 10.0)
    self.assertTrue(torch.allclose(y_gelu * clipped_mask, y_gelu_10 * clipped_mask))
def test_get_activation(self):
get_activation("swish")
get_activation("silu")
@@ -40,6 +53,7 @@ class TestActivations(unittest.TestCase):
get_activation("gelu_new")
get_activation("gelu_fast")
get_activation("gelu_python")
get_activation("gelu_10")
get_activation("quick_gelu")
get_activation("mish")
get_activation("linear")

View File

@@ -14,16 +14,33 @@
import unittest
import numpy as np
from transformers import is_tf_available
from transformers.testing_utils import require_tf
if is_tf_available():
import tensorflow as tf
from transformers.activations_tf import get_tf_activation
@require_tf
class TestTFActivations(unittest.TestCase):
def test_gelu_10(self):
    """Compare the TF "gelu_10" activation with the TF "gelu" activation.

    Asserts that the largest "gelu_10" output equals 10.0 and that the two
    activations agree wherever the "gelu_10" output is below 10.0.
    """
    inputs = tf.constant([-100, -1.0, -0.1, 0, 0.1, 1.0, 100.0])

    reference = get_tf_activation("gelu")(inputs)
    capped = get_tf_activation("gelu_10")(inputs)

    # 1.0 where the gelu_10 output is strictly below 10.0, 0.0 elsewhere.
    below_cap = tf.where(capped < 10.0, 1.0, 0.0)

    peak = tf.math.reduce_max(capped).numpy().item()
    self.assertEqual(peak, 10.0)

    masked_reference = reference * below_cap
    masked_capped = capped * below_cap
    self.assertTrue(np.allclose(masked_reference, masked_capped))
def test_get_activation(self):
get_tf_activation("swish")
get_tf_activation("silu")
@@ -32,6 +49,7 @@ class TestTFActivations(unittest.TestCase):
get_tf_activation("tanh")
get_tf_activation("gelu_new")
get_tf_activation("gelu_fast")
get_tf_activation("gelu_10")
get_tf_activation("mish")
get_tf_activation("quick_gelu")
get_tf_activation("glu")