TensorFlow improvements (#4530)
* Better None gradients handling * Apply Style * Apply Style * Create a loss class per task to compute its respective loss * Add loss classes to the ALBERT TF models * Add loss classes to the BERT TF models * Add question answering and multiple choice to TF Camembert * Remove prints * Add multiple choice model to TF DistilBERT + loss computation * Add question answering model to TF Electra + loss computation * Add token classification, question answering and multiple choice models to TF Flaubert * Add multiple choice model to TF Roberta + loss computation * Add multiple choice model to TF XLM + loss computation * Add multiple choice and question answering models to TF XLM-Roberta * Add multiple choice model to TF XLNet + loss computation * Remove unused parameters * Add task loss classes * Reorder TF imports + add new model classes * Add new model classes * Bugfix in TF T5 model * Bugfix for TF T5 tests * Bugfix in TF T5 model * Fix TF T5 model tests * Fix T5 tests + some renaming * Fix inheritance issue in the AutoX tests * Add tests for TF Flaubert and TF XLM Roberta * Add tests for TF Flaubert and TF XLM Roberta * Remove unused piece of code in the TF trainer * bugfix and remove unused code * Bugfix for TF 2.2 * Apply Style * Divide TFSequenceClassificationAndMultipleChoiceLoss into their two respective names * Apply style * Mirror the PT Trainer in the TF one: fp16, optimizers and tb_writer as class parameter and better dataset handling * Fix TF optimizations tests and apply style * Remove useless parameter * Bugfix and apply style * Fix TF Trainer prediction * Now the TF models return the loss like their PyTorch counterparts * Apply Style * Ignore some tests output * Take into account the SQuAD cls_index, p_mask and is_impossible parameters for the QuestionAnswering task models.
* Fix names for SQuAD data * Apply Style * Fix conflicts with 2.11 release * Fix conflicts with 2.11 * Fix wrong name * Add better documentation on the new create_optimizer function * Fix isort * logging_dir: use same default as PyTorch Co-authored-by: Julien Chaumond <chaumond@gmail.com>
This commit is contained in:
@@ -30,7 +30,7 @@ if is_tf_available():
|
||||
import tensorflow as tf
|
||||
import numpy as np
|
||||
|
||||
from transformers import tf_top_k_top_p_filtering, TFAdaptiveEmbedding
|
||||
from transformers import tf_top_k_top_p_filtering, TFAdaptiveEmbedding, TFSharedEmbeddings
|
||||
|
||||
if _tf_gpu_memory_limit is not None:
|
||||
gpus = tf.config.list_physical_devices("GPU")
|
||||
@@ -107,26 +107,45 @@ class TFModelTesterMixin:
|
||||
and getattr(module_member, "_keras_serializable", False)
|
||||
)
|
||||
for main_layer_class in tf_main_layer_classes:
|
||||
main_layer = main_layer_class(config)
|
||||
# T5MainLayer needs an embed_tokens parameter when called without the inputs_embeds parameter
|
||||
if "T5" in main_layer_class.__name__:
|
||||
# Take the same values than in TFT5ModelTester for this shared layer
|
||||
shared = TFSharedEmbeddings(99, 32, name="shared")
|
||||
main_layer = main_layer_class(config, embed_tokens=shared)
|
||||
else:
|
||||
main_layer = main_layer_class(config)
|
||||
symbolic_inputs = {
|
||||
name: tf.keras.Input(tensor.shape[1:], dtype=tensor.dtype) for name, tensor in inputs_dict.items()
|
||||
}
|
||||
|
||||
model = tf.keras.Model(symbolic_inputs, outputs=main_layer(symbolic_inputs))
|
||||
outputs = model(inputs_dict)
|
||||
|
||||
with tempfile.TemporaryDirectory() as tmpdirname:
|
||||
filepath = os.path.join(tmpdirname, "keras_model.h5")
|
||||
model.save(filepath)
|
||||
model = tf.keras.models.load_model(
|
||||
filepath, custom_objects={main_layer_class.__name__: main_layer_class}
|
||||
)
|
||||
if "T5" in main_layer_class.__name__:
|
||||
model = tf.keras.models.load_model(
|
||||
filepath,
|
||||
custom_objects={
|
||||
main_layer_class.__name__: main_layer_class,
|
||||
"TFSharedEmbeddings": TFSharedEmbeddings,
|
||||
},
|
||||
)
|
||||
else:
|
||||
model = tf.keras.models.load_model(
|
||||
filepath, custom_objects={main_layer_class.__name__: main_layer_class}
|
||||
)
|
||||
assert isinstance(model, tf.keras.Model)
|
||||
after_outputs = model(inputs_dict)
|
||||
self.assert_outputs_same(after_outputs, outputs)
|
||||
|
||||
def assert_outputs_same(self, after_outputs, outputs):
|
||||
# Make sure we don't have nans
|
||||
out_1 = after_outputs[0].numpy()
|
||||
if isinstance(after_outputs, tf.Tensor):
|
||||
out_1 = after_outputs.numpy()
|
||||
else:
|
||||
out_1 = after_outputs[0].numpy()
|
||||
out_2 = outputs[0].numpy()
|
||||
self.assertEqual(out_1.shape, out_2.shape)
|
||||
out_1 = out_1[~np.isnan(out_1)]
|
||||
@@ -269,7 +288,6 @@ class TFModelTesterMixin:
|
||||
inputs_keywords = copy.deepcopy(inputs_dict)
|
||||
input_ids = inputs_keywords.pop("input_ids" if not self.is_encoder_decoder else "inputs", None,)
|
||||
outputs_keywords = model(input_ids, **inputs_keywords)
|
||||
|
||||
output_dict = outputs_dict[0].numpy()
|
||||
output_keywords = outputs_keywords[0].numpy()
|
||||
|
||||
|
||||
54
tests/test_modeling_tf_flaubert.py
Normal file
54
tests/test_modeling_tf_flaubert.py
Normal file
@@ -0,0 +1,54 @@
|
||||
# coding=utf-8
|
||||
# Copyright 2018 The Google AI Language Team Authors.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import unittest
|
||||
|
||||
from transformers import is_tf_available
|
||||
|
||||
from .utils import require_tf, slow
|
||||
|
||||
|
||||
if is_tf_available():
|
||||
import tensorflow as tf
|
||||
import numpy as np
|
||||
from transformers import TFFlaubertModel
|
||||
|
||||
|
||||
@require_tf
class TFFlaubertModelIntegrationTest(unittest.TestCase):
    """Integration check of `TFFlaubertModel` against stored reference values."""

    @slow
    def test_output_embeds_base_model(self):
        """Load "jplu/tf-flaubert-small-cased", run it on one sentence and compare
        the shape and a 3x3 slice of its first output with the reference below."""
        # Token ids for "J'aime flaubert !"
        token_ids = [[0, 158, 735, 2592, 1424, 6727, 82, 1]]
        reference_values = [
            [
                [-1.8768773, -1.566555, 0.27072418],
                [-1.6920038, -0.5873505, 1.9329599],
                [-2.9563985, -1.6993835, 1.7972052],
            ]
        ]

        flaubert = TFFlaubertModel.from_pretrained("jplu/tf-flaubert-small-cased")
        first_output = flaubert(tf.convert_to_tensor(token_ids, dtype=tf.int32))[0]

        # One sequence of 8 token ids in, so the leading dimensions must be (1, 8).
        self.assertEqual(first_output.shape, tf.TensorShape((1, 8, 512)))

        # Only the first three positions / first three features are compared.
        reference_slice = tf.convert_to_tensor(reference_values, dtype=tf.float32)
        actual_slice = first_output[:, :3, :3].numpy()
        self.assertTrue(np.allclose(actual_slice, reference_slice.numpy(), atol=1e-4))
|
||||
55
tests/test_modeling_tf_xlm_roberta.py
Normal file
55
tests/test_modeling_tf_xlm_roberta.py
Normal file
@@ -0,0 +1,55 @@
|
||||
# coding=utf-8
|
||||
# Copyright 2018 The Google AI Language Team Authors.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import unittest
|
||||
|
||||
from transformers import is_tf_available
|
||||
|
||||
from .utils import require_tf, slow
|
||||
|
||||
|
||||
if is_tf_available():
|
||||
import tensorflow as tf
|
||||
import numpy as np
|
||||
from transformers import TFXLMRobertaModel
|
||||
|
||||
|
||||
# NOTE(review): this class exercises TFXLMRobertaModel but carries a Flaubert
# name, presumably copied from the Flaubert test. Name kept unchanged here so
# existing test selectors keep working; confirm before renaming.
@require_tf
class TFFlaubertModelIntegrationTest(unittest.TestCase):
    """Integration check of `TFXLMRobertaModel` against stored reference values."""

    @slow
    def test_output_embeds_base_model(self):
        """Load "jplu/tf-xlm-roberta-base", run it on one sentence and compare
        the shape and a 3x3 slice of its first output with the reference below."""
        # Token ids for "My dog is cute"
        token_ids = [[0, 2646, 10269, 83, 99942, 2]]
        mask_values = [[1, 1, 1, 1, 1, 1]]
        reference_values = [
            [
                [0.0681762, 0.10894451, 0.06772504],
                [-0.06423668, 0.02366615, 0.04329344],
                [-0.06057295, 0.09974135, -0.00070584],
            ]
        ]

        xlm_roberta = TFXLMRobertaModel.from_pretrained("jplu/tf-xlm-roberta-base")

        model_inputs = {
            "input_ids": tf.convert_to_tensor(token_ids, dtype=tf.int32),
            "attention_mask": tf.convert_to_tensor(mask_values, dtype=tf.int32),
        }
        first_output = xlm_roberta(model_inputs)[0]

        # One sequence of 6 token ids in, so the leading dimensions must be (1, 6).
        self.assertEqual(first_output.shape, tf.TensorShape((1, 6, 768)))

        # Only the first three positions / first three features are compared.
        reference_slice = tf.convert_to_tensor(reference_values, dtype=tf.float32)
        actual_slice = first_output[:, :3, :3].numpy()
        self.assertTrue(np.allclose(actual_slice, reference_slice.numpy(), atol=1e-4))
|
||||
@@ -47,7 +47,7 @@ class OptimizationFTest(unittest.TestCase):
|
||||
with strategy.scope():
|
||||
accumulator = GradientAccumulator()
|
||||
variable = tf.Variable([4.0, 3.0])
|
||||
optimizer = create_optimizer(5e-5, 10, 5)
|
||||
optimizer, _ = create_optimizer(5e-5, 10, 5)
|
||||
gradient_placeholder = tf.Variable([0.0, 0.0], trainable=False)
|
||||
|
||||
def accumulate_on_replica(gradient):
|
||||
|
||||
Reference in New Issue
Block a user