Add tf_keras imports to prepare for Keras 3 (#28588)

* Port core files + ESM (because ESM code is odd)

* Search-replace in modelling code

* Fix up transfo_xl as well

* Fix other core files + tests (still need to add correct import to tests)

* Fix cookiecutter

* make fixup, fix imports in some more core files

* Auto-add imports to tests

* Cleanup, add imports to sagemaker tests

* Use correct exception for importing tf_keras

* Fixes in modeling_tf_utils

* make fixup

* Correct version parsing code

* Ensure the pipeline tests correctly revert to float32 after each test

* Ensure the pipeline tests correctly revert to float32 after each test

* More tf.keras -> keras

* Add dtype cast

* Better imports of tf_keras

* Add a cast for tf.assign, just in case

* Fix callback imports
This commit is contained in:
Matt
2024-01-30 17:26:36 +00:00
committed by GitHub
parent 1d489b3e61
commit 415e9a0980
109 changed files with 2801 additions and 2658 deletions

View File

@@ -43,6 +43,7 @@ if is_tf_available():
TFMinLengthLogitsProcessor,
tf_top_k_top_p_filtering,
)
from transformers.modeling_tf_utils import keras
if is_tensorflow_text_available():
import tensorflow_text as text
@@ -254,7 +255,7 @@ class TFGenerationIntegrationTests(unittest.TestCase, GenerationIntegrationTests
# file needed to load the TF tokenizer
hf_hub_download(repo_id="google/flan-t5-small", filename="spiece.model", local_dir=tmp_dir)
class CompleteSentenceTransformer(tf.keras.layers.Layer):
class CompleteSentenceTransformer(keras.layers.Layer):
def __init__(self):
super().__init__()
self.tokenizer = text.SentencepieceTokenizer(
@@ -271,9 +272,9 @@ class TFGenerationIntegrationTests(unittest.TestCase, GenerationIntegrationTests
return self.tokenizer.detokenize(outputs)
complete_model = CompleteSentenceTransformer()
inputs = tf.keras.layers.Input(shape=(1,), dtype=tf.string, name="inputs")
inputs = keras.layers.Input(shape=(1,), dtype=tf.string, name="inputs")
outputs = complete_model(inputs)
keras_model = tf.keras.Model(inputs, outputs)
keras_model = keras.Model(inputs, outputs)
keras_model.save(tmp_dir)
def test_eos_token_id_int_and_list_top_k_top_sampling(self):

View File

@@ -10,6 +10,8 @@ from transformers.testing_utils import require_tensorflow_text, require_tf, slow
if is_tf_available():
import tensorflow as tf
from transformers.modeling_tf_utils import keras
if is_tensorflow_text_available():
from transformers.models.bert import TFBertTokenizer
@@ -18,8 +20,9 @@ TOKENIZER_CHECKPOINTS = ["bert-base-uncased", "bert-base-cased"]
TINY_MODEL_CHECKPOINT = "hf-internal-testing/tiny-bert-tf-only"
if is_tf_available():
from transformers.modeling_tf_utils import keras
class ModelToSave(tf.keras.Model):
class ModelToSave(keras.Model):
def __init__(self, tokenizer):
super().__init__()
self.tokenizer = tokenizer

View File

@@ -44,6 +44,7 @@ if is_tf_available():
TFBlipTextModel,
TFBlipVisionModel,
)
from transformers.modeling_tf_utils import keras
from transformers.models.blip.modeling_tf_blip import TF_BLIP_PRETRAINED_MODEL_ARCHIVE_LIST
@@ -172,9 +173,9 @@ class TFBlipVisionModelTest(TFModelTesterMixin, unittest.TestCase):
for model_class in self.all_model_classes:
model = model_class(config)
self.assertIsInstance(model.get_input_embeddings(), (tf.keras.layers.Layer))
self.assertIsInstance(model.get_input_embeddings(), (keras.layers.Layer))
x = model.get_output_embeddings()
self.assertTrue(x is None or isinstance(x, tf.keras.layers.Layer))
self.assertTrue(x is None or isinstance(x, keras.layers.Layer))
def test_model(self):
config_and_inputs = self.model_tester.prepare_config_and_inputs()

View File

@@ -38,6 +38,7 @@ if is_tf_available():
import tensorflow as tf
from transformers import TFCLIPModel, TFCLIPTextModel, TFCLIPVisionModel, TFSharedEmbeddings
from transformers.modeling_tf_utils import keras
from transformers.models.clip.modeling_tf_clip import TF_CLIP_PRETRAINED_MODEL_ARCHIVE_LIST
@@ -151,9 +152,9 @@ class TFCLIPVisionModelTest(TFModelTesterMixin, unittest.TestCase):
for model_class in self.all_model_classes:
model = model_class(config)
self.assertIsInstance(model.get_input_embeddings(), (tf.keras.layers.Layer))
self.assertIsInstance(model.get_input_embeddings(), (keras.layers.Layer))
x = model.get_output_embeddings()
self.assertTrue(x is None or isinstance(x, tf.keras.layers.Layer))
self.assertTrue(x is None or isinstance(x, keras.layers.Layer))
def test_forward_signature(self):
config, _ = self.model_tester.prepare_config_and_inputs_for_common()
@@ -283,7 +284,7 @@ class TFCLIPVisionModelTest(TFModelTesterMixin, unittest.TestCase):
with tempfile.TemporaryDirectory() as tmpdirname:
model.save_pretrained(tmpdirname, saved_model=True)
saved_model_dir = os.path.join(tmpdirname, "saved_model", "1")
model = tf.keras.models.load_model(saved_model_dir)
model = keras.models.load_model(saved_model_dir)
outputs = model(class_inputs_dict)
output_hidden_states = outputs["hidden_states"]
output_attentions = outputs["attentions"]
@@ -443,7 +444,7 @@ class TFCLIPTextModelTest(TFModelTesterMixin, unittest.TestCase):
with tempfile.TemporaryDirectory() as tmpdirname:
model.save_pretrained(tmpdirname, saved_model=True)
saved_model_dir = os.path.join(tmpdirname, "saved_model", "1")
model = tf.keras.models.load_model(saved_model_dir)
model = keras.models.load_model(saved_model_dir)
outputs = model(class_inputs_dict)
output_hidden_states = outputs["hidden_states"]
output_attentions = outputs["attentions"]
@@ -565,7 +566,7 @@ class TFCLIPModelTest(TFModelTesterMixin, PipelineTesterMixin, unittest.TestCase
and module_member_name[: -len("MainLayer")] == model_class.__name__[: -len("Model")]
for module_member in (getattr(module, module_member_name),)
if isinstance(module_member, type)
and tf.keras.layers.Layer in module_member.__bases__
and keras.layers.Layer in module_member.__bases__
and getattr(module_member, "_keras_serializable", False)
}
for main_layer_class in tf_main_layer_classes:
@@ -579,17 +580,17 @@ class TFCLIPModelTest(TFModelTesterMixin, PipelineTesterMixin, unittest.TestCase
main_layer = main_layer_class(config)
symbolic_inputs = {
name: tf.keras.Input(tensor.shape[1:], dtype=tensor.dtype) for name, tensor in inputs_dict.items()
name: keras.Input(tensor.shape[1:], dtype=tensor.dtype) for name, tensor in inputs_dict.items()
}
model = tf.keras.Model(symbolic_inputs, outputs=main_layer(symbolic_inputs))
model = keras.Model(symbolic_inputs, outputs=main_layer(symbolic_inputs))
outputs = model(inputs_dict)
with tempfile.TemporaryDirectory() as tmpdirname:
filepath = os.path.join(tmpdirname, "keras_model.h5")
model.save(filepath)
if "T5" in main_layer_class.__name__:
model = tf.keras.models.load_model(
model = keras.models.load_model(
filepath,
custom_objects={
main_layer_class.__name__: main_layer_class,
@@ -597,10 +598,10 @@ class TFCLIPModelTest(TFModelTesterMixin, PipelineTesterMixin, unittest.TestCase
},
)
else:
model = tf.keras.models.load_model(
model = keras.models.load_model(
filepath, custom_objects={main_layer_class.__name__: main_layer_class}
)
assert isinstance(model, tf.keras.Model)
assert isinstance(model, keras.Model)
after_outputs = model(inputs_dict)
self.assert_outputs_same(after_outputs, outputs)

View File

@@ -37,6 +37,7 @@ if is_tf_available():
TFConvBertForTokenClassification,
TFConvBertModel,
)
from transformers.modeling_tf_utils import keras
class TFConvBertModelTester:
@@ -306,7 +307,7 @@ class TFConvBertModelTest(TFModelTesterMixin, PipelineTesterMixin, unittest.Test
with tempfile.TemporaryDirectory() as tmpdirname:
model.save_pretrained(tmpdirname, saved_model=True)
saved_model_dir = os.path.join(tmpdirname, "saved_model", "1")
model = tf.keras.models.load_model(saved_model_dir)
model = keras.models.load_model(saved_model_dir)
outputs = model(class_inputs_dict)
if self.is_encoder_decoder:

View File

@@ -29,6 +29,7 @@ from ...test_pipeline_mixin import PipelineTesterMixin
if is_tf_available():
import tensorflow as tf
from transformers.modeling_tf_utils import keras
from transformers.models.ctrl.modeling_tf_ctrl import (
TF_CTRL_PRETRAINED_MODEL_ARCHIVE_LIST,
TFCTRLForSequenceClassification,
@@ -226,18 +227,18 @@ class TFCTRLModelTest(TFModelTesterMixin, PipelineTesterMixin, unittest.TestCase
for model_class in self.all_model_classes:
model = model_class(config)
model.build_in_name_scope() # may be needed for the get_bias() call below
assert isinstance(model.get_input_embeddings(), tf.keras.layers.Layer)
assert isinstance(model.get_input_embeddings(), keras.layers.Layer)
if model_class in list_lm_models:
x = model.get_output_embeddings()
assert isinstance(x, tf.keras.layers.Layer)
assert isinstance(x, keras.layers.Layer)
name = model.get_bias()
assert isinstance(name, dict)
for k, v in name.items():
assert isinstance(v, tf.Variable)
elif model_class in list_other_models_with_output_ebd:
x = model.get_output_embeddings()
assert isinstance(x, tf.keras.layers.Layer)
assert isinstance(x, keras.layers.Layer)
name = model.get_bias()
assert name is None
else:

View File

@@ -22,6 +22,7 @@ if is_tf_available():
import tensorflow as tf
from transformers import TFCvtForImageClassification, TFCvtModel
from transformers.modeling_tf_utils import keras
from transformers.models.cvt.modeling_tf_cvt import TF_CVT_PRETRAINED_MODEL_ARCHIVE_LIST
@@ -191,10 +192,10 @@ class TFCvtModelTest(TFModelTesterMixin, PipelineTesterMixin, unittest.TestCase)
@unittest.skip(reason="Get `Failed to determine best cudnn convolution algo.` error after using TF 2.12+cuda 11.8")
def test_keras_fit_mixed_precision(self):
policy = tf.keras.mixed_precision.Policy("mixed_float16")
tf.keras.mixed_precision.set_global_policy(policy)
policy = keras.mixed_precision.Policy("mixed_float16")
keras.mixed_precision.set_global_policy(policy)
super().test_keras_fit()
tf.keras.mixed_precision.set_global_policy("float32")
keras.mixed_precision.set_global_policy("float32")
def test_forward_signature(self):
config, _ = self.model_tester.prepare_config_and_inputs_for_common()

View File

@@ -39,6 +39,7 @@ if is_tf_available():
TFData2VecVisionForSemanticSegmentation,
TFData2VecVisionModel,
)
from transformers.modeling_tf_utils import keras
from transformers.models.data2vec.modeling_tf_data2vec_vision import (
TF_DATA2VEC_VISION_PRETRAINED_MODEL_ARCHIVE_LIST,
)
@@ -216,9 +217,9 @@ class TFData2VecVisionModelTest(TFModelTesterMixin, PipelineTesterMixin, unittes
for model_class in self.all_model_classes:
model = model_class(config)
self.assertIsInstance(model.get_input_embeddings(), (tf.keras.layers.Layer))
self.assertIsInstance(model.get_input_embeddings(), (keras.layers.Layer))
x = model.get_output_embeddings()
self.assertTrue(x is None or isinstance(x, tf.keras.layers.Layer))
self.assertTrue(x is None or isinstance(x, keras.layers.Layer))
def test_forward_signature(self):
config, _ = self.model_tester.prepare_config_and_inputs_for_common()
@@ -365,7 +366,7 @@ class TFData2VecVisionModelTest(TFModelTesterMixin, PipelineTesterMixin, unittes
key: val for key, val in prepared_for_class.items() if key not in label_names
}
self.assertGreater(len(inputs_minus_labels), 0)
model.compile(optimizer=tf.keras.optimizers.SGD(0.0), run_eagerly=True)
model.compile(optimizer=keras.optimizers.SGD(0.0), run_eagerly=True)
# Make sure the model fits without crashing regardless of where we pass the labels
history1 = model.fit(

View File

@@ -40,6 +40,7 @@ if is_tf_available():
TFDeiTForMaskedImageModeling,
TFDeiTModel,
)
from transformers.modeling_tf_utils import keras
from transformers.models.deit.modeling_tf_deit import TF_DEIT_PRETRAINED_MODEL_ARCHIVE_LIST
@@ -211,9 +212,9 @@ class TFDeiTModelTest(TFModelTesterMixin, PipelineTesterMixin, unittest.TestCase
for model_class in self.all_model_classes:
model = model_class(config)
self.assertIsInstance(model.get_input_embeddings(), (tf.keras.layers.Layer))
self.assertIsInstance(model.get_input_embeddings(), (keras.layers.Layer))
x = model.get_output_embeddings()
self.assertTrue(x is None or isinstance(x, tf.keras.layers.Dense))
self.assertTrue(x is None or isinstance(x, keras.layers.Dense))
def test_forward_signature(self):
config, _ = self.model_tester.prepare_config_and_inputs_for_common()

View File

@@ -37,6 +37,7 @@ if is_tf_available():
TFEfficientFormerForImageClassificationWithTeacher,
TFEfficientFormerModel,
)
from transformers.modeling_tf_utils import keras
from transformers.models.efficientformer.modeling_tf_efficientformer import (
TF_EFFICIENTFORMER_PRETRAINED_MODEL_ARCHIVE_LIST,
)
@@ -355,7 +356,7 @@ class TFEfficientFormerModelTest(TFModelTesterMixin, PipelineTesterMixin, unitte
# These are maximally general inputs for the model, with multiple None dimensions
# Hopefully this will catch any conditionals that fail for flexible shapes
functional_inputs = {
key: tf.keras.Input(shape=val.shape[1:], dtype=val.dtype, name=key)
key: keras.Input(shape=val.shape[1:], dtype=val.dtype, name=key)
for key, val in model.input_signature.items()
if key in model.dummy_inputs
}

View File

@@ -509,7 +509,7 @@ class TFEncoderDecoderMixin:
tf_outputs = tf_model(tf_inputs_dict)
# tf models returned loss is usually a tensor rather than a scalar.
# (see `hf_compute_loss`: it uses `tf.keras.losses.Reduction.NONE`)
# (see `hf_compute_loss`: it uses `keras.losses.Reduction.NONE`)
# Change it here to a scalar to match PyTorch models' loss
tf_loss = getattr(tf_outputs, "loss", None)
if tf_loss is not None:

View File

@@ -30,6 +30,7 @@ if is_tf_available():
import numpy
import tensorflow as tf
from transformers.modeling_tf_utils import keras
from transformers.models.esm.modeling_tf_esm import (
TF_ESM_PRETRAINED_MODEL_ARCHIVE_LIST,
TFEsmForMaskedLM,
@@ -269,7 +270,7 @@ class TFEsmModelTest(TFModelTesterMixin, PipelineTesterMixin, unittest.TestCase)
for model_class in self.all_model_classes:
model = model_class(config)
assert isinstance(model.get_input_embeddings(), tf.keras.layers.Layer)
assert isinstance(model.get_input_embeddings(), keras.layers.Layer)
if model_class is TFEsmForMaskedLM:
# Output embedding test differs from the main test because they're a matrix, not a layer
name = model.get_bias()

View File

@@ -10,6 +10,7 @@ from transformers.testing_utils import require_keras_nlp, require_tf, slow
if is_tf_available():
import tensorflow as tf
if is_keras_nlp_available():
from transformers.models.gpt2 import TFGPT2Tokenizer

View File

@@ -46,6 +46,7 @@ if is_tf_available():
import tensorflow as tf
from transformers import TFGroupViTModel, TFGroupViTTextModel, TFGroupViTVisionModel, TFSharedEmbeddings
from transformers.modeling_tf_utils import keras
from transformers.models.groupvit.modeling_tf_groupvit import TF_GROUPVIT_PRETRAINED_MODEL_ARCHIVE_LIST
@@ -186,9 +187,9 @@ class TFGroupViTVisionModelTest(TFModelTesterMixin, unittest.TestCase):
for model_class in self.all_model_classes:
model = model_class(config)
self.assertIsInstance(model.get_input_embeddings(), (tf.keras.layers.Layer))
self.assertIsInstance(model.get_input_embeddings(), (keras.layers.Layer))
x = model.get_output_embeddings()
self.assertTrue(x is None or isinstance(x, tf.keras.layers.Layer))
self.assertTrue(x is None or isinstance(x, keras.layers.Layer))
def test_forward_signature(self):
config, _ = self.model_tester.prepare_config_and_inputs_for_common()
@@ -340,7 +341,7 @@ class TFGroupViTVisionModelTest(TFModelTesterMixin, unittest.TestCase):
with tempfile.TemporaryDirectory() as tmpdirname:
model.save_pretrained(tmpdirname, saved_model=True)
saved_model_dir = os.path.join(tmpdirname, "saved_model", "1")
model = tf.keras.models.load_model(saved_model_dir)
model = keras.models.load_model(saved_model_dir)
outputs = model(class_inputs_dict)
output_hidden_states = outputs["hidden_states"]
output_attentions = outputs["attentions"]
@@ -505,7 +506,7 @@ class TFGroupViTTextModelTest(TFModelTesterMixin, unittest.TestCase):
with tempfile.TemporaryDirectory() as tmpdirname:
model.save_pretrained(tmpdirname, saved_model=True)
saved_model_dir = os.path.join(tmpdirname, "saved_model", "1")
model = tf.keras.models.load_model(saved_model_dir)
model = keras.models.load_model(saved_model_dir)
outputs = model(class_inputs_dict)
output_hidden_states = outputs["hidden_states"]
output_attentions = outputs["attentions"]
@@ -655,7 +656,7 @@ class TFGroupViTModelTest(TFModelTesterMixin, PipelineTesterMixin, unittest.Test
and module_member_name[: -len("MainLayer")] == model_class.__name__[: -len("Model")]
for module_member in (getattr(module, module_member_name),)
if isinstance(module_member, type)
and tf.keras.layers.Layer in module_member.__bases__
and keras.layers.Layer in module_member.__bases__
and getattr(module_member, "_keras_serializable", False)
}
for main_layer_class in tf_main_layer_classes:
@@ -669,17 +670,17 @@ class TFGroupViTModelTest(TFModelTesterMixin, PipelineTesterMixin, unittest.Test
main_layer = main_layer_class(config)
symbolic_inputs = {
name: tf.keras.Input(tensor.shape[1:], dtype=tensor.dtype) for name, tensor in inputs_dict.items()
name: keras.Input(tensor.shape[1:], dtype=tensor.dtype) for name, tensor in inputs_dict.items()
}
model = tf.keras.Model(symbolic_inputs, outputs=main_layer(symbolic_inputs))
model = keras.Model(symbolic_inputs, outputs=main_layer(symbolic_inputs))
outputs = model(inputs_dict)
with tempfile.TemporaryDirectory() as tmpdirname:
filepath = os.path.join(tmpdirname, "keras_model.h5")
model.save(filepath)
if "T5" in main_layer_class.__name__:
model = tf.keras.models.load_model(
model = keras.models.load_model(
filepath,
custom_objects={
main_layer_class.__name__: main_layer_class,
@@ -687,10 +688,10 @@ class TFGroupViTModelTest(TFModelTesterMixin, PipelineTesterMixin, unittest.Test
},
)
else:
model = tf.keras.models.load_model(
model = keras.models.load_model(
filepath, custom_objects={main_layer_class.__name__: main_layer_class}
)
assert isinstance(model, tf.keras.Model)
assert isinstance(model, keras.Model)
after_outputs = model(inputs_dict)
self.assert_outputs_same(after_outputs, outputs)

View File

@@ -36,6 +36,7 @@ if is_tf_available():
import tensorflow as tf
from transformers import SamProcessor, TFSamModel
from transformers.modeling_tf_utils import keras
if is_vision_available():
from PIL import Image
@@ -322,9 +323,9 @@ class TFSamModelTest(TFModelTesterMixin, PipelineTesterMixin, unittest.TestCase)
for model_class in self.all_model_classes:
model = model_class(config)
self.assertIsInstance(model.get_input_embeddings(), (tf.keras.layers.Layer))
self.assertIsInstance(model.get_input_embeddings(), (keras.layers.Layer))
x = model.get_output_embeddings()
self.assertTrue(x is None or isinstance(x, tf.keras.layers.Dense))
self.assertTrue(x is None or isinstance(x, keras.layers.Dense))
def test_forward_signature(self):
config, _ = self.model_tester.prepare_config_and_inputs_for_common()

View File

@@ -34,6 +34,7 @@ from ...test_pipeline_mixin import PipelineTesterMixin
if is_tf_available():
import tensorflow as tf
from transformers.modeling_tf_utils import keras
from transformers.models.swin.modeling_tf_swin import (
TF_SWIN_PRETRAINED_MODEL_ARCHIVE_LIST,
TFSwinForImageClassification,
@@ -237,9 +238,9 @@ class TFSwinModelTest(TFModelTesterMixin, PipelineTesterMixin, unittest.TestCase
for model_class in self.all_model_classes:
model = model_class(config)
self.assertIsInstance(model.get_input_embeddings(), tf.keras.layers.Layer)
self.assertIsInstance(model.get_input_embeddings(), keras.layers.Layer)
x = model.get_output_embeddings()
self.assertTrue(x is None or isinstance(x, tf.keras.layers.Dense))
self.assertTrue(x is None or isinstance(x, keras.layers.Dense))
def test_forward_signature(self):
config, _ = self.model_tester.prepare_config_and_inputs_for_common()

View File

@@ -442,7 +442,7 @@ class TFVisionEncoderDecoderMixin:
tf_outputs = tf_model(tf_inputs_dict)
# tf models returned loss is usually a tensor rather than a scalar.
# (see `hf_compute_loss`: it uses `tf.keras.losses.Reduction.NONE`)
# (see `hf_compute_loss`: it uses `keras.losses.Reduction.NONE`)
# Change it here to a scalar to match PyTorch models' loss
tf_loss = getattr(tf_outputs, "loss", None)
if tf_loss is not None:

View File

@@ -33,6 +33,7 @@ if is_tf_available():
import tensorflow as tf
from transformers import TFViTForImageClassification, TFViTModel
from transformers.modeling_tf_utils import keras
if is_vision_available():
@@ -188,9 +189,9 @@ class TFViTModelTest(TFModelTesterMixin, PipelineTesterMixin, unittest.TestCase)
for model_class in self.all_model_classes:
model = model_class(config)
self.assertIsInstance(model.get_input_embeddings(), (tf.keras.layers.Layer))
self.assertIsInstance(model.get_input_embeddings(), (keras.layers.Layer))
x = model.get_output_embeddings()
self.assertTrue(x is None or isinstance(x, tf.keras.layers.Layer))
self.assertTrue(x is None or isinstance(x, keras.layers.Layer))
def test_forward_signature(self):
config, _ = self.model_tester.prepare_config_and_inputs_for_common()

View File

@@ -41,6 +41,7 @@ if is_tf_available():
import tensorflow as tf
from transformers import TFViTMAEForPreTraining, TFViTMAEModel
from transformers.modeling_tf_utils import keras
if is_vision_available():
@@ -188,9 +189,9 @@ class TFViTMAEModelTest(TFModelTesterMixin, PipelineTesterMixin, unittest.TestCa
for model_class in self.all_model_classes:
model = model_class(config)
self.assertIsInstance(model.get_input_embeddings(), (tf.keras.layers.Layer))
self.assertIsInstance(model.get_input_embeddings(), (keras.layers.Layer))
x = model.get_output_embeddings()
self.assertTrue(x is None or isinstance(x, tf.keras.layers.Layer))
self.assertTrue(x is None or isinstance(x, keras.layers.Layer))
def test_forward_signature(self):
config, _ = self.model_tester.prepare_config_and_inputs_for_common()
@@ -301,7 +302,7 @@ class TFViTMAEModelTest(TFModelTesterMixin, PipelineTesterMixin, unittest.TestCa
and module_member_name[: -len("MainLayer")] == model_class.__name__[: -len("Model")]
for module_member in (getattr(module, module_member_name),)
if isinstance(module_member, type)
and tf.keras.layers.Layer in module_member.__bases__
and keras.layers.Layer in module_member.__bases__
and getattr(module_member, "_keras_serializable", False)
}
@@ -314,19 +315,17 @@ class TFViTMAEModelTest(TFModelTesterMixin, PipelineTesterMixin, unittest.TestCa
main_layer = main_layer_class(config)
symbolic_inputs = {
name: tf.keras.Input(tensor.shape[1:], dtype=tensor.dtype) for name, tensor in inputs_dict.items()
name: keras.Input(tensor.shape[1:], dtype=tensor.dtype) for name, tensor in inputs_dict.items()
}
model = tf.keras.Model(symbolic_inputs, outputs=main_layer(symbolic_inputs))
model = keras.Model(symbolic_inputs, outputs=main_layer(symbolic_inputs))
outputs = model(inputs_dict)
with tempfile.TemporaryDirectory() as tmpdirname:
filepath = os.path.join(tmpdirname, "keras_model.h5")
model.save(filepath)
model = tf.keras.models.load_model(
filepath, custom_objects={main_layer_class.__name__: main_layer_class}
)
assert isinstance(model, tf.keras.Model)
model = keras.models.load_model(filepath, custom_objects={main_layer_class.__name__: main_layer_class})
assert isinstance(model, keras.Model)
after_outputs = model(inputs_dict)
self.assert_outputs_same(after_outputs, outputs)

View File

@@ -5,10 +5,24 @@ import time
import tensorflow as tf
from datasets import load_dataset
from packaging.version import parse
from transformers import AutoTokenizer, TFAutoModelForSequenceClassification
# Resolve a Keras 2-compatible `keras` module for this script.
# Prefer the standalone `tf_keras` package; only fall back to the plain
# `keras` package when `tf_keras` is not importable.
try:
    import tf_keras as keras
except ImportError:  # also covers ModuleNotFoundError, which is a subclass of ImportError
    import keras

    # The fallback `keras` package is only accepted while its major version
    # is at most 2; anything newer is rejected with an install hint.
    if parse(keras.__version__).major > 2:
        raise ValueError(
            "Your currently installed version of Keras is Keras 3, but this is not yet supported in "
            "Transformers. Please install the backwards-compatible tf-keras package with "
            "`pip install tf-keras`."
        )
if __name__ == "__main__":
parser = argparse.ArgumentParser()
@@ -75,9 +89,9 @@ if __name__ == "__main__":
)
# define optimizer and loss
optimizer = tf.keras.optimizers.Adam(learning_rate=args.learning_rate)
loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
metrics = [tf.keras.metrics.SparseCategoricalAccuracy()]
optimizer = keras.optimizers.Adam(learning_rate=args.learning_rate)
loss = keras.losses.SparseCategoricalCrossentropy(from_logits=True)
metrics = [keras.metrics.SparseCategoricalAccuracy()]
model.compile(optimizer=optimizer, loss=loss, metrics=metrics)
start_train_time = time.time()

View File

@@ -9,6 +9,7 @@ from datasets import load_dataset
from tqdm import tqdm
from transformers import AutoTokenizer, TFAutoModelForSequenceClassification
from transformers.modeling_tf_utils import keras
from transformers.utils import is_sagemaker_dp_enabled
@@ -135,9 +136,9 @@ if __name__ == "__main__":
)
# define optimizer and loss
optimizer = tf.keras.optimizers.Adam(learning_rate=args.learning_rate)
loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
metrics = [tf.keras.metrics.SparseCategoricalAccuracy()]
optimizer = keras.optimizers.Adam(learning_rate=args.learning_rate)
loss = keras.losses.SparseCategoricalCrossentropy(from_logits=True)
metrics = [keras.metrics.SparseCategoricalAccuracy()]
model.compile(optimizer=optimizer, loss=loss, metrics=metrics)
# Training

View File

@@ -80,6 +80,7 @@ if is_tf_available():
TFSampleDecoderOnlyOutput,
TFSampleEncoderDecoderOutput,
)
from transformers.modeling_tf_utils import keras
tf.config.experimental.enable_tensor_float_32_execution(False)
@@ -365,7 +366,7 @@ class TFModelTesterMixin:
and module_member_name[: -len("MainLayer")] == model_class.__name__[: -len("Model")]
for module_member in (getattr(module, module_member_name),)
if isinstance(module_member, type)
and tf.keras.layers.Layer in module_member.__bases__
and keras.layers.Layer in module_member.__bases__
and getattr(module_member, "_keras_serializable", False)
}
for main_layer_class in tf_main_layer_classes:
@@ -379,17 +380,17 @@ class TFModelTesterMixin:
main_layer = main_layer_class(config)
symbolic_inputs = {
name: tf.keras.Input(tensor.shape[1:], dtype=tensor.dtype) for name, tensor in inputs_dict.items()
name: keras.Input(tensor.shape[1:], dtype=tensor.dtype) for name, tensor in inputs_dict.items()
}
model = tf.keras.Model(symbolic_inputs, outputs=main_layer(symbolic_inputs))
model = keras.Model(symbolic_inputs, outputs=main_layer(symbolic_inputs))
outputs = model(inputs_dict)
with tempfile.TemporaryDirectory() as tmpdirname:
filepath = os.path.join(tmpdirname, "keras_model.h5")
model.save(filepath)
if "T5" in main_layer_class.__name__:
model = tf.keras.models.load_model(
model = keras.models.load_model(
filepath,
custom_objects={
main_layer_class.__name__: main_layer_class,
@@ -397,10 +398,10 @@ class TFModelTesterMixin:
},
)
else:
model = tf.keras.models.load_model(
model = keras.models.load_model(
filepath, custom_objects={main_layer_class.__name__: main_layer_class}
)
assert isinstance(model, tf.keras.Model)
assert isinstance(model, keras.Model)
after_outputs = model(inputs_dict)
self.assert_outputs_same(after_outputs, outputs)
@@ -610,7 +611,7 @@ class TFModelTesterMixin:
tf_outputs = tf_model(tf_inputs_dict)
# tf models returned loss is usually a tensor rather than a scalar.
# (see `hf_compute_loss`: it uses `tf.keras.losses.Reduction.NONE`)
# (see `hf_compute_loss`: it uses `keras.losses.Reduction.NONE`)
# Change it here to a scalar to match PyTorch models' loss
tf_loss = getattr(tf_outputs, "loss", None)
if tf_loss is not None:
@@ -697,7 +698,7 @@ class TFModelTesterMixin:
# These are maximally general inputs for the model, with multiple None dimensions
# Hopefully this will catch any conditionals that fail for flexible shapes
functional_inputs = {
key: tf.keras.Input(shape=val.shape[1:], dtype=val.dtype, name=key)
key: keras.Input(shape=val.shape[1:], dtype=val.dtype, name=key)
for key, val in model.input_signature.items()
if key in model.dummy_inputs
}
@@ -706,7 +707,7 @@ class TFModelTesterMixin:
hidden_states = outputs_dict[0]
# Compile extended model
functional_model = tf.keras.Model(inputs=functional_inputs, outputs=hidden_states)
functional_model = keras.Model(inputs=functional_inputs, outputs=hidden_states)
model_out = functional_model.predict(model.dummy_inputs) # Check we can pass inputs with the Keras API
self.assertTrue(model_out is not None)
with tempfile.TemporaryDirectory() as tmpdirname:
@@ -918,12 +919,12 @@ class TFModelTesterMixin:
for model_class in self.all_model_classes:
model = model_class(config)
self.assertIsInstance(model.get_input_embeddings(), tf.keras.layers.Layer)
self.assertIsInstance(model.get_input_embeddings(), keras.layers.Layer)
legacy_text_in_text_out = model.get_lm_head() is not None
if model_class in text_in_text_out_models or legacy_text_in_text_out:
out_embeddings = model.get_output_embeddings()
self.assertIsInstance(out_embeddings, tf.keras.layers.Layer)
self.assertIsInstance(out_embeddings, keras.layers.Layer)
bias = model.get_bias()
if bias is not None:
self.assertIsInstance(bias, dict)
@@ -931,7 +932,7 @@ class TFModelTesterMixin:
self.assertIsInstance(v, tf.Variable)
elif model_class in speech_in_text_out_models:
out_embeddings = model.get_output_embeddings()
self.assertIsInstance(out_embeddings, tf.keras.layers.Layer)
self.assertIsInstance(out_embeddings, keras.layers.Layer)
bias = model.get_bias()
self.assertIsNone(bias)
else:
@@ -1079,14 +1080,14 @@ class TFModelTesterMixin:
def test_resize_token_embeddings(self):
# TODO (joao): after the embeddings refactor is complete, rework this test so as to rely exclusively on
# tf.keras.layers.Embedding
# keras.layers.Embedding
if not self.test_resize_embeddings:
return
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
def _get_word_embedding_weight(model, embedding_layer):
if isinstance(embedding_layer, tf.keras.layers.Embedding):
if isinstance(embedding_layer, keras.layers.Embedding):
# builds the embeddings layer
model.build_in_name_scope()
return embedding_layer.embeddings
@@ -1456,7 +1457,7 @@ class TFModelTesterMixin:
]
for accuracy_class in accuracy_classes:
if model.__class__.__name__.endswith(accuracy_class):
metrics = [tf.keras.metrics.SparseCategoricalAccuracy()]
metrics = [keras.metrics.SparseCategoricalAccuracy()]
break
else:
metrics = []
@@ -1472,7 +1473,7 @@ class TFModelTesterMixin:
model_weights = model.get_weights()
# Run eagerly to save some expensive compilation times
model.compile(optimizer=tf.keras.optimizers.SGD(0.0), run_eagerly=True, metrics=metrics)
model.compile(optimizer=keras.optimizers.SGD(0.0), run_eagerly=True, metrics=metrics)
# Make sure the model fits without crashing regardless of where we pass the labels
history1 = model.fit(
prepared_for_class,
@@ -1557,7 +1558,7 @@ class TFModelTesterMixin:
# After testing that the model accepts all int inputs, confirm that its dummies are int32
for key, tensor in model.dummy_inputs.items():
self.assertTrue(
isinstance(tensor, tf.Tensor) or tf.keras.backend.is_keras_tensor(tensor),
isinstance(tensor, tf.Tensor) or keras.backend.is_keras_tensor(tensor),
"Dummy inputs should be tf.Tensor!",
)
if tensor.dtype.is_integer:

View File

@@ -64,7 +64,7 @@ if is_tf_available():
TFPreTrainedModel,
TFRagModel,
)
from transformers.modeling_tf_utils import tf_shard_checkpoint, unpack_inputs
from transformers.modeling_tf_utils import keras, tf_shard_checkpoint, unpack_inputs
from transformers.tf_utils import stable_softmax
tf.config.experimental.enable_tensor_float_32_execution(False)
@@ -282,12 +282,12 @@ class TFModelUtilsTest(unittest.TestCase):
def test_shard_checkpoint(self):
# This is the model we will use, total size 340,000 bytes.
model = tf.keras.Sequential(
model = keras.Sequential(
[
tf.keras.layers.Dense(200, use_bias=False), # size 80,000
tf.keras.layers.Dense(200, use_bias=False), # size 160,000
tf.keras.layers.Dense(100, use_bias=False), # size 80,000
tf.keras.layers.Dense(50, use_bias=False), # size 20,000
keras.layers.Dense(200, use_bias=False), # size 80,000
keras.layers.Dense(200, use_bias=False), # size 160,000
keras.layers.Dense(100, use_bias=False), # size 80,000
keras.layers.Dense(50, use_bias=False), # size 20,000
]
)
inputs = tf.zeros((1, 100), dtype=tf.float32)
@@ -429,13 +429,13 @@ class TFModelUtilsTest(unittest.TestCase):
# Using default signature (default behavior) overrides 'serving_default'
with tempfile.TemporaryDirectory() as tmp_dir:
model.save_pretrained(tmp_dir, saved_model=True, signatures=None)
model_loaded = tf.keras.models.load_model(f"{tmp_dir}/saved_model/1")
model_loaded = keras.models.load_model(f"{tmp_dir}/saved_model/1")
self.assertTrue("serving_default" in list(model_loaded.signatures.keys()))
# Providing custom signature function
with tempfile.TemporaryDirectory() as tmp_dir:
model.save_pretrained(tmp_dir, saved_model=True, signatures={"custom_signature": serving_fn})
model_loaded = tf.keras.models.load_model(f"{tmp_dir}/saved_model/1")
model_loaded = keras.models.load_model(f"{tmp_dir}/saved_model/1")
self.assertTrue("custom_signature" in list(model_loaded.signatures.keys()))
# Providing multiple custom signature function
@@ -445,7 +445,7 @@ class TFModelUtilsTest(unittest.TestCase):
saved_model=True,
signatures={"custom_signature_1": serving_fn, "custom_signature_2": serving_fn},
)
model_loaded = tf.keras.models.load_model(f"{tmp_dir}/saved_model/1")
model_loaded = keras.models.load_model(f"{tmp_dir}/saved_model/1")
self.assertTrue("custom_signature_1" in list(model_loaded.signatures.keys()))
self.assertTrue("custom_signature_2" in list(model_loaded.signatures.keys()))

View File

@@ -46,6 +46,7 @@ if is_tf_available():
TF_MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING,
TFSharedEmbeddings,
)
from transformers.modeling_tf_utils import keras
if _tf_gpu_memory_limit is not None:
gpus = tf.config.list_physical_devices("GPU")
@@ -169,7 +170,7 @@ class TFCoreModelTesterMixin:
self.assertGreater(len(inputs_minus_labels), 0)
# Make sure it works with XLA!
model.compile(optimizer=tf.keras.optimizers.SGD(0.0), jit_compile=True)
model.compile(optimizer=keras.optimizers.SGD(0.0), jit_compile=True)
# Make sure the model fits without crashing regardless of where we pass the labels
history = model.fit(
prepared_for_class,
@@ -186,7 +187,7 @@ class TFCoreModelTesterMixin:
# Now test it with separate labels, to make sure that path works in XLA too.
model = model_class(config)
model.compile(optimizer=tf.keras.optimizers.SGD(0.0), jit_compile=True)
model.compile(optimizer=keras.optimizers.SGD(0.0), jit_compile=True)
history = model.fit(
inputs_minus_labels,
labels,
@@ -234,7 +235,7 @@ class TFCoreModelTesterMixin:
with tempfile.TemporaryDirectory() as tmpdirname:
model.save_pretrained(tmpdirname, saved_model=True)
saved_model_dir = os.path.join(tmpdirname, "saved_model", "1")
model = tf.keras.models.load_model(saved_model_dir)
model = keras.models.load_model(saved_model_dir)
outputs = model(class_inputs_dict)
if self.is_encoder_decoder:
@@ -264,7 +265,7 @@ class TFCoreModelTesterMixin:
@slow
def test_mixed_precision(self):
tf.keras.mixed_precision.set_global_policy("mixed_float16")
keras.mixed_precision.set_global_policy("mixed_float16")
# try/finally block to ensure subsequent tests run in float32
try:
@@ -276,7 +277,7 @@ class TFCoreModelTesterMixin:
self.assertIsNotNone(outputs)
finally:
tf.keras.mixed_precision.set_global_policy("float32")
keras.mixed_precision.set_global_policy("float32")
@slow
def test_train_pipeline_custom_model(self):
@@ -296,7 +297,7 @@ class TFCoreModelTesterMixin:
if module_member_name.endswith("MainLayer")
for module_member in (getattr(module, module_member_name),)
if isinstance(module_member, type)
and tf.keras.layers.Layer in module_member.__bases__
and keras.layers.Layer in module_member.__bases__
and getattr(module_member, "_keras_serializable", False)
}
@@ -311,7 +312,7 @@ class TFCoreModelTesterMixin:
main_layer = main_layer_class(config)
symbolic_inputs = {
name: tf.keras.Input(tensor.shape[1:], dtype=tensor.dtype) for name, tensor in inputs_dict.items()
name: keras.Input(tensor.shape[1:], dtype=tensor.dtype) for name, tensor in inputs_dict.items()
}
if hasattr(self.model_tester, "num_labels"):
@@ -324,8 +325,8 @@ class TFCoreModelTesterMixin:
).batch(1)
hidden_states = main_layer(symbolic_inputs)[0]
outputs = tf.keras.layers.Dense(num_labels, activation="softmax", name="outputs")(hidden_states)
model = tf.keras.models.Model(inputs=symbolic_inputs, outputs=[outputs])
outputs = keras.layers.Dense(num_labels, activation="softmax", name="outputs")(hidden_states)
model = keras.models.Model(inputs=symbolic_inputs, outputs=[outputs])
model.compile(loss="binary_crossentropy", optimizer="adam", metrics=["binary_accuracy"])
model.fit(X, epochs=1)
@@ -334,7 +335,7 @@ class TFCoreModelTesterMixin:
filepath = os.path.join(tmpdirname, "keras_model.h5")
model.save(filepath)
if "T5" in main_layer_class.__name__:
model = tf.keras.models.load_model(
model = keras.models.load_model(
filepath,
custom_objects={
main_layer_class.__name__: main_layer_class,
@@ -342,10 +343,10 @@ class TFCoreModelTesterMixin:
},
)
else:
model = tf.keras.models.load_model(
model = keras.models.load_model(
filepath, custom_objects={main_layer_class.__name__: main_layer_class}
)
assert isinstance(model, tf.keras.Model)
assert isinstance(model, keras.Model)
model(inputs_dict)
@slow