TF version compatibility fixes (#23663)

* New TF version compatibility fixes * Remove dummy print statement, move expand_1d * Make a proper framework inference function * Make a proper framework inference function * ValueError -> TypeError
2023-05-23 16:42:11 +01:00
parent 42baa58f90
commit 876d9a32c6
5 changed files with 128 additions and 30 deletions
--- a/src/transformers/modeling_tf_utils.py
+++ b/src/transformers/modeling_tf_utils.py
@@ -38,7 +38,7 @@ from .activations_tf import get_tf_activation
 from .configuration_utils import PretrainedConfig
 from .dynamic_module_utils import custom_object_save
 from .generation import GenerationConfig, TFGenerationMixin
-from .tf_utils import shape_list
+from .tf_utils import expand_1d, load_attributes_from_hdf5_group, save_attributes_to_hdf5_group, shape_list
 from .utils import (
    DUMMY_INPUTS,
    SAFE_WEIGHTS_INDEX_NAME,
@@ -65,16 +65,15 @@ from .utils import (
 from .utils.hub import convert_file_size_to_int, get_checkpoint_shard_files
-if parse(tf.__version__) >= parse("2.11.0"):
+if parse(tf.__version__).minor >= 13:
    from keras import backend as K
    from keras.__internal__ import KerasTensor
 elif parse(tf.__version__).minor >= 11:
    from keras import backend as K
    from keras.engine import data_adapter
    from keras.engine.keras_tensor import KerasTensor
    from keras.saving.legacy import hdf5_format
 else:
    from tensorflow.python.keras import backend as K
    from tensorflow.python.keras.engine import data_adapter
    from tensorflow.python.keras.engine.keras_tensor import KerasTensor
    from tensorflow.python.keras.saving import hdf5_format
 if is_safetensors_available():
@@ -797,9 +796,7 @@ def load_tf_shard(model, model_layer_map, resolved_archive_file, ignore_mismatch
    try:
        with h5py.File(resolved_archive_file, "r") as sharded_checkpoint_file:
            # Retrieve the name of each layer from the H5 file
-            saved_h5_model_layers_name = set(
+            saved_h5_model_layers_name = set(load_attributes_from_hdf5_group(sharded_checkpoint_file, "layer_names"))
                hdf5_format.load_attributes_from_hdf5_group(sharded_checkpoint_file, "layer_names")
            )
            weight_value_tuples = []
            # Compute missing and unexpected sub layers
@@ -898,9 +895,7 @@ def load_tf_weights_from_h5(model, resolved_archive_file, ignore_mismatched_size
    # Read the H5 file
    with h5py.File(resolved_archive_file, "r") as sharded_checkpoint_file:
        # Retrieve the name of each layer from the H5 file
-        saved_h5_model_layers_name = set(
+        saved_h5_model_layers_name = set(load_attributes_from_hdf5_group(sharded_checkpoint_file, "layer_names"))
            hdf5_format.load_attributes_from_hdf5_group(sharded_checkpoint_file, "layer_names")
        )
        # Find the missing layers from the high level list of layers
        missing_layers = list({layer.name for layer in model.layers} - saved_h5_model_layers_name)
@@ -924,7 +919,7 @@ def load_tf_weights_from_h5(model, resolved_archive_file, ignore_mismatched_size
                # Create a dict from the H5 saved model that looks like {"weight_name": weight_value}
                # And a set with only the names
-                for weight_name in hdf5_format.load_attributes_from_hdf5_group(h5_layer_object, "weight_names"):
+                for weight_name in load_attributes_from_hdf5_group(h5_layer_object, "weight_names"):
                    # TF names always start with the model name so we ignore it
                    name = "/".join(weight_name.split("/")[1:])
@@ -1528,8 +1523,8 @@ class TFPreTrainedModel(tf.keras.Model, TFModelUtilsMixin, TFGenerationMixin, Pu
        output_to_label = {val: key for key, val in label_to_output.items()}
        if not self._using_dummy_loss and parse(tf.__version__) < parse("2.11.0"):
            # Newer TF train steps leave this out
-            data = data_adapter.expand_1d(data)
+            data = expand_1d(data)
-        x, y, sample_weight = data_adapter.unpack_x_y_sample_weight(data)
+        x, y, sample_weight = tf.keras.utils.unpack_x_y_sample_weight(data)
        # If the inputs are mutable dictionaries, make a shallow copy of them because we will modify
        # them during input/label pre-processing. This avoids surprising the user by wrecking their data.
        # In addition, modifying mutable Python inputs makes XLA compilation impossible.
@@ -1635,8 +1630,8 @@ class TFPreTrainedModel(tf.keras.Model, TFModelUtilsMixin, TFGenerationMixin, Pu
        output_to_label = {val: key for key, val in label_to_output.items()}
        if not self._using_dummy_loss and parse(tf.__version__) < parse("2.11.0"):
            # Newer versions leave this out
-            data = data_adapter.expand_1d(data)
+            data = expand_1d(data)
-        x, y, sample_weight = data_adapter.unpack_x_y_sample_weight(data)
+        x, y, sample_weight = tf.keras.utils.unpack_x_y_sample_weight(data)
        # If the inputs are mutable dictionaries, make a shallow copy of them because we will modify
        # them during input/label pre-processing. This avoids surprising the user by wrecking their data.
        # In addition, modifying mutable Python inputs makes XLA compilation impossible.
@@ -2402,7 +2397,7 @@ class TFPreTrainedModel(tf.keras.Model, TFModelUtilsMixin, TFGenerationMixin, Pu
                        )
                        param_dset[:] = layer.numpy()
                        layers.append(layer_name.encode("utf8"))
-                    hdf5_format.save_attributes_to_hdf5_group(shard_file, "layer_names", layers)
+                    save_attributes_to_hdf5_group(shard_file, "layer_names", layers)
        if push_to_hub:
            self._upload_modified_files(
--- a/src/transformers/pipelines/base.py
+++ b/src/transformers/pipelines/base.py
@@ -15,7 +15,6 @@
 import collections
 import csv
 import importlib
 import inspect
 import json
 import os
 import pickle
@@ -36,7 +35,7 @@ from ..image_processing_utils import BaseImageProcessor
 from ..modelcard import ModelCard
 from ..models.auto.configuration_auto import AutoConfig
 from ..tokenization_utils import PreTrainedTokenizer
-from ..utils import ModelOutput, add_end_docstrings, is_tf_available, is_torch_available, logging
+from ..utils import ModelOutput, add_end_docstrings, infer_framework, is_tf_available, is_torch_available, logging
 GenericTensor = Union[List["GenericTensor"], "torch.Tensor", "tf.Tensor"]
@@ -278,7 +277,7 @@ def infer_framework_load_model(
        if isinstance(model, str):
            raise ValueError(f"Could not load model {model} with any of the following classes: {class_tuple}.")
-    framework = "tf" if "keras.engine.training.Model" in str(inspect.getmro(model.__class__)) else "pt"
+    framework = infer_framework(model.__class__)
    return framework, model
@@ -351,7 +350,7 @@ def get_framework(model, revision: Optional[str] = None):
            except OSError:
                model = TFAutoModel.from_pretrained(model, revision=revision)
-    framework = "tf" if "keras.engine.training.Model" in str(inspect.getmro(model.__class__)) else "pt"
+    framework = infer_framework(model.__class__)
    return framework
--- a/src/transformers/tf_utils.py
+++ b/src/transformers/tf_utils.py
@@ -166,3 +166,90 @@ def check_embeddings_within_bounds(tensor: tf.Tensor, embed_dim: int, tensor_nam
            f"layer's input dimension ({embed_dim}). The likely cause is some problem at tokenization time."
        ),
    )
 def save_attributes_to_hdf5_group(group, name, data):
    """Saves attributes (data) of the specified name into the HDF5 group.
    This method deals with an inherent problem of HDF5 file which is not able to store data larger than
    HDF5_OBJECT_HEADER_LIMIT bytes.
    Args:
        group: A pointer to a HDF5 group.
        name: A name of the attributes to save.
        data: Attributes data to store.
    Raises:
      RuntimeError: If any single attribute is too large to be saved.
    Copied from Keras to Transformers to avoid versioning issues.
    """
    HDF5_OBJECT_HEADER_LIMIT = 64512
    # Check that no item in `data` is larger than `HDF5_OBJECT_HEADER_LIMIT`
    # because in that case even chunking the array would not make the saving
    # possible.
    bad_attributes = [x for x in data if len(x) > HDF5_OBJECT_HEADER_LIMIT]
    # Expecting this to never be true.
    if bad_attributes:
        raise RuntimeError(
            "The following attributes cannot be saved to HDF5 file because "
            f"they are larger than {HDF5_OBJECT_HEADER_LIMIT} "
            f"bytes: {bad_attributes}"
        )
    data_npy = np.asarray(data)
    num_chunks = 1
    chunked_data = np.array_split(data_npy, num_chunks)
    # This will never loop forever thanks to the test above.
    while any(x.nbytes > HDF5_OBJECT_HEADER_LIMIT for x in chunked_data):
        num_chunks += 1
        chunked_data = np.array_split(data_npy, num_chunks)
    if num_chunks > 1:
        for chunk_id, chunk_data in enumerate(chunked_data):
            group.attrs["%s%d" % (name, chunk_id)] = chunk_data
    else:
        group.attrs[name] = data
 def load_attributes_from_hdf5_group(group, name):
    """Loads attributes of the specified name from the HDF5 group.
    This method deals with an inherent problem of HDF5 file which is not able to store data larger than
    HDF5_OBJECT_HEADER_LIMIT bytes.
    Args:
        group: A pointer to a HDF5 group.
        name: A name of the attributes to load.
    Returns:
        data: Attributes data.
    Copied from Keras to Transformers to avoid versioning issues.
    """
    if name in group.attrs:
        data = [n.decode("utf8") if hasattr(n, "decode") else n for n in group.attrs[name]]
    else:
        data = []
        chunk_id = 0
        while "%s%d" % (name, chunk_id) in group.attrs:
            data.extend(
                [n.decode("utf8") if hasattr(n, "decode") else n for n in group.attrs["%s%d" % (name, chunk_id)]]
            )
            chunk_id += 1
    return data
 def expand_1d(data):
    """Expands 1-dimensional `Tensor`s into 2-dimensional `Tensor`s.
    Copied from Keras to here to avoid versioning issues."""
    def _expand_single_1d_tensor(t):
        if isinstance(t, tf.Tensor) and t.shape.rank == 1:
            return tf.expand_dims(t, axis=-1)
        return t
    return tf.nest.map_structure(_expand_single_1d_tensor, data)
--- a/src/transformers/utils/init.py
+++ b/src/transformers/utils/init.py
@@ -39,6 +39,7 @@ from .generic import (
    expand_dims,
    find_labels,
    flatten_dict,
    infer_framework,
    is_jax_tensor,
    is_numpy_array,
    is_tensor,
--- a/src/transformers/utils/generic.py
+++ b/src/transformers/utils/generic.py
@@ -398,11 +398,10 @@ def can_return_loss(model_class):
    Args:
        model_class (`type`): The class of the model.
    """
-    base_classes = str(inspect.getmro(model_class))
+    framework = infer_framework(model_class)
-
+    if framework == "tf":
    if "keras.engine.training.Model" in base_classes:
        signature = inspect.signature(model_class.call)  # TensorFlow models
-    elif "torch.nn.modules.module.Module" in base_classes:
+    elif framework == "pt":
        signature = inspect.signature(model_class.forward)  # PyTorch models
    else:
        signature = inspect.signature(model_class.__call__)  # Flax models
@@ -422,11 +421,10 @@ def find_labels(model_class):
        model_class (`type`): The class of the model.
    """
    model_name = model_class.__name__
-    base_classes = str(inspect.getmro(model_class))
+    framework = infer_framework(model_class)
-
+    if framework == "tf":
    if "keras.engine.training.Model" in base_classes:
        signature = inspect.signature(model_class.call)  # TensorFlow models
-    elif "torch.nn.modules.module.Module" in base_classes:
+    elif framework == "pt":
        signature = inspect.signature(model_class.forward)  # PyTorch models
    else:
        signature = inspect.signature(model_class.__call__)  # Flax models
@@ -565,3 +563,21 @@ def add_model_info_to_auto_map(auto_map, repo_id):
            auto_map[key] = f"{repo_id}--{value}"
    return auto_map
 def infer_framework(model_class):
    """
    Infers the framework of a given model without using isinstance(), because we cannot guarantee that the relevant
    classes are imported or available.
    """
    for base_class in inspect.getmro(model_class):
        module = base_class.__module__
        name = base_class.__name__
        if module.startswith("tensorflow") or module.startswith("keras") or name == "TFPreTrainedModel":
            return "tf"
        elif module.startswith("torch") or name == "PreTrainedModel":
            return "pt"
        elif module.startswith("flax") or module.startswith("jax") or name == "FlaxPreTrainedModel":
            return "flax"
    else:
        raise TypeError(f"Could not infer framework from class {model_class}.")