Patch: v4.30.2

Fix push to hub (#24187)
Add fix
2023-06-13 14:24:02 -04:00 · 2023-06-13 14:23:39 -04:00 · 2023-06-13 14:22:33 -04:00 · 2023-06-09 10:48:44 -04:00 · 2023-06-09 08:50:23 -04:00 · 2023-06-09 08:31:09 -04:00
9 changed files with 51 additions and 7 deletions
--- a/setup.py
+++ b/setup.py
@@ -428,7 +428,7 @@ install_requires = [

 setup(
    name="transformers",
-    version="4.30.0",  # expected format is one of x.y.z.dev0, or x.y.z.rc1 or x.y.z (no to dashes, yes to dots)
+    version="4.30.2",  # expected format is one of x.y.z.dev0, or x.y.z.rc1 or x.y.z (no to dashes, yes to dots)
    author="The Hugging Face team (past and future) with the help of all our contributors (https://github.com/huggingface/transformers/graphs/contributors)",
    author_email="transformers@huggingface.co",
    description="State-of-the-art Machine Learning for JAX, PyTorch and TensorFlow",
--- a/src/transformers/__init__.py
+++ b/src/transformers/__init__.py
@@ -18,7 +18,7 @@
 # to defer the actual importing for when the objects are requested. This way `import transformers` provides the names
 # in the namespace without actually importing anything (and especially none of the backends).

-__version__ = "4.30.0"
+__version__ = "4.30.2"

 from typing import TYPE_CHECKING

--- a/src/transformers/configuration_utils.py
+++ b/src/transformers/configuration_utils.py
@@ -784,6 +784,13 @@ class PretrainedConfig(PushToHubMixin):
            ):
                serializable_config_dict[key] = value

+        if hasattr(self, "quantization_config"):
+            serializable_config_dict["quantization_config"] = (
+                self.quantization_config.to_dict()
+                if not isinstance(self.quantization_config, dict)
+                else self.quantization_config
+            )
+
        self.dict_torch_dtype_to_str(serializable_config_dict)

        return serializable_config_dict
--- a/src/transformers/modeling_tf_utils.py
+++ b/src/transformers/modeling_tf_utils.py
@@ -81,7 +81,7 @@ elif parse(tf.__version__).minor >= 11:
    from keras.engine.keras_tensor import KerasTensor
 else:
    from tensorflow.python.keras import backend as K
-    from tensorflow.python.keras.engine import call_context
+    from tensorflow.python.keras.engine.base_layer_utils import call_context
    from tensorflow.python.keras.engine.keras_tensor import KerasTensor


@@ -1156,8 +1156,8 @@ class TFPreTrainedModel(tf.keras.Model, TFModelUtilsMixin, TFGenerationMixin, Pu
        if self.built or call_context().in_call:
            self.built = True
        else:
-            self(self.dummy_inputs, training=False)
            self.built = True
+            self(self.dummy_inputs, training=False)

    def __init__(self, config, *inputs, **kwargs):
        super().__init__(*inputs, **kwargs)
--- a/src/transformers/trainer.py
+++ b/src/transformers/trainer.py
@@ -1749,7 +1749,16 @@ class Trainer:

        # prepare using `accelerator` prepare
        if use_accelerator_prepare:
-            model, self.optimizer = self.accelerator.prepare(self.model, self.optimizer)
+            if hasattr(self.lr_scheduler, "step"):
+                if self.use_apex:
+                    model = self.accelerator.prepare(self.model)
+                else:
+                    model, self.optimizer = self.accelerator.prepare(self.model, self.optimizer)
+            else:
+                # to handle cases wherein we pass "DummyScheduler" such as when it is specified in DeepSpeed config.
+                model, self.optimizer, self.lr_scheduler = self.accelerator.prepare(
+                    self.model, self.optimizer, self.lr_scheduler
+                )

        if self.is_fsdp_enabled:
            self.model = model
@@ -2841,6 +2850,7 @@ class Trainer:
            or self.is_fsdp_enabled
        ):
            if self.is_fsdp_enabled:
+                os.makedirs(output_dir, exist_ok=True)
                self.accelerator.state.fsdp_plugin.save_model(self.accelerator, self.model, output_dir)
            else:
                state_dict = self.model.state_dict()
--- a/src/transformers/utils/hub.py
+++ b/src/transformers/utils/hub.py
@@ -712,7 +712,6 @@ class PushToHubMixin:
        operations = []
        # upload standalone files
        for file in modified_files:
-            operations.append(CommitOperationAdd(path_or_fileobj=os.path.join(working_dir, file), path_in_repo=file))
            if os.path.isdir(os.path.join(working_dir, file)):
                # go over individual files of folder
                for f in os.listdir(os.path.join(working_dir, file)):
--- a/src/transformers/utils/import_utils.py
+++ b/src/transformers/utils/import_utils.py
@@ -146,7 +146,9 @@ if FORCE_TF_AVAILABLE in ENV_VARS_TRUE_VALUES:
    _tf_available = True
 else:
    if USE_TF in ENV_VARS_TRUE_AND_AUTO_VALUES and USE_TORCH not in ENV_VARS_TRUE_VALUES:
-        _tf_available = _is_package_available("tensorflow")
+        # Note: _is_package_available("tensorflow") fails for tensorflow-cpu. Please test any changes to the line below
+        # with tensorflow-cpu to make sure it still works!
+        _tf_available = importlib.util.find_spec("tensorflow") is not None
        if _tf_available:
            candidates = (
                "tensorflow",
--- a/tests/bitsandbytes/test_4bit.py
+++ b/tests/bitsandbytes/test_4bit.py
@@ -111,6 +111,19 @@ class Bnb4BitTest(Base4bitTest):
        gc.collect()
        torch.cuda.empty_cache()

+    def test_quantization_config_json_serialization(self):
+        r"""
+        A simple test to check if the quantization config is correctly serialized and deserialized
+        """
+        config = self.model_4bit.config
+
+        self.assertTrue(hasattr(config, "quantization_config"))
+
+        _ = config.to_dict()
+        _ = config.to_diff_dict()
+
+        _ = config.to_json_string()
+
    def test_memory_footprint(self):
        r"""
        A simple test to check if the model conversion has been done correctly by checking on the
--- a/tests/bitsandbytes/test_mixed_int8.py
+++ b/tests/bitsandbytes/test_mixed_int8.py
@@ -118,6 +118,19 @@ class MixedInt8Test(BaseMixedInt8Test):
        gc.collect()
        torch.cuda.empty_cache()

+    def test_quantization_config_json_serialization(self):
+        r"""
+        A simple test to check if the quantization config is correctly serialized and deserialized
+        """
+        config = self.model_8bit.config
+
+        self.assertTrue(hasattr(config, "quantization_config"))
+
+        _ = config.to_dict()
+        _ = config.to_diff_dict()
+
+        _ = config.to_json_string()
+
    def test_memory_footprint(self):
        r"""
        A simple test to check if the model conversion has been done correctly by checking on the
Author	SHA1	Message	Date
Sylvain Gugger	66fd3a8d62	Patch: v4.30.2 Some checks failed Release - Conda / build_and_package (push) Has been cancelled Details	2023-06-13 14:24:02 -04:00
NielsRogge	8f9f1efaf8	Fix push to hub (#24187 ) Add fix	2023-06-13 14:23:39 -04:00
Matt	497d66740b	Fix how we detect the TF package (#24255 ) * Fix how we detect the TF package * Add a comment as a talisman warding against future harm * Actually put the comment in the right place	2023-06-13 14:22:33 -04:00
Sylvain Gugger	65a1ec05ca	Patch: v4.30.1 Some checks failed Release - Conda / build_and_package (push) Has been cancelled Details	2023-06-09 10:48:44 -04:00
Younes Belkada	fd59fc1a7f	[`bnb`] Fix bnb config json serialization (#24137 ) * fix bnb config json serialization * forward contrib credits from discussions --------- Co-authored-by: Andrechang <Andrechang@users.noreply.github.com>	2023-06-09 08:50:23 -04:00
Sourab Mangrulkar	a272e4135c	fix bugs with trainer (#24134 ) * fix the deepspeed test failures * apex fix * FSDP save ckpt fix * Update src/transformers/trainer.py Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com> --------- Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com>	2023-06-09 08:31:09 -04:00
Matt	50ed79312d	Correctly build models and import call_context for older TF versions (#24138 )	2023-06-09 08:30:54 -04:00