Patch release v4.43.4

Resize embeds with DeepSpeed (#32214)
* fix resize when deepspeed * deepspeed uses new embeds * we needed this
2024-08-05 12:40:38 +02:00 · 2024-08-05 12:40:01 +02:00
3 changed files with 16 additions and 6 deletions
--- a/setup.py
+++ b/setup.py
@@ -430,7 +430,7 @@ install_requires = [

 setup(
    name="transformers",
-    version="4.43.3",  # expected format is one of x.y.z.dev0, or x.y.z.rc1 or x.y.z (no to dashes, yes to dots)
+    version="4.43.4",  # expected format is one of x.y.z.dev0, or x.y.z.rc1 or x.y.z (no to dashes, yes to dots)
    author="The Hugging Face team (past and future) with the help of all our contributors (https://github.com/huggingface/transformers/graphs/contributors)",
    author_email="transformers@huggingface.co",
    description="State-of-the-art Machine Learning for JAX, PyTorch and TensorFlow",
--- a/src/transformers/__init__.py
+++ b/src/transformers/__init__.py
@@ -18,7 +18,7 @@
 # to defer the actual importing for when the objects are requested. This way `import transformers` provides the names
 # in the namespace without actually importing anything (and especially none of the backends).

-__version__ = "4.43.3"
+__version__ = "4.43.4"

 from typing import TYPE_CHECKING

--- a/src/transformers/modeling_utils.py
+++ b/src/transformers/modeling_utils.py
@@ -1980,12 +1980,22 @@ class PreTrainedModel(nn.Module, ModuleUtilsMixin, GenerationMixin, PushToHubMix
        if new_num_tokens is None and pad_to_multiple_of is None:
            return model_embeds

+        # Since we are basically reusing the same old embeddings with new weight values, gathering is required
+        is_quantized = hasattr(self, "hf_quantizer") and self.hf_quantizer is not None
+        if is_deepspeed_zero3_enabled() and not is_quantized:
+            import deepspeed
+
+            with deepspeed.zero.GatheredParameters(model_embeds.weight, modifier_rank=None):
+                vocab_size = model_embeds.weight.shape[0]
+        else:
+            vocab_size = model_embeds.weight.shape[0]
+
        # Update base model and current model config
        if hasattr(self.config, "text_config"):
-            self.config.text_config.vocab_size = model_embeds.weight.shape[0]
+            self.config.text_config.vocab_size = vocab_size
        else:
-            self.config.vocab_size = model_embeds.weight.shape[0]
-        self.vocab_size = model_embeds.weight.shape[0]
+            self.config.vocab_size = vocab_size
+        self.vocab_size = vocab_size

        # Tie weights again if needed
        self.tie_weights()
@@ -2139,7 +2149,7 @@ class PreTrainedModel(nn.Module, ModuleUtilsMixin, GenerationMixin, PushToHubMix

            params = [old_embeddings.weight, new_embeddings.weight]
            with deepspeed.zero.GatheredParameters(params, modifier_rank=0):
-                old_embeddings.weight.data = new_embeddings.weight.data
+                old_embeddings.weight = new_embeddings.weight
                old_embeddings.num_embeddings = new_embeddings.weight.data.shape[0]

                # If the new number of tokens is smaller than the original `padding_idx`, the `padding_idx`
Author	SHA1	Message	Date
Arthur Zucker	868d36d29e	Patch release v4.43.4 [Some checks failed — Release - Conda / build_and_package (push): cancelled; Secret Leaks / trufflehog (push): cancelled]	2024-08-05 12:40:38 +02:00
Raushan Turganbay	5cea2e73ef	Resize embeds with DeepSpeed (#32214) * fix resize when deepspeed * deepspeed uses new embeds * we needed this	2024-08-05 12:40:01 +02:00