Compare commits
2 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
868d36d29e | ||
|
|
5cea2e73ef |
2
setup.py
2
setup.py
@@ -430,7 +430,7 @@ install_requires = [
|
||||
|
||||
setup(
|
||||
name="transformers",
|
||||
version="4.43.3", # expected format is one of x.y.z.dev0, or x.y.z.rc1 or x.y.z (no to dashes, yes to dots)
|
||||
version="4.43.4", # expected format is one of x.y.z.dev0, or x.y.z.rc1 or x.y.z (no to dashes, yes to dots)
|
||||
author="The Hugging Face team (past and future) with the help of all our contributors (https://github.com/huggingface/transformers/graphs/contributors)",
|
||||
author_email="transformers@huggingface.co",
|
||||
description="State-of-the-art Machine Learning for JAX, PyTorch and TensorFlow",
|
||||
|
||||
@@ -18,7 +18,7 @@
|
||||
# to defer the actual importing for when the objects are requested. This way `import transformers` provides the names
|
||||
# in the namespace without actually importing anything (and especially none of the backends).
|
||||
|
||||
__version__ = "4.43.3"
|
||||
__version__ = "4.43.4"
|
||||
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
|
||||
@@ -1980,12 +1980,22 @@ class PreTrainedModel(nn.Module, ModuleUtilsMixin, GenerationMixin, PushToHubMix
|
||||
if new_num_tokens is None and pad_to_multiple_of is None:
|
||||
return model_embeds
|
||||
|
||||
# Since we are basically reusing the same old embeddings with new weight values, gathering is required
|
||||
is_quantized = hasattr(self, "hf_quantizer") and self.hf_quantizer is not None
|
||||
if is_deepspeed_zero3_enabled() and not is_quantized:
|
||||
import deepspeed
|
||||
|
||||
with deepspeed.zero.GatheredParameters(model_embeds.weight, modifier_rank=None):
|
||||
vocab_size = model_embeds.weight.shape[0]
|
||||
else:
|
||||
vocab_size = model_embeds.weight.shape[0]
|
||||
|
||||
# Update base model and current model config
|
||||
if hasattr(self.config, "text_config"):
|
||||
self.config.text_config.vocab_size = model_embeds.weight.shape[0]
|
||||
self.config.text_config.vocab_size = vocab_size
|
||||
else:
|
||||
self.config.vocab_size = model_embeds.weight.shape[0]
|
||||
self.vocab_size = model_embeds.weight.shape[0]
|
||||
self.config.vocab_size = vocab_size
|
||||
self.vocab_size = vocab_size
|
||||
|
||||
# Tie weights again if needed
|
||||
self.tie_weights()
|
||||
@@ -2139,7 +2149,7 @@ class PreTrainedModel(nn.Module, ModuleUtilsMixin, GenerationMixin, PushToHubMix
|
||||
|
||||
params = [old_embeddings.weight, new_embeddings.weight]
|
||||
with deepspeed.zero.GatheredParameters(params, modifier_rank=0):
|
||||
old_embeddings.weight.data = new_embeddings.weight.data
|
||||
old_embeddings.weight = new_embeddings.weight
|
||||
old_embeddings.num_embeddings = new_embeddings.weight.data.shape[0]
|
||||
|
||||
# If the new number of tokens is smaller than the original `padding_idx`, the `padding_idx`
|
||||
|
||||
Reference in New Issue
Block a user