Compare commits

...

4 Commits

Author SHA1 Message Date
Sylvain Gugger
65a1ec05ca Patch: v4.30.1
Some checks failed
Release - Conda / build_and_package (push) Has been cancelled
2023-06-09 10:48:44 -04:00
Younes Belkada
fd59fc1a7f [bnb] Fix bnb config json serialization (#24137)
* fix bnb config json serialization

* forward contrib credits from discussions

---------

Co-authored-by: Andrechang <Andrechang@users.noreply.github.com>
2023-06-09 08:50:23 -04:00
Sourab Mangrulkar
a272e4135c fix bugs with trainer (#24134)
* fix the deepspeed test failures

* apex fix

* FSDP save ckpt fix

* Update src/transformers/trainer.py

Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com>

---------

Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com>
2023-06-09 08:31:09 -04:00
Matt
50ed79312d Correctly build models and import call_context for older TF versions (#24138) 2023-06-09 08:30:54 -04:00
7 changed files with 48 additions and 5 deletions

View File

@@ -428,7 +428,7 @@ install_requires = [
setup(
name="transformers",
version="4.30.0", # expected format is one of x.y.z.dev0, or x.y.z.rc1 or x.y.z (no to dashes, yes to dots)
version="4.30.1", # expected format is one of x.y.z.dev0, or x.y.z.rc1 or x.y.z (no to dashes, yes to dots)
author="The Hugging Face team (past and future) with the help of all our contributors (https://github.com/huggingface/transformers/graphs/contributors)",
author_email="transformers@huggingface.co",
description="State-of-the-art Machine Learning for JAX, PyTorch and TensorFlow",

View File

@@ -18,7 +18,7 @@
# to defer the actual importing for when the objects are requested. This way `import transformers` provides the names
# in the namespace without actually importing anything (and especially none of the backends).
__version__ = "4.30.0"
__version__ = "4.30.1"
from typing import TYPE_CHECKING

View File

@@ -784,6 +784,13 @@ class PretrainedConfig(PushToHubMixin):
):
serializable_config_dict[key] = value
if hasattr(self, "quantization_config"):
serializable_config_dict["quantization_config"] = (
self.quantization_config.to_dict()
if not isinstance(self.quantization_config, dict)
else self.quantization_config
)
self.dict_torch_dtype_to_str(serializable_config_dict)
return serializable_config_dict

View File

@@ -81,7 +81,7 @@ elif parse(tf.__version__).minor >= 11:
from keras.engine.keras_tensor import KerasTensor
else:
from tensorflow.python.keras import backend as K
from tensorflow.python.keras.engine import call_context
from tensorflow.python.keras.engine.base_layer_utils import call_context
from tensorflow.python.keras.engine.keras_tensor import KerasTensor
@@ -1156,8 +1156,8 @@ class TFPreTrainedModel(tf.keras.Model, TFModelUtilsMixin, TFGenerationMixin, Pu
if self.built or call_context().in_call:
self.built = True
else:
self(self.dummy_inputs, training=False)
self.built = True
self(self.dummy_inputs, training=False)
def __init__(self, config, *inputs, **kwargs):
super().__init__(*inputs, **kwargs)

View File

@@ -1749,7 +1749,16 @@ class Trainer:
# prepare using `accelerator` prepare
if use_accelerator_prepare:
model, self.optimizer = self.accelerator.prepare(self.model, self.optimizer)
if hasattr(self.lr_scheduler, "step"):
if self.use_apex:
model = self.accelerator.prepare(self.model)
else:
model, self.optimizer = self.accelerator.prepare(self.model, self.optimizer)
else:
# to handle cases wherein we pass "DummyScheduler" such as when it is specified in DeepSpeed config.
model, self.optimizer, self.lr_scheduler = self.accelerator.prepare(
self.model, self.optimizer, self.lr_scheduler
)
if self.is_fsdp_enabled:
self.model = model
@@ -2841,6 +2850,7 @@ class Trainer:
or self.is_fsdp_enabled
):
if self.is_fsdp_enabled:
os.makedirs(output_dir, exist_ok=True)
self.accelerator.state.fsdp_plugin.save_model(self.accelerator, self.model, output_dir)
else:
state_dict = self.model.state_dict()

View File

@@ -111,6 +111,19 @@ class Bnb4BitTest(Base4bitTest):
gc.collect()
torch.cuda.empty_cache()
def test_quantization_config_json_serialization(self):
r"""
Smoke test that the config of the 4-bit model, which carries a `quantization_config` attribute, can be
serialized. `to_dict`, `to_diff_dict` and `to_json_string` are each called once and their results are
discarded, so the test only verifies that none of these calls raises. Deserialization is not exercised here.
"""
config = self.model_4bit.config
self.assertTrue(hasattr(config, "quantization_config"))
_ = config.to_dict()
_ = config.to_diff_dict()
_ = config.to_json_string()
def test_memory_footprint(self):
r"""
A simple test to check if the model conversion has been done correctly by checking on the

View File

@@ -118,6 +118,19 @@ class MixedInt8Test(BaseMixedInt8Test):
gc.collect()
torch.cuda.empty_cache()
def test_quantization_config_json_serialization(self):
r"""
Smoke test that the config of the 8-bit model, which carries a `quantization_config` attribute, can be
serialized. `to_dict`, `to_diff_dict` and `to_json_string` are each called once and their results are
discarded, so the test only verifies that none of these calls raises. Deserialization is not exercised here.
"""
config = self.model_8bit.config
self.assertTrue(hasattr(config, "quantization_config"))
_ = config.to_dict()
_ = config.to_diff_dict()
_ = config.to_json_string()
def test_memory_footprint(self):
r"""
A simple test to check if the model conversion has been done correctly by checking on the