Compare commits

..

7 Commits

Author SHA1 Message Date
Sylvain Gugger
66fd3a8d62 Patch: v4.30.2
Some checks failed
Release - Conda / build_and_package (push) Has been cancelled
2023-06-13 14:24:02 -04:00
NielsRogge
8f9f1efaf8 Fix push to hub (#24187)
Add fix
2023-06-13 14:23:39 -04:00
Matt
497d66740b Fix how we detect the TF package (#24255)
* Fix how we detect the TF package

* Add a comment as a talisman warding against future harm

* Actually put the comment in the right place
2023-06-13 14:22:33 -04:00
Sylvain Gugger
65a1ec05ca Patch: v4.30.1
Some checks failed
Release - Conda / build_and_package (push) Has been cancelled
2023-06-09 10:48:44 -04:00
Younes Belkada
fd59fc1a7f [bnb] Fix bnb config json serialization (#24137)
* fix bnb config json serialization

* forward contrib credits from discussions

---------

Co-authored-by: Andrechang <Andrechang@users.noreply.github.com>
2023-06-09 08:50:23 -04:00
Sourab Mangrulkar
a272e4135c fix bugs with trainer (#24134)
* fix the deepspeed test failures

* apex fix

* FSDP save ckpt fix

* Update src/transformers/trainer.py

Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com>

---------

Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com>
2023-06-09 08:31:09 -04:00
Matt
50ed79312d Correctly build models and import call_context for older TF versions (#24138)
2023-06-09 08:30:54 -04:00
9 changed files with 51 additions and 7 deletions

View File

@@ -428,7 +428,7 @@ install_requires = [
setup(
name="transformers",
-version="4.30.0", # expected format is one of x.y.z.dev0, or x.y.z.rc1 or x.y.z (no to dashes, yes to dots)
+version="4.30.2", # expected format is one of x.y.z.dev0, or x.y.z.rc1 or x.y.z (no to dashes, yes to dots)
author="The Hugging Face team (past and future) with the help of all our contributors (https://github.com/huggingface/transformers/graphs/contributors)",
author_email="transformers@huggingface.co",
description="State-of-the-art Machine Learning for JAX, PyTorch and TensorFlow",

View File

@@ -18,7 +18,7 @@
# to defer the actual importing for when the objects are requested. This way `import transformers` provides the names
# in the namespace without actually importing anything (and especially none of the backends).
-__version__ = "4.30.0"
+__version__ = "4.30.2"
from typing import TYPE_CHECKING

View File

@@ -784,6 +784,13 @@ class PretrainedConfig(PushToHubMixin):
):
serializable_config_dict[key] = value
+if hasattr(self, "quantization_config"):
+serializable_config_dict["quantization_config"] = (
+self.quantization_config.to_dict()
+if not isinstance(self.quantization_config, dict)
+else self.quantization_config
+)
self.dict_torch_dtype_to_str(serializable_config_dict)
return serializable_config_dict

View File

@@ -81,7 +81,7 @@ elif parse(tf.__version__).minor >= 11:
from keras.engine.keras_tensor import KerasTensor
else:
from tensorflow.python.keras import backend as K
-from tensorflow.python.keras.engine import call_context
+from tensorflow.python.keras.engine.base_layer_utils import call_context
from tensorflow.python.keras.engine.keras_tensor import KerasTensor
@@ -1156,8 +1156,8 @@ class TFPreTrainedModel(tf.keras.Model, TFModelUtilsMixin, TFGenerationMixin, Pu
if self.built or call_context().in_call:
self.built = True
else:
-self(self.dummy_inputs, training=False)
self.built = True
+self(self.dummy_inputs, training=False)
def __init__(self, config, *inputs, **kwargs):
super().__init__(*inputs, **kwargs)

View File

@@ -1749,7 +1749,16 @@ class Trainer:
# prepare using `accelerator` prepare
if use_accelerator_prepare:
-model, self.optimizer = self.accelerator.prepare(self.model, self.optimizer)
+if hasattr(self.lr_scheduler, "step"):
+if self.use_apex:
+model = self.accelerator.prepare(self.model)
+else:
+model, self.optimizer = self.accelerator.prepare(self.model, self.optimizer)
+else:
+# to handle cases wherein we pass "DummyScheduler" such as when it is specified in DeepSpeed config.
+model, self.optimizer, self.lr_scheduler = self.accelerator.prepare(
+self.model, self.optimizer, self.lr_scheduler
+)
if self.is_fsdp_enabled:
self.model = model
@@ -2841,6 +2850,7 @@ class Trainer:
or self.is_fsdp_enabled
):
if self.is_fsdp_enabled:
+os.makedirs(output_dir, exist_ok=True)
self.accelerator.state.fsdp_plugin.save_model(self.accelerator, self.model, output_dir)
else:
state_dict = self.model.state_dict()

View File

@@ -712,7 +712,6 @@ class PushToHubMixin:
operations = []
# upload standalone files
for file in modified_files:
-operations.append(CommitOperationAdd(path_or_fileobj=os.path.join(working_dir, file), path_in_repo=file))
if os.path.isdir(os.path.join(working_dir, file)):
# go over individual files of folder
for f in os.listdir(os.path.join(working_dir, file)):

View File

@@ -146,7 +146,9 @@ if FORCE_TF_AVAILABLE in ENV_VARS_TRUE_VALUES:
_tf_available = True
else:
if USE_TF in ENV_VARS_TRUE_AND_AUTO_VALUES and USE_TORCH not in ENV_VARS_TRUE_VALUES:
-_tf_available = _is_package_available("tensorflow")
+# Note: _is_package_available("tensorflow") fails for tensorflow-cpu. Please test any changes to the line below
+# with tensorflow-cpu to make sure it still works!
+_tf_available = importlib.util.find_spec("tensorflow") is not None
if _tf_available:
candidates = (
"tensorflow",

View File

@@ -111,6 +111,19 @@ class Bnb4BitTest(Base4bitTest):
gc.collect()
torch.cuda.empty_cache()
+def test_quantization_config_json_serialization(self):
+r"""
+A simple test to check if the quantization config is correctly serialized and deserialized
+"""
+config = self.model_4bit.config
+self.assertTrue(hasattr(config, "quantization_config"))
+_ = config.to_dict()
+_ = config.to_diff_dict()
+_ = config.to_json_string()
def test_memory_footprint(self):
r"""
A simple test to check if the model conversion has been done correctly by checking on the

View File

@@ -118,6 +118,19 @@ class MixedInt8Test(BaseMixedInt8Test):
gc.collect()
torch.cuda.empty_cache()
+def test_quantization_config_json_serialization(self):
+r"""
+A simple test to check if the quantization config is correctly serialized and deserialized
+"""
+config = self.model_8bit.config
+self.assertTrue(hasattr(config, "quantization_config"))
+_ = config.to_dict()
+_ = config.to_diff_dict()
+_ = config.to_json_string()
def test_memory_footprint(self):
r"""
A simple test to check if the model conversion has been done correctly by checking on the