From b070025aa64763287d57351775705398c7c8c8ec Mon Sep 17 00:00:00 2001 From: Isotr0py <2037008807@qq.com> Date: Fri, 14 Mar 2025 00:57:33 +0800 Subject: [PATCH] Add GGUF support to T5-Encoder (#36700) * add gguf support to t5encoder Signed-off-by: Isotr0py <2037008807@qq.com> * fix Signed-off-by: Isotr0py <2037008807@qq.com> * remove gguf from model_kwargs Signed-off-by: Isotr0py <2037008807@qq.com> --------- Signed-off-by: Isotr0py <2037008807@qq.com> --- src/transformers/modeling_gguf_pytorch_utils.py | 5 ++++- src/transformers/modeling_utils.py | 3 +++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/src/transformers/modeling_gguf_pytorch_utils.py b/src/transformers/modeling_gguf_pytorch_utils.py index 0f21f80e04..1c971d0497 100644 --- a/src/transformers/modeling_gguf_pytorch_utils.py +++ b/src/transformers/modeling_gguf_pytorch_utils.py @@ -369,6 +369,7 @@ def load_gguf_checkpoint(gguf_checkpoint_path, return_tensors=False, model_to_lo architecture = read_field(reader, "general.architecture")[0] model_name = read_field(reader, "general.name") + updated_architecture = None # in llama.cpp mistral models use the same architecture as llama. We need # to add this patch to ensure things work correctly on our side. if "llama" in architecture and "mistral" in model_name: @@ -377,6 +378,8 @@ def load_gguf_checkpoint(gguf_checkpoint_path, return_tensors=False, model_to_lo # It needs to be developed for supporting legacy t5. elif "t5" in architecture or "t5encoder" in architecture: parsed_parameters["config"]["is_gated_act"] = True + if "t5encoder" in architecture: + parsed_parameters["config"]["architectures"] = ["T5EncoderModel"] updated_architecture = "t5" else: updated_architecture = architecture @@ -395,7 +398,7 @@ def load_gguf_checkpoint(gguf_checkpoint_path, return_tensors=False, model_to_lo parsed_parameters["config"]["use_qkv_bias"] = qkv_bias parsed_parameters["config"]["use_parallel_residual"] = not use_parallel_residual - if architecture not in GGUF_SUPPORTED_ARCHITECTURES: + if architecture not in GGUF_SUPPORTED_ARCHITECTURES and updated_architecture not in GGUF_SUPPORTED_ARCHITECTURES: raise ValueError(f"GGUF model with architecture {architecture} is not supported yet.") # Handle tie_word_embeddings, if lm_head.weight is not present in tensors, diff --git a/src/transformers/modeling_utils.py b/src/transformers/modeling_utils.py index 723dc53b34..727661137c 100755 --- a/src/transformers/modeling_utils.py +++ b/src/transformers/modeling_utils.py @@ -4235,10 +4235,13 @@ class PreTrainedModel(nn.Module, ModuleUtilsMixin, GenerationMixin, PushToHubMix token=token, revision=revision, subfolder=subfolder, + gguf_file=gguf_file, _from_auto=from_auto_class, _from_pipeline=from_pipeline, **kwargs, ) + if "gguf_file" in model_kwargs: + model_kwargs.pop("gguf_file") else: # In case one passes a config to `from_pretrained` + "attn_implementation" # override the `_attn_implementation` attribute to `attn_implementation` of the kwargs