Add GGUF support to T5-Encoder (#36700)

* add GGUF support to T5-Encoder

Signed-off-by: Isotr0py <2037008807@qq.com>

* fix

Signed-off-by: Isotr0py <2037008807@qq.com>

* remove `gguf_file` from model_kwargs

Signed-off-by: Isotr0py <2037008807@qq.com>

---------

Signed-off-by: Isotr0py <2037008807@qq.com>
This commit is contained in:
Isotr0py
2025-03-14 00:57:33 +08:00
committed by GitHub
parent 4a60bae8e2
commit b070025aa6
2 changed files with 7 additions and 1 deletions

View File

@@ -369,6 +369,7 @@ def load_gguf_checkpoint(gguf_checkpoint_path, return_tensors=False, model_to_lo
architecture = read_field(reader, "general.architecture")[0] architecture = read_field(reader, "general.architecture")[0]
model_name = read_field(reader, "general.name") model_name = read_field(reader, "general.name")
updated_architecture = None
# in llama.cpp mistral models use the same architecture as llama. We need # in llama.cpp mistral models use the same architecture as llama. We need
# to add this patch to ensure things work correctly on our side. # to add this patch to ensure things work correctly on our side.
if "llama" in architecture and "mistral" in model_name: if "llama" in architecture and "mistral" in model_name:
@@ -377,6 +378,8 @@ def load_gguf_checkpoint(gguf_checkpoint_path, return_tensors=False, model_to_lo
# It needs to be developed for supporting legacy t5. # It needs to be developed for supporting legacy t5.
elif "t5" in architecture or "t5encoder" in architecture: elif "t5" in architecture or "t5encoder" in architecture:
parsed_parameters["config"]["is_gated_act"] = True parsed_parameters["config"]["is_gated_act"] = True
if "t5encoder" in architecture:
parsed_parameters["config"]["architectures"] = ["T5EncoderModel"]
updated_architecture = "t5" updated_architecture = "t5"
else: else:
updated_architecture = architecture updated_architecture = architecture
@@ -395,7 +398,7 @@ def load_gguf_checkpoint(gguf_checkpoint_path, return_tensors=False, model_to_lo
parsed_parameters["config"]["use_qkv_bias"] = qkv_bias parsed_parameters["config"]["use_qkv_bias"] = qkv_bias
parsed_parameters["config"]["use_parallel_residual"] = not use_parallel_residual parsed_parameters["config"]["use_parallel_residual"] = not use_parallel_residual
if architecture not in GGUF_SUPPORTED_ARCHITECTURES: if architecture not in GGUF_SUPPORTED_ARCHITECTURES and updated_architecture not in GGUF_SUPPORTED_ARCHITECTURES:
raise ValueError(f"GGUF model with architecture {architecture} is not supported yet.") raise ValueError(f"GGUF model with architecture {architecture} is not supported yet.")
# Handle tie_word_embeddings, if lm_head.weight is not present in tensors, # Handle tie_word_embeddings, if lm_head.weight is not present in tensors,

View File

@@ -4235,10 +4235,13 @@ class PreTrainedModel(nn.Module, ModuleUtilsMixin, GenerationMixin, PushToHubMix
token=token, token=token,
revision=revision, revision=revision,
subfolder=subfolder, subfolder=subfolder,
gguf_file=gguf_file,
_from_auto=from_auto_class, _from_auto=from_auto_class,
_from_pipeline=from_pipeline, _from_pipeline=from_pipeline,
**kwargs, **kwargs,
) )
if "gguf_file" in model_kwargs:
model_kwargs.pop("gguf_file")
else: else:
# In case one passes a config to `from_pretrained` + "attn_implementation" # In case one passes a config to `from_pretrained` + "attn_implementation"
# override the `_attn_implementation` attribute to `attn_implementation` of the kwargs # override the `_attn_implementation` attribute to `attn_implementation` of the kwargs