Add GGUF support to T5-Encoder (#36700)
* add gguf support to t5encoder
  Signed-off-by: Isotr0py <2037008807@qq.com>
* fix
  Signed-off-by: Isotr0py <2037008807@qq.com>
* remove gguf from model_kwargs
  Signed-off-by: Isotr0py <2037008807@qq.com>
---------
Signed-off-by: Isotr0py <2037008807@qq.com>
This commit is contained in:
@@ -369,6 +369,7 @@ def load_gguf_checkpoint(gguf_checkpoint_path, return_tensors=False, model_to_lo
|
|||||||
architecture = read_field(reader, "general.architecture")[0]
|
architecture = read_field(reader, "general.architecture")[0]
|
||||||
model_name = read_field(reader, "general.name")
|
model_name = read_field(reader, "general.name")
|
||||||
|
|
||||||
|
updated_architecture = None
|
||||||
# in llama.cpp mistral models use the same architecture as llama. We need
|
# in llama.cpp mistral models use the same architecture as llama. We need
|
||||||
# to add this patch to ensure things work correctly on our side.
|
# to add this patch to ensure things work correctly on our side.
|
||||||
if "llama" in architecture and "mistral" in model_name:
|
if "llama" in architecture and "mistral" in model_name:
|
||||||
@@ -377,6 +378,8 @@ def load_gguf_checkpoint(gguf_checkpoint_path, return_tensors=False, model_to_lo
|
|||||||
# It needs to be developed for supporting legacy t5.
|
# It needs to be developed for supporting legacy t5.
|
||||||
elif "t5" in architecture or "t5encoder" in architecture:
|
elif "t5" in architecture or "t5encoder" in architecture:
|
||||||
parsed_parameters["config"]["is_gated_act"] = True
|
parsed_parameters["config"]["is_gated_act"] = True
|
||||||
|
if "t5encoder" in architecture:
|
||||||
|
parsed_parameters["config"]["architectures"] = ["T5EncoderModel"]
|
||||||
updated_architecture = "t5"
|
updated_architecture = "t5"
|
||||||
else:
|
else:
|
||||||
updated_architecture = architecture
|
updated_architecture = architecture
|
||||||
@@ -395,7 +398,7 @@ def load_gguf_checkpoint(gguf_checkpoint_path, return_tensors=False, model_to_lo
|
|||||||
parsed_parameters["config"]["use_qkv_bias"] = qkv_bias
|
parsed_parameters["config"]["use_qkv_bias"] = qkv_bias
|
||||||
parsed_parameters["config"]["use_parallel_residual"] = not use_parallel_residual
|
parsed_parameters["config"]["use_parallel_residual"] = not use_parallel_residual
|
||||||
|
|
||||||
if architecture not in GGUF_SUPPORTED_ARCHITECTURES:
|
if architecture not in GGUF_SUPPORTED_ARCHITECTURES and updated_architecture not in GGUF_SUPPORTED_ARCHITECTURES:
|
||||||
raise ValueError(f"GGUF model with architecture {architecture} is not supported yet.")
|
raise ValueError(f"GGUF model with architecture {architecture} is not supported yet.")
|
||||||
|
|
||||||
# Handle tie_word_embeddings, if lm_head.weight is not present in tensors,
|
# Handle tie_word_embeddings, if lm_head.weight is not present in tensors,
|
||||||
|
|||||||
@@ -4235,10 +4235,13 @@ class PreTrainedModel(nn.Module, ModuleUtilsMixin, GenerationMixin, PushToHubMix
|
|||||||
token=token,
|
token=token,
|
||||||
revision=revision,
|
revision=revision,
|
||||||
subfolder=subfolder,
|
subfolder=subfolder,
|
||||||
|
gguf_file=gguf_file,
|
||||||
_from_auto=from_auto_class,
|
_from_auto=from_auto_class,
|
||||||
_from_pipeline=from_pipeline,
|
_from_pipeline=from_pipeline,
|
||||||
**kwargs,
|
**kwargs,
|
||||||
)
|
)
|
||||||
|
if "gguf_file" in model_kwargs:
|
||||||
|
model_kwargs.pop("gguf_file")
|
||||||
else:
|
else:
|
||||||
# In case one passes a config to `from_pretrained` + "attn_implementation"
|
# In case one passes a config to `from_pretrained` + "attn_implementation"
|
||||||
# override the `_attn_implementation` attribute to `attn_implementation` of the kwargs
|
# override the `_attn_implementation` attribute to `attn_implementation` of the kwargs
|
||||||
|
|||||||
Reference in New Issue
Block a user