From dbd84741258da3775f5635e2b4567d3d6321e2fe Mon Sep 17 00:00:00 2001 From: Mohamed Mekkouri <93391238+MekkCyber@users.noreply.github.com> Date: Tue, 21 Jan 2025 15:35:54 +0100 Subject: [PATCH] Fix : BLOOM tie_word_embeddings in GGUF (#35812) * fix bloom ggml * fix falcon output * make style --- src/transformers/modeling_gguf_pytorch_utils.py | 2 +- tests/quantization/ggml/test_ggml.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/transformers/modeling_gguf_pytorch_utils.py b/src/transformers/modeling_gguf_pytorch_utils.py index 21385233a7..fa2e21efdb 100644 --- a/src/transformers/modeling_gguf_pytorch_utils.py +++ b/src/transformers/modeling_gguf_pytorch_utils.py @@ -400,7 +400,7 @@ def load_gguf_checkpoint(gguf_checkpoint_path, return_tensors=False, model_to_lo # Handle tie_word_embeddings, if lm_head.weight is not present in tensors, # tie_word_embeddings is true otherwise false - exceptions = ["falcon"] + exceptions = ["falcon", "bloom"] parsed_parameters["config"]["tie_word_embeddings"] = ( all("output.weight" != tensor.name for tensor in reader.tensors) or architecture in exceptions ) diff --git a/tests/quantization/ggml/test_ggml.py b/tests/quantization/ggml/test_ggml.py index ad5cdb17fe..08e87ba44a 100644 --- a/tests/quantization/ggml/test_ggml.py +++ b/tests/quantization/ggml/test_ggml.py @@ -633,7 +633,7 @@ class GgufIntegrationTests(unittest.TestCase): text = tokenizer(self.example_text, return_tensors="pt")["input_ids"].to(torch_device) out = model.generate(text, max_new_tokens=16) - EXPECTED_TEXT = 'Hello,\nI am trying to use the "get_post_meta"' + EXPECTED_TEXT = "Hello All,\nI am new to this forum.\nI am using the " self.assertEqual(tokenizer.decode(out[0], skip_special_tokens=True), EXPECTED_TEXT) @unittest.skip("The test causes a torch.OutOfMemoryError on the CI but it passes with enough memory")