From dbd84741258da3775f5635e2b4567d3d6321e2fe Mon Sep 17 00:00:00 2001
From: Mohamed Mekkouri <93391238+MekkCyber@users.noreply.github.com>
Date: Tue, 21 Jan 2025 15:35:54 +0100
Subject: [PATCH] Fix: BLOOM tie_word_embeddings in GGUF (#35812)

* fix bloom ggml

* fix falcon output

* make style
---
 src/transformers/modeling_gguf_pytorch_utils.py | 2 +-
 tests/quantization/ggml/test_ggml.py            | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/transformers/modeling_gguf_pytorch_utils.py b/src/transformers/modeling_gguf_pytorch_utils.py
index 21385233a7..fa2e21efdb 100644
--- a/src/transformers/modeling_gguf_pytorch_utils.py
+++ b/src/transformers/modeling_gguf_pytorch_utils.py
@@ -400,7 +400,7 @@ def load_gguf_checkpoint(gguf_checkpoint_path, return_tensors=False, model_to_lo
 
     # Handle tie_word_embeddings, if lm_head.weight is not present in tensors,
     # tie_word_embeddings is true otherwise false
-    exceptions = ["falcon"]
+    exceptions = ["falcon", "bloom"]
     parsed_parameters["config"]["tie_word_embeddings"] = (
         all("output.weight" != tensor.name for tensor in reader.tensors) or architecture in exceptions
     )
diff --git a/tests/quantization/ggml/test_ggml.py b/tests/quantization/ggml/test_ggml.py
index ad5cdb17fe..08e87ba44a 100644
--- a/tests/quantization/ggml/test_ggml.py
+++ b/tests/quantization/ggml/test_ggml.py
@@ -633,7 +633,7 @@ class GgufIntegrationTests(unittest.TestCase):
         text = tokenizer(self.example_text, return_tensors="pt")["input_ids"].to(torch_device)
         out = model.generate(text, max_new_tokens=16)
 
-        EXPECTED_TEXT = 'Hello,\nI am trying to use the "get_post_meta"'
+        EXPECTED_TEXT = "Hello All,\nI am new to this forum.\nI am using the "
         self.assertEqual(tokenizer.decode(out[0], skip_special_tokens=True), EXPECTED_TEXT)
 
     @unittest.skip("The test causes a torch.OutOfMemoryError on the CI but it passes with enough memory")