Fix: BLOOM tie_word_embeddings in GGUF (#35812)
* fix bloom ggml
* fix falcon output
* make style
This commit is contained in:
@@ -400,7 +400,7 @@ def load_gguf_checkpoint(gguf_checkpoint_path, return_tensors=False, model_to_lo
|
|||||||
|
|
||||||
# Handle tie_word_embeddings, if lm_head.weight is not present in tensors,
|
# Handle tie_word_embeddings, if lm_head.weight is not present in tensors,
|
||||||
# tie_word_embeddings is true otherwise false
|
# tie_word_embeddings is true otherwise false
|
||||||
exceptions = ["falcon"]
|
exceptions = ["falcon", "bloom"]
|
||||||
parsed_parameters["config"]["tie_word_embeddings"] = (
|
parsed_parameters["config"]["tie_word_embeddings"] = (
|
||||||
all("output.weight" != tensor.name for tensor in reader.tensors) or architecture in exceptions
|
all("output.weight" != tensor.name for tensor in reader.tensors) or architecture in exceptions
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -633,7 +633,7 @@ class GgufIntegrationTests(unittest.TestCase):
|
|||||||
text = tokenizer(self.example_text, return_tensors="pt")["input_ids"].to(torch_device)
|
text = tokenizer(self.example_text, return_tensors="pt")["input_ids"].to(torch_device)
|
||||||
out = model.generate(text, max_new_tokens=16)
|
out = model.generate(text, max_new_tokens=16)
|
||||||
|
|
||||||
EXPECTED_TEXT = 'Hello,\nI am trying to use the "get_post_meta"'
|
EXPECTED_TEXT = "Hello All,\nI am new to this forum.\nI am using the "
|
||||||
self.assertEqual(tokenizer.decode(out[0], skip_special_tokens=True), EXPECTED_TEXT)
|
self.assertEqual(tokenizer.decode(out[0], skip_special_tokens=True), EXPECTED_TEXT)
|
||||||
|
|
||||||
@unittest.skip("The test causes a torch.OutOfMemoryError on the CI but it passes with enough memory")
|
@unittest.skip("The test causes a torch.OutOfMemoryError on the CI but it passes with enough memory")
|
||||||
|
|||||||
Reference in New Issue
Block a user