Fix Expected output for compressed-tensors tests (#36425)

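Fix: update the expected generation strings in test_tinyllama_w8a8 and test_llama_8b_fp8 of the compressed-tensors tests.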
This commit is contained in:
Mohamed Mekkouri
2025-02-26 21:17:24 +01:00
committed by GitHub
parent 1603018e7a
commit a7fbab33ae

View File

@@ -47,7 +47,7 @@ class CompressedTensorsTest(unittest.TestCase):
self.assertIsInstance(config_from_dict.sparsity_config, SparsityCompressionConfig)
def test_tinyllama_w8a8(self):
expected_out = "<s> Paris is the capital of which country?\n\n**A) Paris**\n\n**Q** ** Paris is the capital of which country?\n\n**A) Paris**\n\n**Q** ** Paris is the capital of which country"
expected_out = "<s> Paris is the capital of which country?\n\n 1. Paris is the capital of which country?\n\n 1. Paris is the capital of which country?\n\n 1. Paris is the capital of which country?\n\n"
self._test_quantized_model(self.tinyllama_w8a8, expected_out)
def test_tinyllama_w4a16(self):
@@ -59,7 +59,7 @@ class CompressedTensorsTest(unittest.TestCase):
self._test_quantized_model(self.tinyllama_w8a16, expected_out)
def test_llama_8b_fp8(self):
expected_out = "<|begin_of_text|>Paris is the capital of which country? France\nWhat is the name of the famous art museum in Paris? The Louvre\nWhat is the name of the famous opera house in Paris? Palais Garnier\nWhat is the name of the"
expected_out = "<|begin_of_text|>Paris is the capital of which country? France\nWhat is the name of the famous museum in Paris that is home to the Mona Lisa? The Louvre\nWhat is the name of the famous bridge in Paris that is often associated with the city"
self._test_quantized_model(self.llama3_8b_fp8, expected_out)
def _test_quantized_model(self, model_name: str, expected_output: str):