From db2f5354439f887f4ae0a46fb3f4a6dd4bec3b45 Mon Sep 17 00:00:00 2001 From: jiqing-feng Date: Wed, 2 Jul 2025 02:06:37 +0800 Subject: [PATCH] update bnb ground truth (#39117) * update bnb results Signed-off-by: jiqing-feng * set seed to avoid sampling different results Signed-off-by: jiqing-feng * fix int8 tests Signed-off-by: jiqing-feng * fix typo Signed-off-by: jiqing-feng * add comments Signed-off-by: jiqing-feng --------- Signed-off-by: jiqing-feng --- tests/quantization/bnb/test_4bit.py | 5 +++++ tests/quantization/bnb/test_mixed_int8.py | 5 +++++ 2 files changed, 10 insertions(+) diff --git a/tests/quantization/bnb/test_4bit.py b/tests/quantization/bnb/test_4bit.py index 9dc0bc396d..fd72d13505 100644 --- a/tests/quantization/bnb/test_4bit.py +++ b/tests/quantization/bnb/test_4bit.py @@ -27,6 +27,7 @@ from transformers import ( AutoTokenizer, BitsAndBytesConfig, pipeline, + set_seed, ) from transformers.models.opt.modeling_opt import OPTAttention from transformers.testing_utils import ( @@ -111,6 +112,8 @@ class Base4bitTest(unittest.TestCase): EXPECTED_OUTPUTS.add("Hello my name is John Doe, I am a student at the University") EXPECTED_OUTPUTS.add("Hello my name is John and I am 25 years old.") EXPECTED_OUTPUTS.add("Hello my name is John and I am a student at the University of") + # Expected values on Intel XPU and NV A100 + EXPECTED_OUTPUTS.add("Hello my name is Alina. I have been working as a professional") MAX_NEW_TOKENS = 10 def setUp(self): @@ -513,6 +516,8 @@ class Pipeline4BitTest(Base4bitTest): max_new_tokens=self.MAX_NEW_TOKENS, ) + # Avoid sampling different outputs + set_seed(42) # Real second forward pass pipeline_output = self.pipe(self.input_text) self.assertIn(pipeline_output[0]["generated_text"], self.EXPECTED_OUTPUTS) diff --git a/tests/quantization/bnb/test_mixed_int8.py b/tests/quantization/bnb/test_mixed_int8.py index 01755d8fee..304d97879f 100644 --- a/tests/quantization/bnb/test_mixed_int8.py +++ b/tests/quantization/bnb/test_mixed_int8.py @@ -27,6 +27,7 @@ from transformers import ( AutoTokenizer, BitsAndBytesConfig, pipeline, + set_seed, ) from transformers.models.opt.modeling_opt import OPTAttention from transformers.testing_utils import ( @@ -113,6 +114,8 @@ class BaseMixedInt8Test(unittest.TestCase): MAX_NEW_TOKENS = 10 # Expected values with offload EXPECTED_OUTPUTS.add("Hello my name is John and I am a professional photographer based in") + # Expected values on Intel XPU and NV A100 + EXPECTED_OUTPUTS.add("Hello my name is Alina. I have been working as a professional") def setUp(self): # Models and tokenizer @@ -649,6 +652,8 @@ class MixedInt8TestPipeline(BaseMixedInt8Test): max_new_tokens=self.MAX_NEW_TOKENS, ) + # Avoid sampling different outputs + set_seed(42) # Real second forward pass pipeline_output = self.pipe(self.input_text) self.assertIn(pipeline_output[0]["generated_text"], self.EXPECTED_OUTPUTS)