diff --git a/tests/quantization/bnb/test_4bit.py b/tests/quantization/bnb/test_4bit.py index 9dc0bc396d..fd72d13505 100644 --- a/tests/quantization/bnb/test_4bit.py +++ b/tests/quantization/bnb/test_4bit.py @@ -27,6 +27,7 @@ from transformers import ( AutoTokenizer, BitsAndBytesConfig, pipeline, + set_seed, ) from transformers.models.opt.modeling_opt import OPTAttention from transformers.testing_utils import ( @@ -111,6 +112,8 @@ class Base4bitTest(unittest.TestCase): EXPECTED_OUTPUTS.add("Hello my name is John Doe, I am a student at the University") EXPECTED_OUTPUTS.add("Hello my name is John and I am 25 years old.") EXPECTED_OUTPUTS.add("Hello my name is John and I am a student at the University of") + # Expected values on Intel XPU and NV A100 + EXPECTED_OUTPUTS.add("Hello my name is Alina. I have been working as a professional") MAX_NEW_TOKENS = 10 def setUp(self): @@ -513,6 +516,8 @@ class Pipeline4BitTest(Base4bitTest): max_new_tokens=self.MAX_NEW_TOKENS, ) + # Avoid sampling different outputs + set_seed(42) # Real second forward pass pipeline_output = self.pipe(self.input_text) self.assertIn(pipeline_output[0]["generated_text"], self.EXPECTED_OUTPUTS) diff --git a/tests/quantization/bnb/test_mixed_int8.py b/tests/quantization/bnb/test_mixed_int8.py index 01755d8fee..304d97879f 100644 --- a/tests/quantization/bnb/test_mixed_int8.py +++ b/tests/quantization/bnb/test_mixed_int8.py @@ -27,6 +27,7 @@ from transformers import ( AutoTokenizer, BitsAndBytesConfig, pipeline, + set_seed, ) from transformers.models.opt.modeling_opt import OPTAttention from transformers.testing_utils import ( @@ -113,6 +114,8 @@ class BaseMixedInt8Test(unittest.TestCase): MAX_NEW_TOKENS = 10 # Expected values with offload EXPECTED_OUTPUTS.add("Hello my name is John and I am a professional photographer based in") + # Expected values on Intel XPU and NV A100 + EXPECTED_OUTPUTS.add("Hello my name is Alina. I have been working as a professional") def setUp(self): # Models and tokenizer @@ -649,6 +652,8 @@ class MixedInt8TestPipeline(BaseMixedInt8Test): max_new_tokens=self.MAX_NEW_TOKENS, ) + # Avoid sampling different outputs + set_seed(42) # Real second forward pass pipeline_output = self.pipe(self.input_text) self.assertIn(pipeline_output[0]["generated_text"], self.EXPECTED_OUTPUTS)