From db2f5354439f887f4ae0a46fb3f4a6dd4bec3b45 Mon Sep 17 00:00:00 2001
From: jiqing-feng <jiqing.feng@intel.com>
Date: Wed, 2 Jul 2025 02:06:37 +0800
Subject: [PATCH] update bnb ground truth (#39117)

* update bnb resulte

Signed-off-by: jiqing-feng <jiqing.feng@intel.com>

* set seed to avoid sampling different results

Signed-off-by: jiqing-feng <jiqing.feng@intel.com>

* fix int8 tests

Signed-off-by: jiqing-feng <jiqing.feng@intel.com>

* fix typo

Signed-off-by: jiqing-feng <jiqing.feng@intel.com>

* add comments

Signed-off-by: jiqing-feng <jiqing.feng@intel.com>

---------

Signed-off-by: jiqing-feng <jiqing.feng@intel.com>
---
 tests/quantization/bnb/test_4bit.py       | 5 +++++
 tests/quantization/bnb/test_mixed_int8.py | 5 +++++
 2 files changed, 10 insertions(+)

diff --git a/tests/quantization/bnb/test_4bit.py b/tests/quantization/bnb/test_4bit.py
index 9dc0bc396d..fd72d13505 100644
--- a/tests/quantization/bnb/test_4bit.py
+++ b/tests/quantization/bnb/test_4bit.py
@@ -27,6 +27,7 @@ from transformers import (
     AutoTokenizer,
     BitsAndBytesConfig,
     pipeline,
+    set_seed,
 )
 from transformers.models.opt.modeling_opt import OPTAttention
 from transformers.testing_utils import (
@@ -111,6 +112,8 @@ class Base4bitTest(unittest.TestCase):
     EXPECTED_OUTPUTS.add("Hello my name is John Doe, I am a student at the University")
     EXPECTED_OUTPUTS.add("Hello my name is John and I am 25 years old.")
     EXPECTED_OUTPUTS.add("Hello my name is John and I am a student at the University of")
+    # Expected values on Intel XPU and NV A100
+    EXPECTED_OUTPUTS.add("Hello my name is Alina. I have been working as a professional")
     MAX_NEW_TOKENS = 10
 
     def setUp(self):
@@ -513,6 +516,8 @@ class Pipeline4BitTest(Base4bitTest):
             max_new_tokens=self.MAX_NEW_TOKENS,
         )
 
+        # Avoid sampling different outputs
+        set_seed(42)
         # Real second forward pass
         pipeline_output = self.pipe(self.input_text)
         self.assertIn(pipeline_output[0]["generated_text"], self.EXPECTED_OUTPUTS)
diff --git a/tests/quantization/bnb/test_mixed_int8.py b/tests/quantization/bnb/test_mixed_int8.py
index 01755d8fee..304d97879f 100644
--- a/tests/quantization/bnb/test_mixed_int8.py
+++ b/tests/quantization/bnb/test_mixed_int8.py
@@ -27,6 +27,7 @@ from transformers import (
     AutoTokenizer,
     BitsAndBytesConfig,
     pipeline,
+    set_seed,
 )
 from transformers.models.opt.modeling_opt import OPTAttention
 from transformers.testing_utils import (
@@ -113,6 +114,8 @@ class BaseMixedInt8Test(unittest.TestCase):
     MAX_NEW_TOKENS = 10
     # Expected values with offload
     EXPECTED_OUTPUTS.add("Hello my name is John and I am a professional photographer based in")
+    # Expected values on Intel XPU and NV A100
+    EXPECTED_OUTPUTS.add("Hello my name is Alina. I have been working as a professional")
 
     def setUp(self):
         # Models and tokenizer
@@ -649,6 +652,8 @@ class MixedInt8TestPipeline(BaseMixedInt8Test):
             max_new_tokens=self.MAX_NEW_TOKENS,
         )
 
+        # Avoid sampling different outputs
+        set_seed(42)
         # Real second forward pass
         pipeline_output = self.pipe(self.input_text)
         self.assertIn(pipeline_output[0]["generated_text"], self.EXPECTED_OUTPUTS)