update bnb ground truth (#39117)

* update bnb results

Signed-off-by: jiqing-feng <jiqing.feng@intel.com>

* set seed to avoid sampling different results

Signed-off-by: jiqing-feng <jiqing.feng@intel.com>

* fix int8 tests

Signed-off-by: jiqing-feng <jiqing.feng@intel.com>

* fix typo

Signed-off-by: jiqing-feng <jiqing.feng@intel.com>

* add comments

Signed-off-by: jiqing-feng <jiqing.feng@intel.com>

---------

Signed-off-by: jiqing-feng <jiqing.feng@intel.com>
This commit is contained in:
jiqing-feng
2025-07-02 02:06:37 +08:00
committed by GitHub
parent 260846efad
commit db2f535443
2 changed files with 10 additions and 0 deletions

View File

@@ -27,6 +27,7 @@ from transformers import (
AutoTokenizer,
BitsAndBytesConfig,
pipeline,
set_seed,
)
from transformers.models.opt.modeling_opt import OPTAttention
from transformers.testing_utils import (
@@ -113,6 +114,8 @@ class BaseMixedInt8Test(unittest.TestCase):
MAX_NEW_TOKENS = 10
# Expected values with offload
EXPECTED_OUTPUTS.add("Hello my name is John and I am a professional photographer based in")
# Expected values on Intel XPU and NV A100
EXPECTED_OUTPUTS.add("Hello my name is Alina. I have been working as a professional")
def setUp(self):
# Models and tokenizer
@@ -649,6 +652,8 @@ class MixedInt8TestPipeline(BaseMixedInt8Test):
max_new_tokens=self.MAX_NEW_TOKENS,
)
# Avoid sampling different outputs
set_seed(42)
# Real second forward pass
pipeline_output = self.pipe(self.input_text)
self.assertIn(pipeline_output[0]["generated_text"], self.EXPECTED_OUTPUTS)