Enable more test cases on XPU (#38572)

* enable GLM-4 integration cases on XPU, set XPU expectation for BLIP-2

Signed-off-by: Matrix YAO <matrix.yao@intel.com>

* more

Signed-off-by: YAO Matrix <matrix.yao@intel.com>

* fix style

Signed-off-by: YAO Matrix <matrix.yao@intel.com>

* refine wording

Signed-off-by: YAO Matrix <matrix.yao@intel.com>

* refine test case names

Signed-off-by: YAO Matrix <matrix.yao@intel.com>

* run

Signed-off-by: YAO Matrix <matrix.yao@intel.com>

* add gemma2 and chameleon

Signed-off-by: YAO Matrix <matrix.yao@intel.com>

* fix review comments

Signed-off-by: YAO Matrix <matrix.yao@intel.com>

---------

Signed-off-by: Matrix YAO <matrix.yao@intel.com>
Signed-off-by: YAO Matrix <matrix.yao@intel.com>
This commit is contained in:
Yao Matrix
2025-06-06 15:29:51 +08:00
committed by GitHub
parent 31023b6909
commit 89542fb81c
23 changed files with 150 additions and 72 deletions

View File

@@ -24,6 +24,7 @@ from transformers.testing_utils import (
cleanup,
require_flash_attn,
require_torch,
require_torch_large_accelerator,
require_torch_large_gpu,
require_torch_sdpa,
slow,
@@ -79,7 +80,7 @@ class Glm4ModelTest(CausalLMModelTest, unittest.TestCase):
@slow
@require_torch_large_gpu
@require_torch_large_accelerator
class Glm4IntegrationTest(unittest.TestCase):
input_text = ["Hello I am doing", "Hi today"]
model_id = "THUDM/GLM-4-9B-0414"
@@ -90,6 +91,10 @@ class Glm4IntegrationTest(unittest.TestCase):
def test_model_9b_fp16(self):
EXPECTED_TEXTS = Expectations(
{
("xpu", 3): [
"Hello I am doing a project on the history of the internet and I need to know what the first website was and what",
"Hi today I am going to tell you about the most common disease in the world. This disease is called diabetes",
],
("cuda", 7): [],
("cuda", 8): [
"Hello I am doing a project on the history of the internet and I need to know what the first website was and what",
@@ -114,6 +119,10 @@ class Glm4IntegrationTest(unittest.TestCase):
def test_model_9b_bf16(self):
EXPECTED_TEXTS = Expectations(
{
("xpu", 3): [
"Hello I am doing a project on the history of the internet and I need to know what the first website was and what",
"Hi today I am going to tell you about the most common disease in the world. This disease is called diabetes",
],
("cuda", 7): [],
("cuda", 8): [
"Hello I am doing a project on the history of the internet and I need to know what the first website was and what",
@@ -138,6 +147,10 @@ class Glm4IntegrationTest(unittest.TestCase):
def test_model_9b_eager(self):
EXPECTED_TEXTS = Expectations(
{
("xpu", 3): [
"Hello I am doing a project on the history of the internet and I need to know what the first website was and who",
"Hi today I am going to tell you about the most common disease in the world. This disease is called diabetes",
],
("cuda", 7): [],
("cuda", 8): [
"Hello I am doing a project on the history of the internet and I need to know what the first website was and what",
@@ -167,6 +180,10 @@ class Glm4IntegrationTest(unittest.TestCase):
def test_model_9b_sdpa(self):
EXPECTED_TEXTS = Expectations(
{
("xpu", 3): [
"Hello I am doing a project on the history of the internet and I need to know what the first website was and what",
"Hi today I am going to tell you about the most common disease in the world. This disease is called diabetes",
],
("cuda", 7): [],
("cuda", 8): [
"Hello I am doing a project on the history of the internet and I need to know what the first website was and what",
@@ -193,6 +210,7 @@ class Glm4IntegrationTest(unittest.TestCase):
self.assertEqual(output_text, EXPECTED_TEXT)
@require_flash_attn
@require_torch_large_gpu
@pytest.mark.flash_attn_test
def test_model_9b_flash_attn(self):
EXPECTED_TEXTS = Expectations(