enable more test cases on xpu (#38572)

* enable glm4 integration cases on XPU, set xpu expectation for blip2 Signed-off-by: Matrix YAO <matrix.yao@intel.com> * more Signed-off-by: YAO Matrix <matrix.yao@intel.com> * fix style Signed-off-by: YAO Matrix <matrix.yao@intel.com> * refine wording Signed-off-by: YAO Matrix <matrix.yao@intel.com> * refine test case names Signed-off-by: YAO Matrix <matrix.yao@intel.com> * run Signed-off-by: YAO Matrix <matrix.yao@intel.com> * add gemma2 and chameleon Signed-off-by: YAO Matrix <matrix.yao@intel.com> * fix review comments Signed-off-by: YAO Matrix <matrix.yao@intel.com> --------- Signed-off-by: Matrix YAO <matrix.yao@intel.com> Signed-off-by: YAO Matrix <matrix.yao@intel.com>
2025-06-06 15:29:51 +08:00
parent 31023b6909
commit 89542fb81c
23 changed files with 150 additions and 72 deletions
--- a/tests/quantization/bnb/test_mixed_int8.py
+++ b/tests/quantization/bnb/test_mixed_int8.py
@@ -730,7 +730,7 @@ class MixedInt8TestCpuGpu(BaseMixedInt8Test):
        output_text = self.tokenizer.decode(output_parallel[0], skip_special_tokens=True)
        self.assertIn(output_text, self.EXPECTED_OUTPUTS)

-    def test_cpu_gpu_loading_random_device_map(self):
+    def test_cpu_accelerator_loading_random_device_map(self):
        r"""
        A test to check is dispatching a model on cpu & gpu works correctly using a random `device_map`.
        """
@@ -778,7 +778,7 @@ class MixedInt8TestCpuGpu(BaseMixedInt8Test):

        self.check_inference_correctness(model_8bit)

-    def test_cpu_gpu_loading_custom_device_map(self):
+    def test_cpu_accelerator_loading_custom_device_map(self):
        r"""
        A test to check is dispatching a model on cpu & gpu works correctly using a custom `device_map`.
        This time the device map is more organized than the test above and uses the abstraction
@@ -805,7 +805,7 @@ class MixedInt8TestCpuGpu(BaseMixedInt8Test):

        self.check_inference_correctness(model_8bit)

-    def test_cpu_gpu_disk_loading_custom_device_map(self):
+    def test_cpu_accelerator_disk_loading_custom_device_map(self):
        r"""
        A test to check is dispatching a model on cpu & gpu works correctly using a custom `device_map`.
        This time we also add `disk` on the device_map.
@@ -832,7 +832,7 @@ class MixedInt8TestCpuGpu(BaseMixedInt8Test):

            self.check_inference_correctness(model_8bit)

-    def test_cpu_gpu_disk_loading_custom_device_map_kwargs(self):
+    def test_cpu_accelerator_disk_loading_custom_device_map_kwargs(self):
        r"""
        A test to check is dispatching a model on cpu & gpu works correctly using a custom `device_map`.
        This time we also add `disk` on the device_map - using the kwargs directly instead of the quantization config