fix xpu tests (#36656)

* fix awq xpu tests

Signed-off-by: jiqing-feng <jiqing.feng@intel.com>

* update

Signed-off-by: jiqing-feng <jiqing.feng@intel.com>

* fix llava next video bnb tests

Signed-off-by: jiqing-feng <jiqing.feng@intel.com>

---------

Signed-off-by: jiqing-feng <jiqing.feng@intel.com>
Co-authored-by: Marc Sun <57196510+SunMarc@users.noreply.github.com>
This commit is contained in:
jiqing-feng
2025-03-17 22:57:49 +08:00
committed by GitHub
parent da7d64f4ff
commit 27361bd218
3 changed files with 19 additions and 11 deletions

View File

@@ -902,6 +902,9 @@ class AwqConfig(QuantizationConfigMixin):
)
if self.backend == AwqBackendPackingMethod.LLMAWQ:
# torch.cuda.get_device_capability() can only be called when a CUDA device is available
if not torch.cuda.is_available():
raise ValueError("LLM-AWQ backend is only supported on CUDA")
compute_capability = torch.cuda.get_device_capability()
major, minor = compute_capability
if major < 8:

View File

@@ -430,13 +430,13 @@ class LlavaNextVideoForConditionalGenerationIntegrationTest(unittest.TestCase):
# verify generation
output = model.generate(**inputs, do_sample=False, max_new_tokens=40)
EXPECTED_DECODED_TEXT = 'USER: \nWhy is this video funny? ASSISTANT: The humor in this video comes from the unexpected and somewhat comical situation of a young child reading a book while another child is attempting to read the same book. The child who is reading the book seems' # fmt: skip
self.assertEqual(
self.processor.decode(output[0], skip_special_tokens=True),
EXPECTED_DECODED_TEXT,
EXPECTED_DECODED_TEXT = (
"USER: \nWhy is this video funny? ASSISTANT: The humor in this video comes from the unexpected and somewhat comical situation of a young child reading a book while another child is attempting to read the same book. The child who is reading the book seems", # cuda output
"USER: \nWhy is this video funny? ASSISTANT: The humor in this video comes from the unexpected and somewhat comical situation of a young child reading a book while wearing a pair of glasses that are too large for them. The glasses are", # xpu output
)
self.assertTrue(self.processor.decode(output[0], skip_special_tokens=True) in EXPECTED_DECODED_TEXT)
@slow
@require_bitsandbytes
def test_small_model_integration_test_batch(self):

View File

@@ -59,16 +59,21 @@ class AwqConfigTest(unittest.TestCase):
with self.assertRaises(ValueError):
AwqConfig(bits=4, backend="unexisting-backend")
compute_capability = torch.cuda.get_device_capability()
major, minor = compute_capability
# torch.cuda.get_device_capability() can only be called when a CUDA device is available
support_llm_awq = False
if torch.cuda.is_available():
compute_capability = torch.cuda.get_device_capability()
major, minor = compute_capability
if major >= 8:
support_llm_awq = True
if major < 8:
if support_llm_awq:
# LLMAWQ should work on an A100
AwqConfig(bits=4, backend="llm-awq")
else:
# LLMAWQ does not work on a T4 (compute capability major < 8) or when no CUDA device is available
with self.assertRaises(ValueError):
AwqConfig(bits=4, backend="llm-awq")
else:
# LLMAWQ should work on an A100
AwqConfig(bits=4, backend="llm-awq")
def test_to_dict(self):
"""