fix xpu tests (#36656)
* fix awq xpu tests

Signed-off-by: jiqing-feng <jiqing.feng@intel.com>

* update

Signed-off-by: jiqing-feng <jiqing.feng@intel.com>

* fix llava next video bnb tests

Signed-off-by: jiqing-feng <jiqing.feng@intel.com>

---------

Signed-off-by: jiqing-feng <jiqing.feng@intel.com>
Co-authored-by: Marc Sun <57196510+SunMarc@users.noreply.github.com>
This commit is contained in:
@@ -902,6 +902,9 @@ class AwqConfig(QuantizationConfigMixin):
|
|||||||
)
|
)
|
||||||
|
|
||||||
if self.backend == AwqBackendPackingMethod.LLMAWQ:
|
if self.backend == AwqBackendPackingMethod.LLMAWQ:
|
||||||
|
# Only cuda device can run this function
|
||||||
|
if not torch.cuda.is_available():
|
||||||
|
raise ValueError("LLM-AWQ backend is only supported on CUDA")
|
||||||
compute_capability = torch.cuda.get_device_capability()
|
compute_capability = torch.cuda.get_device_capability()
|
||||||
major, minor = compute_capability
|
major, minor = compute_capability
|
||||||
if major < 8:
|
if major < 8:
|
||||||
|
|||||||
@@ -430,13 +430,13 @@ class LlavaNextVideoForConditionalGenerationIntegrationTest(unittest.TestCase):
|
|||||||
|
|
||||||
# verify generation
|
# verify generation
|
||||||
output = model.generate(**inputs, do_sample=False, max_new_tokens=40)
|
output = model.generate(**inputs, do_sample=False, max_new_tokens=40)
|
||||||
EXPECTED_DECODED_TEXT = 'USER: \nWhy is this video funny? ASSISTANT: The humor in this video comes from the unexpected and somewhat comical situation of a young child reading a book while another child is attempting to read the same book. The child who is reading the book seems' # fmt: skip
|
EXPECTED_DECODED_TEXT = (
|
||||||
|
"USER: \nWhy is this video funny? ASSISTANT: The humor in this video comes from the unexpected and somewhat comical situation of a young child reading a book while another child is attempting to read the same book. The child who is reading the book seems", # cuda output
|
||||||
self.assertEqual(
|
"USER: \nWhy is this video funny? ASSISTANT: The humor in this video comes from the unexpected and somewhat comical situation of a young child reading a book while wearing a pair of glasses that are too large for them. The glasses are", # xpu output
|
||||||
self.processor.decode(output[0], skip_special_tokens=True),
|
|
||||||
EXPECTED_DECODED_TEXT,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
|
self.assertTrue(self.processor.decode(output[0], skip_special_tokens=True) in EXPECTED_DECODED_TEXT)
|
||||||
|
|
||||||
@slow
|
@slow
|
||||||
@require_bitsandbytes
|
@require_bitsandbytes
|
||||||
def test_small_model_integration_test_batch(self):
|
def test_small_model_integration_test_batch(self):
|
||||||
|
|||||||
@@ -59,16 +59,21 @@ class AwqConfigTest(unittest.TestCase):
|
|||||||
with self.assertRaises(ValueError):
|
with self.assertRaises(ValueError):
|
||||||
AwqConfig(bits=4, backend="unexisting-backend")
|
AwqConfig(bits=4, backend="unexisting-backend")
|
||||||
|
|
||||||
compute_capability = torch.cuda.get_device_capability()
|
# Only cuda device can run this function
|
||||||
major, minor = compute_capability
|
support_llm_awq = False
|
||||||
|
if torch.cuda.is_available():
|
||||||
|
compute_capability = torch.cuda.get_device_capability()
|
||||||
|
major, minor = compute_capability
|
||||||
|
if major >= 8:
|
||||||
|
support_llm_awq = True
|
||||||
|
|
||||||
if major < 8:
|
if support_llm_awq:
|
||||||
|
# LLMAWQ should work on an A100
|
||||||
|
AwqConfig(bits=4, backend="llm-awq")
|
||||||
|
else:
|
||||||
# LLMAWQ does not work on a T4
|
# LLMAWQ does not work on a T4
|
||||||
with self.assertRaises(ValueError):
|
with self.assertRaises(ValueError):
|
||||||
AwqConfig(bits=4, backend="llm-awq")
|
AwqConfig(bits=4, backend="llm-awq")
|
||||||
else:
|
|
||||||
# LLMAWQ should work on an A100
|
|
||||||
AwqConfig(bits=4, backend="llm-awq")
|
|
||||||
|
|
||||||
def test_to_dict(self):
|
def test_to_dict(self):
|
||||||
"""
|
"""
|
||||||
|
|||||||
Reference in New Issue
Block a user