From 2100ee654569d323bfb77266cd3a75070abfda97 Mon Sep 17 00:00:00 2001 From: Yao Matrix Date: Mon, 30 Jun 2025 17:49:03 +0800 Subject: [PATCH] fix UT failures on XPU w/ stock PyTorch 2.7 & 2.8 (#39116) * fix UT failures on XPU w/ stock PyTorch 2.7 & 2.8 Signed-off-by: YAO Matrix * zamba2 Signed-off-by: YAO Matrix * xx Signed-off-by: YAO Matrix * internvl Signed-off-by: YAO Matrix * tp cases Signed-off-by: YAO Matrix --------- Signed-off-by: YAO Matrix --- tests/models/cohere2/test_modeling_cohere2.py | 4 + .../test_modeling_grounding_dino.py | 79 +++++++++++++------ .../test_modeling_instructblip.py | 28 +++++-- .../models/internvl/test_modeling_internvl.py | 4 +- tests/models/llama4/test_modeling_llama4.py | 15 +++- tests/models/zamba2/test_modeling_zamba2.py | 26 ++++-- tests/quantization/bnb/test_4bit.py | 8 +- tests/tensor_parallel/test_tensor_parallel.py | 6 +- 8 files changed, 119 insertions(+), 51 deletions(-) diff --git a/tests/models/cohere2/test_modeling_cohere2.py b/tests/models/cohere2/test_modeling_cohere2.py index 4338e4a070..0f8739bb66 100644 --- a/tests/models/cohere2/test_modeling_cohere2.py +++ b/tests/models/cohere2/test_modeling_cohere2.py @@ -24,6 +24,7 @@ from transformers import AutoModelForCausalLM, AutoTokenizer, Cohere2Config, is_ from transformers.generation.configuration_utils import GenerationConfig from transformers.testing_utils import ( Expectations, + cleanup, is_flash_attn_2_available, require_flash_attn, require_read_token, @@ -136,6 +137,9 @@ class Cohere2ModelTest(CohereModelTest, unittest.TestCase): class Cohere2IntegrationTest(unittest.TestCase): input_text = ["Hello I am doing", "Hi today"] + def tearDown(self): + cleanup(torch_device, gc_collect=True) + def test_model_bf16(self): model_id = "CohereForAI/c4ai-command-r7b-12-2024" EXPECTED_TEXTS = [ diff --git a/tests/models/grounding_dino/test_modeling_grounding_dino.py b/tests/models/grounding_dino/test_modeling_grounding_dino.py index 84636954a9..2afe3f0ef3 100644 --- 
a/tests/models/grounding_dino/test_modeling_grounding_dino.py +++ b/tests/models/grounding_dino/test_modeling_grounding_dino.py @@ -29,6 +29,7 @@ from transformers import ( ) from transformers.file_utils import cached_property from transformers.testing_utils import ( + Expectations, is_flaky, require_timm, require_torch, @@ -804,34 +805,62 @@ class GroundingDinoModelIntegrationTests(unittest.TestCase): with torch.no_grad(): outputs = model(**text_inputs, **image_inputs) - # Loss differs by CPU and GPU, also this can be changed in future. - expected_loss_dict = { - "loss_ce": torch.tensor(1.1147), - "loss_bbox": torch.tensor(0.2031), - "loss_giou": torch.tensor(0.5819), - "loss_ce_0": torch.tensor(1.1941), - "loss_bbox_0": torch.tensor(0.1978), - "loss_giou_0": torch.tensor(0.5524), - "loss_ce_1": torch.tensor(1.1621), - "loss_bbox_1": torch.tensor(0.1909), - "loss_giou_1": torch.tensor(0.5892), - "loss_ce_2": torch.tensor(1.1641), - "loss_bbox_2": torch.tensor(0.1892), - "loss_giou_2": torch.tensor(0.5626), - "loss_ce_3": torch.tensor(1.1943), - "loss_bbox_3": torch.tensor(0.1941), - "loss_giou_3": torch.tensor(0.5607), - "loss_ce_4": torch.tensor(1.0956), - "loss_bbox_4": torch.tensor(0.2008), - "loss_giou_4": torch.tensor(0.5836), - "loss_ce_enc": torch.tensor(16226.3164), - "loss_bbox_enc": torch.tensor(0.3063), - "loss_giou_enc": torch.tensor(0.7380), - } + # Loss differs by CPU and accelerator, also this can be changed in future. 
+ expected_loss_dicts = Expectations( + { + ("xpu", 3): { + "loss_ce": torch.tensor(1.1147), + "loss_bbox": torch.tensor(0.2031), + "loss_giou": torch.tensor(0.5819), + "loss_ce_0": torch.tensor(1.1941), + "loss_bbox_0": torch.tensor(0.1978), + "loss_giou_0": torch.tensor(0.5524), + "loss_ce_1": torch.tensor(1.1621), + "loss_bbox_1": torch.tensor(0.1909), + "loss_giou_1": torch.tensor(0.5892), + "loss_ce_2": torch.tensor(1.1641), + "loss_bbox_2": torch.tensor(0.1892), + "loss_giou_2": torch.tensor(0.5626), + "loss_ce_3": torch.tensor(1.1943), + "loss_bbox_3": torch.tensor(0.1941), + "loss_giou_3": torch.tensor(0.5592), + "loss_ce_4": torch.tensor(1.0956), + "loss_bbox_4": torch.tensor(0.2037), + "loss_giou_4": torch.tensor(0.5813), + "loss_ce_enc": torch.tensor(16226.3164), + "loss_bbox_enc": torch.tensor(0.3063), + "loss_giou_enc": torch.tensor(0.7380), + }, + ("cuda", None): { + "loss_ce": torch.tensor(1.1147), + "loss_bbox": torch.tensor(0.2031), + "loss_giou": torch.tensor(0.5819), + "loss_ce_0": torch.tensor(1.1941), + "loss_bbox_0": torch.tensor(0.1978), + "loss_giou_0": torch.tensor(0.5524), + "loss_ce_1": torch.tensor(1.1621), + "loss_bbox_1": torch.tensor(0.1909), + "loss_giou_1": torch.tensor(0.5892), + "loss_ce_2": torch.tensor(1.1641), + "loss_bbox_2": torch.tensor(0.1892), + "loss_giou_2": torch.tensor(0.5626), + "loss_ce_3": torch.tensor(1.1943), + "loss_bbox_3": torch.tensor(0.1941), + "loss_giou_3": torch.tensor(0.5607), + "loss_ce_4": torch.tensor(1.0956), + "loss_bbox_4": torch.tensor(0.2008), + "loss_giou_4": torch.tensor(0.5836), + "loss_ce_enc": torch.tensor(16226.3164), + "loss_bbox_enc": torch.tensor(0.3063), + "loss_giou_enc": torch.tensor(0.7380), + }, + } + ) # fmt: skip + expected_loss_dict = expected_loss_dicts.get_expectation() expected_loss = torch.tensor(32482.2305) for key in expected_loss_dict: - self.assertTrue(torch.allclose(outputs.loss_dict[key], expected_loss_dict[key], atol=1e-3)) + 
torch.testing.assert_close(outputs.loss_dict[key], expected_loss_dict[key], rtol=1e-5, atol=1e-3) self.assertTrue(torch.allclose(outputs.loss, expected_loss, atol=1e-3)) diff --git a/tests/models/instructblip/test_modeling_instructblip.py b/tests/models/instructblip/test_modeling_instructblip.py index 5a7dbae587..66621fc0fe 100644 --- a/tests/models/instructblip/test_modeling_instructblip.py +++ b/tests/models/instructblip/test_modeling_instructblip.py @@ -30,6 +30,8 @@ from transformers import ( InstructBlipVisionConfig, ) from transformers.testing_utils import ( + Expectations, + cleanup, require_accelerate, require_bitsandbytes, require_torch, @@ -722,6 +724,9 @@ def prepare_img(): @require_torch @slow class InstructBlipModelIntegrationTest(unittest.TestCase): + def tearDown(self): + cleanup(torch_device, gc_collect=False) + @require_bitsandbytes @require_accelerate def test_inference_vicuna_7b(self): @@ -739,13 +744,24 @@ class InstructBlipModelIntegrationTest(unittest.TestCase): outputs = model.generate(**inputs, max_new_tokens=30) generated_text = processor.batch_decode(outputs, skip_special_tokens=True)[0].strip() - expected_outputs = [32001] * 32 + [2, 1724, 338, 22910, 1048, 445, 1967, 29973, 450, 22910, 9565, 310, 445, 1967, 338, 393, 263, 767, 338, 13977, 292, 22095, 373, 278, 1250, 310, 263, 13328, 20134, 29963, 1550, 19500, 373, 263, 19587, 4272, 11952, 29889] # fmt: off + expected_outputs = Expectations( + { + ("xpu", 3): [32001] * 32 + [2, 1724, 338, 22910, 1048, 445, 1967, 29973, 450, 22910, 9565, 310, 445, 1967, 338, 393, 263, 767, 338, 13977, 292, 22095, 373, 278, 1250, 310, 263, 13328, 20134, 29963, 1550, 19500, 1623, 263, 19587, 4272, 11952, 29889], + ("cuda", None): [32001] * 32 + [2, 1724, 338, 22910, 1048, 445, 1967, 29973, 450, 22910, 9565, 310, 445, 1967, 338, 393, 263, 767, 338, 13977, 292, 22095, 373, 278, 1250, 310, 263, 13328, 20134, 29963, 1550, 19500, 373, 263, 19587, 4272, 11952, 29889], + } + ) # fmt: skip + expected_output = 
expected_outputs.get_expectation() - self.assertEqual(outputs[0].tolist(), expected_outputs) - self.assertEqual( - generated_text, - "What is unusual about this image? The unusual aspect of this image is that a man is ironing clothes on the back of a yellow SUV while driving on a busy city street.", - ) + expected_texts = Expectations( + { + ("xpu", 3): "What is unusual about this image? The unusual aspect of this image is that a man is ironing clothes on the back of a yellow SUV while driving down a busy city street.", + ("cuda", None): "What is unusual about this image? The unusual aspect of this image is that a man is ironing clothes on the back of a yellow SUV while driving on a busy city street.", + } + ) # fmt: skip + expected_text = expected_texts.get_expectation() + + self.assertEqual(outputs[0].tolist(), expected_output) + self.assertEqual(generated_text, expected_text) def test_inference_flant5_xl(self): processor = InstructBlipProcessor.from_pretrained("Salesforce/instructblip-flan-t5-xl") diff --git a/tests/models/internvl/test_modeling_internvl.py b/tests/models/internvl/test_modeling_internvl.py index d7e1132be6..19eb3cc4c7 100644 --- a/tests/models/internvl/test_modeling_internvl.py +++ b/tests/models/internvl/test_modeling_internvl.py @@ -430,7 +430,7 @@ class InternVLQwen2IntegrationTest(unittest.TestCase): expected_outputs = Expectations( { - ("xpu", 3): 'user\n\nDescribe this image\nassistant\nThe image shows a street scene with a traditional Chinese archway, known as a "Chinese Gate" or "Chinese Gate"', + ("xpu", 3): 'user\n\nDescribe this image\nassistant\nThe image shows a street scene with a traditional Chinese archway, known as a "Chinese Gate" or "Chinese Gate of', ("cuda", 7): 'user\n\nDescribe this image\nassistant\nThe image shows a street scene with a traditional Chinese archway, known as a "Chinese Gate" or "Chinese Gate of', } ) # fmt: skip @@ -793,7 +793,7 @@ class InternVLLlamaIntegrationTest(unittest.TestCase): decoded_output = 
processor.decode(output[0], skip_special_tokens=True) expected_outputs = Expectations( { - ("xpu", 3): "user\n\nWrite a haiku for this image\nassistant\nMajestic snow-capped peaks,\nWooden path leads to calm lake,\nNature's peaceful grace.", + ("xpu", 3): "user\n\nWrite a haiku for this image\nassistant\nMajestic snow-capped peaks,\nWooden dock stretches to the sea,\nSilent water mirrors.", ("cuda", 7): 'user\n\nWrite a haiku for this image\nassistant\nMajestic snow-capped peaks,\nWooden dock stretches to the sea,\nSilent water mirrors.', ("cuda", 8): 'user\n\nWrite a haiku for this image\nassistant\nMajestic snow-capped peaks,\nWooden dock stretches to the sea,\nSilent water mirrors.', } diff --git a/tests/models/llama4/test_modeling_llama4.py b/tests/models/llama4/test_modeling_llama4.py index dc8a3dc1e6..5ecc4732a2 100644 --- a/tests/models/llama4/test_modeling_llama4.py +++ b/tests/models/llama4/test_modeling_llama4.py @@ -17,6 +17,8 @@ import unittest from transformers import is_torch_available from transformers.testing_utils import ( + Expectations, + cleanup, require_read_token, require_torch_large_accelerator, slow, @@ -78,10 +80,17 @@ class Llama4IntegrationTest(unittest.TestCase): }, ] + def tearDown(self): + cleanup(torch_device, gc_collect=True) + def test_model_17b_16e_fp16(self): - EXPECTED_TEXT = [ - 'system\n\nYou are a helpful assistant.user\n\nWhat is shown in this image?assistant\n\nThe image shows a cow standing on a beach, with a blue sky and a body of water in the background. The cow is brown with a white' - ] # fmt: skip + EXPECTED_TEXTS = Expectations( + { + ("xpu", 3): ['system\n\nYou are a helpful assistant.user\n\nWhat is shown in this image?assistant\n\nThe image shows a cow standing on a beach with a blue sky and a body of water in the background. 
The cow is brown with a white face'], + ("cuda", None): ['system\n\nYou are a helpful assistant.user\n\nWhat is shown in this image?assistant\n\nThe image shows a cow standing on a beach, with a blue sky and a body of water in the background. The cow is brown with a white'], + } + ) # fmt: skip + EXPECTED_TEXT = EXPECTED_TEXTS.get_expectation() inputs = self.processor.apply_chat_template( self.messages_1, tokenize=True, add_generation_prompt=True, return_tensors="pt", return_dict=True diff --git a/tests/models/zamba2/test_modeling_zamba2.py b/tests/models/zamba2/test_modeling_zamba2.py index a40ea394f9..3f35a54acb 100644 --- a/tests/models/zamba2/test_modeling_zamba2.py +++ b/tests/models/zamba2/test_modeling_zamba2.py @@ -22,6 +22,7 @@ from parameterized import parameterized from transformers import AutoTokenizer, Zamba2Config, is_torch_available from transformers.testing_utils import ( + Expectations, require_bitsandbytes, require_flash_attn, require_torch, @@ -678,14 +679,23 @@ class Zamba2ModelIntegrationTest(unittest.TestCase): ] , dtype=torch.float32) # fmt: skip - EXPECTED_LOGITS_NO_GRAD_1 = torch.tensor( - [ - 0.1966, 6.3449, 3.8350, -5.7291, -6.5106, -6.5104, -6.5103, -6.5104, - -6.5103, -6.5104, -6.5106, -6.5105, 7.8700, 13.5434, -6.5104, -6.5096, - -6.5106, -6.5102, -6.5106, -6.5106, -6.5105, -6.5106, -6.5104, -6.5106, - -6.5105, -6.5106, -6.5106, -6.5113, -6.5102, -6.5105, -6.5108, -6.5105, - -6.5104, -6.5106, -6.5106, -6.5104, -6.5106, -6.5107, -6.5103, -6.5105 ] - , dtype=torch.float32) # fmt: skip + EXPECTED_LOGITS_NO_GRAD_1S = Expectations( + { + ("xpu", 3): torch.tensor([0.2027, 6.3481, 3.8392, -5.7279, -6.5090, -6.5088, -6.5087, -6.5088, + -6.5087, -6.5088, -6.5090, -6.5089, 7.8796, 13.5483, -6.5088, -6.5080, + -6.5090, -6.5086, -6.5090, -6.5090, -6.5089, -6.5090, -6.5088, -6.5090, + -6.5089, -6.5090, -6.5090, -6.5097, -6.5086, -6.5089, -6.5092, -6.5089, + -6.5088, -6.5090, -6.5090, -6.5088, -6.5090, -6.5091, -6.5087, -6.5089], + 
dtype=torch.float32), + ("cuda", None): torch.tensor([0.1966, 6.3449, 3.8350, -5.7291, -6.5106, -6.5104, -6.5103, -6.5104, + -6.5103, -6.5104, -6.5106, -6.5105, 7.8700, 13.5434, -6.5104, -6.5096, + -6.5106, -6.5102, -6.5106, -6.5106, -6.5105, -6.5106, -6.5104, -6.5106, + -6.5105, -6.5106, -6.5106, -6.5113, -6.5102, -6.5105, -6.5108, -6.5105, + -6.5104, -6.5106, -6.5106, -6.5104, -6.5106, -6.5107, -6.5103, -6.5105], + dtype=torch.float32), + } + ) # fmt: skip + EXPECTED_LOGITS_NO_GRAD_1 = EXPECTED_LOGITS_NO_GRAD_1S.get_expectation() torch.testing.assert_close(logits[0, -1, :40].cpu(), EXPECTED_LOGITS_NO_GRAD_0, rtol=1e-3, atol=1e-3) torch.testing.assert_close( diff --git a/tests/quantization/bnb/test_4bit.py b/tests/quantization/bnb/test_4bit.py index 5887445bbc..9dc0bc396d 100644 --- a/tests/quantization/bnb/test_4bit.py +++ b/tests/quantization/bnb/test_4bit.py @@ -520,14 +520,14 @@ class Pipeline4BitTest(Base4bitTest): @require_torch_multi_accelerator @apply_skip_if_not_implemented -class Bnb4bitTestMultiGpu(Base4bitTest): +class Bnb4bitTestMultiAccelerator(Base4bitTest): def setUp(self): super().setUp() - def test_multi_gpu_loading(self): + def test_multi_accelerator_loading(self): r""" - This tests that the model has been loaded and can be used correctly on a multi-GPU setup. - Let's just try to load a model on 2 GPUs and see if it works. The model we test has ~2GB of total, 3GB should suffice + This tests that the model has been loaded and can be used correctly on a multi-accelerator setup. + Let's just try to load a model on 2 accelerators and see if it works. 
The model we test has ~2GB of total, 3GB should suffice """ device_map = { "transformer.word_embeddings": 0, diff --git a/tests/tensor_parallel/test_tensor_parallel.py b/tests/tensor_parallel/test_tensor_parallel.py index 6efd01c6cf..69abd550e5 100644 --- a/tests/tensor_parallel/test_tensor_parallel.py +++ b/tests/tensor_parallel/test_tensor_parallel.py @@ -24,7 +24,7 @@ from transformers.testing_utils import ( backend_device_count, get_torch_dist_unique_port, require_huggingface_hub_greater_or_equal, - require_torch_multi_gpu, + require_torch_multi_accelerator, torch_device, ) @@ -168,6 +168,6 @@ class TestTensorParallel(TestCasePlus): del non_tp_tensor, tp_tensor -@require_torch_multi_gpu -class TestTensorParallelCuda(TestTensorParallel): +@require_torch_multi_accelerator +class TestTensorParallelAccelerator(TestTensorParallel): nproc_per_node = backend_device_count(torch_device)