Gaudi3 CI (#38790)

2025-06-23 10:56:51 +02:00
parent 2166b6b4ff
commit 984ff89e73
16 changed files with 618 additions and 14 deletions
--- a/tests/test_modeling_common.py
+++ b/tests/test_modeling_common.py
@@ -84,6 +84,7 @@ from transformers.testing_utils import (
    require_bitsandbytes,
    require_deepspeed,
    require_flash_attn,
+    require_non_hpu,
    require_safetensors,
    require_torch,
    require_torch_accelerator,
@@ -92,6 +93,7 @@ from transformers.testing_utils import (
    require_torch_multi_accelerator,
    require_torch_multi_gpu,
    require_torch_sdpa,
+    run_first,
    run_test_using_subprocess,
    set_config_for_less_flaky_test,
    set_model_for_less_flaky_test,
@@ -2797,6 +2799,7 @@ class ModelTesterMixin:
                    else:
                        torch.testing.assert_close(base_output[0], new_output[0], rtol=1e-5, atol=1e-5)

+    @require_non_hpu
    @require_accelerate
    @mark.accelerate_tests
    @require_torch_multi_accelerator
@@ -3727,6 +3730,9 @@ class ModelTesterMixin:
                if torch_device in ["cpu", "cuda"]:
                    atol = atols[torch_device, enable_kernels, torch_dtype]
                    rtol = rtols[torch_device, enable_kernels, torch_dtype]
+                elif torch_device == "hpu":
+                    atol = atols["cuda", enable_kernels, torch_dtype]
+                    rtol = rtols["cuda", enable_kernels, torch_dtype]
                elif torch_device == "xpu":
                    # As of PyTorch 2.5 XPU backend supports only torch.nn.attention.SDPBackend.MATH
                    # which is implemented on PyTorch level using aten operators and is
@@ -4666,6 +4672,7 @@ class ModelTesterMixin:

    # Here we need to run with a subprocess as otherwise setting back the default device to the default value ("cpu")
    # may bring unwanted consequences on other tests. See PR #37553
+    @run_first
    @run_test_using_subprocess
    @require_torch_accelerator
    def test_can_load_with_global_device_set(self):