From a325409a5051d68879030214e9c33180505f0d81 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Ouazan?= <83456801+remi-or@users.noreply.github.com> Date: Mon, 7 Jul 2025 11:42:33 +0200 Subject: [PATCH] Expectations re-order and corrected FA3 skip (#39195) * Fix Expectations and a FA3 skip * Fixed docstring * Added context for Default expectation --- src/transformers/testing_utils.py | 22 +++++++++++++++------- tests/test_modeling_common.py | 2 +- 2 files changed, 16 insertions(+), 8 deletions(-) diff --git a/src/transformers/testing_utils.py b/src/transformers/testing_utils.py index 78349b8b90..f9cc588797 100644 --- a/src/transformers/testing_utils.py +++ b/src/transformers/testing_utils.py @@ -3338,17 +3338,25 @@ class Expectations(UserDict[PackedDeviceProperties, Any]): return [(unpack_device_properties(k), v) for k, v in self.data.items()] @staticmethod - def is_default(properties: DeviceProperties) -> bool: - return all(p is None for p in properties) + def is_default(expectation_key: PackedDeviceProperties) -> bool: + """ + This function returns True if the expectation_key is the Default expectation (None, None). + When an Expectation dict contains a Default value, it is generally because the test existed before Expectations. + When we modify a test to use Expectations for a specific hardware, we don't want to affect the tests on other + hardwares. Thus we set the previous value as the Default expectation with key (None, None) and add a value for + the specific hardware with key (hardware_type, (major, minor)). + """ + return all(p is None for p in expectation_key) @staticmethod def score(properties: DeviceProperties, other: DeviceProperties) -> float: """ Returns score indicating how similar two instances of the `Properties` tuple are. Rules are as follows: - * Matching `type` adds one point, semi-matching `type` adds half a point (e.g. cuda and rocm). + * Matching `type` adds one point, semi-matching `type` adds 0.1 point (e.g. cuda and rocm). * If types match, matching `major` adds another point, and then matching `minor` adds another. - * Default expectation (if present) is worth 0.1 point to distinguish it from a straight-up zero. + * The Default expectation (None, None) is worth 0.5 point, which is better than semi-matching. More on this + in the `is_default` function. """ device_type, major, minor = properties other_device_type, other_major, other_minor = other @@ -3361,13 +3369,13 @@ class Expectations(UserDict[PackedDeviceProperties, Any]): score += 1 if minor is not None and minor == other_minor: score += 1 - # Semi-matching device type + # Semi-matching device type, which carries less importance than the default expectation elif device_type in ["cuda", "rocm"] and other_device_type in ["cuda", "rocm"]: - score = 0.5 + score = 0.1 # Default expectation if Expectations.is_default(other): - score = 0.1 + score = 0.5 return score diff --git a/tests/test_modeling_common.py b/tests/test_modeling_common.py index dcdffe6317..e7bd7a1603 100755 --- a/tests/test_modeling_common.py +++ b/tests/test_modeling_common.py @@ -4306,7 +4306,7 @@ class ModelTesterMixin: def test_flash_attn_2_from_config(self): self.flash_attn_from_config(attn_implementation="flash_attention_2") - @require_flash_attn + @require_flash_attn_3 @require_torch_gpu @mark.flash_attn_3_test @slow