Fix SDPA attention precision issue in Qwen2.5-VL (#37363)

* solve conflicts and remove redundant attention_mask in qwenvit
* update decoded text check
* remove trailing whitespace
2025-07-09 13:03:44 +08:00
parent 0e1c281745
commit 25343aafee
8 changed files with 201 additions and 245 deletions
--- a/tests/models/glm4v/test_modeling_glm4v.py
+++ b/tests/models/glm4v/test_modeling_glm4v.py
@@ -419,7 +419,7 @@ class Glm4vIntegrationTest(unittest.TestCase):
        output = model.generate(**inputs, max_new_tokens=30)

        EXPECTED_DECODED_TEXT = [
-            "\nWhat kind of dog is this?\n<think>Got it, let's look at the image. The animal in the picture has a stocky build, thick fur, and a face that's",
+            "\nWhat kind of dog is this?\n<think>Got it, let's look at the image. The animal in the picture is not a dog; it's a cat. Specifically, it looks",
            "\nWhat kind of dog is this?\n<think>Got it, let's look at the image. Wait, the animals here are cats, not dogs. The question is about a dog, but"
        ]  # fmt: skip
        self.assertEqual(