From 2593932f10684d7522183bab30b179a80d9a7d16 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Ouazan?= <83456801+remi-or@users.noreply.github.com> Date: Thu, 7 Aug 2025 09:57:21 +0200 Subject: [PATCH] Gemma3 fixes (#39960) * Fix multiple devices issue * Added expectations for rocm 9.4 * Ruff --- src/transformers/models/gemma3/modeling_gemma3.py | 4 +++- src/transformers/models/gemma3/modular_gemma3.py | 4 +++- tests/models/gemma3/test_modeling_gemma3.py | 13 +++++++++++++ 3 files changed, 19 insertions(+), 2 deletions(-) diff --git a/src/transformers/models/gemma3/modeling_gemma3.py b/src/transformers/models/gemma3/modeling_gemma3.py index 92fa45a663..c5bfb9f373 100644 --- a/src/transformers/models/gemma3/modeling_gemma3.py +++ b/src/transformers/models/gemma3/modeling_gemma3.py @@ -922,7 +922,9 @@ class Gemma3Model(Gemma3PreTrainedModel): is_image = (token_type_ids == 1).to(cache_position.device) new_image_start = is_image & ~nn.functional.pad(is_image, (1, 0), value=0)[:, :-1] image_group_ids = torch.cumsum(new_image_start.int(), dim=1) - 1 - image_group_ids = torch.where(is_image, image_group_ids, torch.full_like(token_type_ids, -1)) + image_group_ids = torch.where( + is_image, image_group_ids, torch.full_like(token_type_ids, -1, device=is_image.device) + ) mask_kwargs["or_mask_function"] = token_type_ids_mask_function( token_type_ids.to(cache_position.device), image_group_ids, self.config.mm_tokens_per_image ) diff --git a/src/transformers/models/gemma3/modular_gemma3.py b/src/transformers/models/gemma3/modular_gemma3.py index 0ca7505c56..df379b5544 100644 --- a/src/transformers/models/gemma3/modular_gemma3.py +++ b/src/transformers/models/gemma3/modular_gemma3.py @@ -830,7 +830,9 @@ class Gemma3Model(PaliGemmaModel): is_image = (token_type_ids == 1).to(cache_position.device) new_image_start = is_image & ~nn.functional.pad(is_image, (1, 0), value=0)[:, :-1] image_group_ids = torch.cumsum(new_image_start.int(), dim=1) - 1 - image_group_ids = torch.where(is_image, image_group_ids, torch.full_like(token_type_ids, -1)) + image_group_ids = torch.where( + is_image, image_group_ids, torch.full_like(token_type_ids, -1, device=is_image.device) + ) mask_kwargs["or_mask_function"] = token_type_ids_mask_function( token_type_ids.to(cache_position.device), image_group_ids, self.config.mm_tokens_per_image ) diff --git a/tests/models/gemma3/test_modeling_gemma3.py b/tests/models/gemma3/test_modeling_gemma3.py index 43ac57dbb5..8eee01fc17 100644 --- a/tests/models/gemma3/test_modeling_gemma3.py +++ b/tests/models/gemma3/test_modeling_gemma3.py @@ -507,6 +507,7 @@ class Gemma3IntegrationTest(unittest.TestCase): { ("xpu", 3): ['user\nYou are a helpful assistant.\n\n\n\n\n\nWhat is shown in this image?\nmodel\nCertainly! \n\nThe image shows a brown and white cow standing on a sandy beach with turquoise water in the background. It looks like a lovely,'], ("cuda", 8): ['user\nYou are a helpful assistant.\n\n\n\n\n\nWhat is shown in this image?\nmodel\nCertainly! \n\nThe image shows a brown cow standing on a sandy beach with clear turquoise water and a blue sky in the background. It looks like'], + ("rocm", (9, 4)): ['user\nYou are a helpful assistant.\n\n\n\n\n\nWhat is shown in this image?\nmodel\nCertainly! \n\nThe image shows a brown cow standing on a sandy beach with clear blue water and a blue sky in the background. It looks like'], ("rocm", (9, 5)): ['user\nYou are a helpful assistant.\n\n\n\n\n\nWhat is shown in this image?\nmodel\nCertainly! \n\nThe image shows a brown and white cow standing on a sandy beach with turquoise water and a distant coastline in the background. It looks'], } ) # fmt: skip @@ -559,6 +560,11 @@ class Gemma3IntegrationTest(unittest.TestCase): 'user\nYou are a helpful assistant.\n\n\n\n\n\nWhat is shown in this image?\nmodel\nCertainly! \n\nThe image shows a brown cow standing on a sandy beach with clear blue water and a blue sky in the background. It looks like', "user\nYou are a helpful assistant.\n\n\n\n\n\n\n\n\n\nAre these images identical?\nmodel\nNo, these images are not identical. \n\nHere's a breakdown of the differences:\n\n* **Image 1:** Shows a brown" ], + ("rocm", (9, 4)): + [ + 'user\nYou are a helpful assistant.\n\n\n\n\n\nWhat is shown in this image?\nmodel\nCertainly! \n\nThe image shows a brown cow standing on a sandy beach with clear turquoise water and a blue sky in the background. It looks like', + "user\nYou are a helpful assistant.\n\n\n\n\n\n\n\n\n\nAre these images identical?\nmodel\nNo, these images are not identical. \n\nHere's a breakdown of the differences:\n\n* **Image 1:** Shows a cow" + ], ("rocm", (9, 5)): [ 'user\nYou are a helpful assistant.\n\n\n\n\n\nWhat is shown in this image?\nmodel\nCertainly! \n\nThe image shows a brown and white cow standing on a sandy beach next to a turquoise ocean. There are some clouds in the blue', @@ -602,6 +608,7 @@ class Gemma3IntegrationTest(unittest.TestCase): ("xpu", 3): ['user\nYou are a helpful assistant.\n\nHere is the original image \n\n\n\n and here are some crops to help you see better \n\n\n\n \n\n\n\nWhat is shown in this image?\nmodel\nThe image shows a brown cow standing on a sandy beach next to a turquoise ocean. There are clouds in the blue sky above.'], ("cuda", 7): [], ("cuda", 8): ["user\nYou are a helpful assistant.\n\nHere is the original image \n\n\n\n and here are some crops to help you see better \n\n\n\n \n\n\n\nWhat is shown in this image?\nmodel\nThe image shows a brown cow standing on a sandy beach next to a turquoise ocean. There's a bright blue sky with some white clouds in the"], + ("rocm", (9, 4)): ["user\nYou are a helpful assistant.\n\nHere is the original image \n\n\n\n and here are some crops to help you see better \n\n\n\n \n\n\n\nWhat is shown in this image?\nmodel\nThe image shows a brown cow standing on a sandy beach next to a turquoise ocean. There's a blue sky with some white clouds in the background"] } ) # fmt: skip EXPECTED_TEXT = EXPECTED_TEXTS.get_expectation() @@ -661,6 +668,10 @@ class Gemma3IntegrationTest(unittest.TestCase): "user\nYou are a helpful assistant.\n\nHere is the original image \n\n\n\n and here are some crops to help you see better \n\n\n\n \n\n\n\nWhat is shown in this image?\nmodel\nThe image shows a brown cow standing on a sandy beach next to a turquoise ocean. There's a bright blue sky with some white clouds in the", 'user\nYou are a helpful assistant.\n\nHere is the original image \n\n\n\n and here are some crops to help you see better \n\n\n\n \n\n\n\nHere is the original image \n\n\n\n and here are some crops to help you see better \n\n\n\n \n\n\n\nAre these images identical?\nmodel\nNo, the images are not identical. \n\nThe first image shows a cow on a beach, while the second image shows a street scene with a' ], + ("rocm", (9, 4)) : [ + "user\nYou are a helpful assistant.\n\nHere is the original image \n\n\n\n and here are some crops to help you see better \n\n\n\n \n\n\n\nWhat is shown in this image?\nmodel\nThe image shows a brown cow standing on a sandy beach next to a turquoise ocean. There's a blue sky with some white clouds in the background", + 'user\nYou are a helpful assistant.\n\nHere is the original image \n\n\n\n and here are some crops to help you see better \n\n\n\n \n\n\n\nHere is the original image \n\n\n\n and here are some crops to help you see better \n\n\n\n \n\n\n\nAre these images identical?\nmodel\nNo, the images are not identical. \n\nThe first image shows a cow on a beach, while the second image shows a street scene with a' + ], ("rocm", (9, 5)) : [ 'user\nYou are a helpful assistant.\n\nHere is the original image \n\n\n\n and here are some crops to help you see better \n\n\n\n \n\n\n\nWhat is shown in this image?\nmodel\nThe image shows a brown cow standing on a sandy beach next to a turquoise ocean. There are clouds in the blue sky above.', 'user\nYou are a helpful assistant.\n\nHere is the original image \n\n\n\n and here are some crops to help you see better \n\n\n\n \n\n\n\nHere is the original image \n\n\n\n and here are some crops to help you see better \n\n\n\n \n\n\n\nAre these images identical?\nmodel\nNo, the images are not identical. \n\nThe first image shows a cow on a beach, while the second image shows a street scene with a', @@ -704,6 +715,7 @@ class Gemma3IntegrationTest(unittest.TestCase): ("xpu", 3): ["user\nYou are a helpful assistant.\n\n\n\n\n\nWhat do you see here?\nmodel\nOkay, let's break down what I see in this image!\n\nHere's a description of the scene:\n\n* **Chinese Arch"], ("cuda", 7): [], ("cuda", 8): ["user\nYou are a helpful assistant.\n\n\n\n\n\nWhat do you see here?\nmodel\nOkay, let's break down what I see in this image:\n\n**Overall Scene:**\n\nIt looks like a street scene in a vibrant,"], + ("rocm", (9, 4)): ["user\nYou are a helpful assistant.\n\n\n\n\n\nWhat do you see here?\nmodel\nOkay, let's break down what I see in this image:\n\n**Main Features:**\n\n* **Chinese Archway:** The most prominent"], } ) # fmt: skip EXPECTED_TEXT = EXPECTED_TEXTS.get_expectation() @@ -725,6 +737,7 @@ class Gemma3IntegrationTest(unittest.TestCase): ("xpu", 3): ['Write a poem about Machine Learning.\n\n---\n\nThe data flows, a river deep,\nWith patterns hidden, secrets sleep.\nA neural net, a watchful eye,\nLearning'], ("cuda", 7): ['Write a poem about Machine Learning.\n\n---\n\nThe data flows, a silent stream,\nInto the neural net, a waking dream.\nAlgorithms hum, a coded grace,\n'], ("cuda", 8): ['Write a poem about Machine Learning.\n\n---\n\nThe data flows, a silent stream,\nInto the neural net, a waking dream.\nAlgorithms hum, a coded grace,\n'], + ("rocm", (9, 4)): ['Write a poem about Machine Learning.\n\n---\n\nThe data flows, a silent stream,\nInto the neural net, a waking dream.\nAlgorithms hum, a coded grace,\n'], ("rocm", (9, 5)): ['Write a poem about Machine Learning.\n\n---\n\nThe data flows, a river deep,\nWith patterns hidden, secrets sleep.\nA neural net, a watchful eye,\nLearning'], } ) # fmt: skip