[RoBERTa-based] Add support for sdpa (#30510)
* Adding SDPA support for RoBERTa-based models
* add not is_cross_attention
* fix copies
* fix test
* add minimal test for camembert and xlm_roberta as their test class does not inherit from ModelTesterMixin
* address some review comments
* use copied from
* style
* consistency
* fix lists

---------

Co-authored-by: fxmarty <9808326+fxmarty@users.noreply.github.com>
Co-authored-by: Arthur <48595927+ArthurZucker@users.noreply.github.com>
This commit is contained in:
@@ -16,7 +16,14 @@
|
||||
import unittest
|
||||
|
||||
from transformers import is_torch_available
|
||||
from transformers.testing_utils import require_sentencepiece, require_tokenizers, require_torch, slow, torch_device
|
||||
from transformers.testing_utils import (
|
||||
require_sentencepiece,
|
||||
require_tokenizers,
|
||||
require_torch,
|
||||
require_torch_sdpa,
|
||||
slow,
|
||||
torch_device,
|
||||
)
|
||||
|
||||
|
||||
if is_torch_available():
|
||||
@@ -31,7 +38,7 @@ if is_torch_available():
|
||||
class CamembertModelIntegrationTest(unittest.TestCase):
|
||||
@slow
|
||||
def test_output_embeds_base_model(self):
|
||||
model = CamembertModel.from_pretrained("almanach/camembert-base")
|
||||
model = CamembertModel.from_pretrained("almanach/camembert-base", attn_implementation="eager")
|
||||
model.to(torch_device)
|
||||
|
||||
input_ids = torch.tensor(
|
||||
@@ -54,3 +61,24 @@ class CamembertModelIntegrationTest(unittest.TestCase):
|
||||
# expected_slice = roberta.model.forward(input_ids)[0][:, :3, :3].detach()
|
||||
|
||||
self.assertTrue(torch.allclose(output[:, :3, :3], expected_slice, atol=1e-4))
|
||||
|
||||
@slow
@require_torch_sdpa
def test_output_embeds_base_model_sdpa(self):
    """Compare a slice of the SDPA model's last hidden state with fixed reference values.

    The "almanach/camembert-base" checkpoint is loaded with
    ``attn_implementation="sdpa"``, run once on a single hard-coded sequence
    with gradient tracking disabled, and the ``[:, :3, :3]`` slice of
    ``last_hidden_state`` must match the reference tensor within ``atol=1e-4``.
    """
    # Input ids for one sequence; the original annotation gives the sentence
    # as "J'aime le camembert !".
    token_ids = torch.tensor(
        [[5, 121, 11, 660, 16, 730, 25543, 110, 83, 6]],
        device=torch_device,
        dtype=torch.long,
    )

    # Hard-coded reference values for the [:, :3, :3] slice of the output.
    reference = torch.tensor(
        [[[-0.0254, 0.0235, 0.1027], [0.0606, -0.1811, -0.0418], [-0.1561, -0.1127, 0.2687]]],
        device=torch_device,
        dtype=torch.float,
    )

    # Explicitly request the SDPA attention implementation for this check.
    sdpa_model = CamembertModel.from_pretrained("almanach/camembert-base", attn_implementation="sdpa")
    sdpa_model = sdpa_model.to(torch_device)

    with torch.no_grad():
        model_outputs = sdpa_model(token_ids)
        hidden_states = model_outputs["last_hidden_state"].detach()

    observed = hidden_states[:, :3, :3]
    is_close = torch.allclose(observed, reference, atol=1e-4)
    self.assertTrue(is_close)
|
||||
|
||||
Reference in New Issue
Block a user