Modernbert fixes (#38912)
* Removed deprecated argument in modernbert RotaryEmbedding
* Skip test_sdpa_can_dispatch_on_flash for modernbert

---------

Co-authored-by: ivarflakstad <69173633+ivarflakstad@users.noreply.github.com>
Co-authored-by: Yih-Dar <2521628+ydshieh@users.noreply.github.com>
This commit is contained in:
@@ -154,7 +154,7 @@ class ModernBertUnpaddedRotaryEmbedding(RotaryEmbedding):
|
||||
up to max_seqlen. If the max_seqlen, device, or dtype during training/inference differ,
|
||||
the cos_sin_cache will be recomputed during the forward pass.
|
||||
"""
|
||||
- super().__init__(dim=dim, base=base, pos_idx_in_fp32=True, device=device, interleaved=False)
|
||||
+ super().__init__(dim=dim, base=base, device=device, interleaved=False)
|
||||
self.max_seqlen = max_seqlen
|
||||
|
||||
if max_seqlen is not None and device is not None and dtype is not None:
|
||||
|
||||
@@ -417,7 +417,7 @@ class ModernBertUnpaddedRotaryEmbedding(RotaryEmbedding):
|
||||
up to max_seqlen. If the max_seqlen, device, or dtype during training/inference differ,
|
||||
the cos_sin_cache will be recomputed during the forward pass.
|
||||
"""
|
||||
- super().__init__(dim=dim, base=base, pos_idx_in_fp32=True, device=device, interleaved=False)
|
||||
+ super().__init__(dim=dim, base=base, device=device, interleaved=False)
|
||||
self.max_seqlen = max_seqlen
|
||||
|
||||
if max_seqlen is not None and device is not None and dtype is not None:
|
||||
|
||||
@@ -3795,6 +3795,10 @@ class ModelTesterMixin:
|
||||
self.skipTest(
|
||||
"PaliGemma-like models currently (transformers==4.41.0) requires an attention_mask input"
|
||||
)
|
||||
+ if config.model_type in ["modernbert"]:
|
||||
+ self.skipTest(
|
||||
+ reason="ModernBert currently (transformers==4.52.0) automatically adds an attention_mask input"
|
||||
+ )
|
||||
if config.model_type in ["idefics", "idefics2", "idefics3"]:
|
||||
self.skipTest(reason="Idefics currently (transformers==4.39.1) requires an image_attention_mask input")
|
||||
if config.model_type in ["sam"]:
|
||||
|
||||
Reference in New Issue
Block a user