Modernbert fixes (#38912)
* Removed deprecated argument in modernbert RotaryEmbedding
* Skip test_sdpa_can_dispatch_on_flash for modernbert

---------

Co-authored-by: ivarflakstad <69173633+ivarflakstad@users.noreply.github.com>
Co-authored-by: Yih-Dar <2521628+ydshieh@users.noreply.github.com>
This commit is contained in:
@@ -154,7 +154,7 @@ class ModernBertUnpaddedRotaryEmbedding(RotaryEmbedding):
|
|||||||
up to max_seqlen. If the max_seqlen, device, or dtype during training/inference differ,
|
up to max_seqlen. If the max_seqlen, device, or dtype during training/inference differ,
|
||||||
the cos_sin_cache will be recomputed during the forward pass.
|
the cos_sin_cache will be recomputed during the forward pass.
|
||||||
"""
|
"""
|
||||||
super().__init__(dim=dim, base=base, pos_idx_in_fp32=True, device=device, interleaved=False)
|
super().__init__(dim=dim, base=base, device=device, interleaved=False)
|
||||||
self.max_seqlen = max_seqlen
|
self.max_seqlen = max_seqlen
|
||||||
|
|
||||||
if max_seqlen is not None and device is not None and dtype is not None:
|
if max_seqlen is not None and device is not None and dtype is not None:
|
||||||
|
|||||||
@@ -417,7 +417,7 @@ class ModernBertUnpaddedRotaryEmbedding(RotaryEmbedding):
|
|||||||
up to max_seqlen. If the max_seqlen, device, or dtype during training/inference differ,
|
up to max_seqlen. If the max_seqlen, device, or dtype during training/inference differ,
|
||||||
the cos_sin_cache will be recomputed during the forward pass.
|
the cos_sin_cache will be recomputed during the forward pass.
|
||||||
"""
|
"""
|
||||||
super().__init__(dim=dim, base=base, pos_idx_in_fp32=True, device=device, interleaved=False)
|
super().__init__(dim=dim, base=base, device=device, interleaved=False)
|
||||||
self.max_seqlen = max_seqlen
|
self.max_seqlen = max_seqlen
|
||||||
|
|
||||||
if max_seqlen is not None and device is not None and dtype is not None:
|
if max_seqlen is not None and device is not None and dtype is not None:
|
||||||
|
|||||||
@@ -3795,6 +3795,10 @@ class ModelTesterMixin:
|
|||||||
self.skipTest(
|
self.skipTest(
|
||||||
"PaliGemma-like models currently (transformers==4.41.0) requires an attention_mask input"
|
"PaliGemma-like models currently (transformers==4.41.0) requires an attention_mask input"
|
||||||
)
|
)
|
||||||
|
if config.model_type in ["modernbert"]:
|
||||||
|
self.skipTest(
|
||||||
|
reason="ModernBert currently (transformers==4.52.0) automatically adds an attention_mask input"
|
||||||
|
)
|
||||||
if config.model_type in ["idefics", "idefics2", "idefics3"]:
|
if config.model_type in ["idefics", "idefics2", "idefics3"]:
|
||||||
self.skipTest(reason="Idefics currently (transformers==4.39.1) requires an image_attention_mask input")
|
self.skipTest(reason="Idefics currently (transformers==4.39.1) requires an image_attention_mask input")
|
||||||
if config.model_type in ["sam"]:
|
if config.model_type in ["sam"]:
|
||||||
|
|||||||
Reference in New Issue
Block a user