Never fall back to eager implicitly (#38327)

* remove arg everywhere

* Update warnings

* add more models

* Update sdpa_attention.py

* fix style

* fix

* re-add warnings but not for flex

* Update test_modeling_common.py

* skip

* fix

---------

Co-authored-by: Arthur <48595927+ArthurZucker@users.noreply.github.com>
This commit is contained in:
Cyril Vallez
2025-05-23 19:48:01 +02:00
committed by GitHub
parent e64ed0304c
commit e0aad278fe
73 changed files with 66 additions and 544 deletions

View File

@@ -4353,7 +4353,8 @@ class ModelTesterMixin:
if hasattr(config, "layer_types"):
del config_dict["layer_types"]
new_config = config.__class__(**config_dict)
model = model_class(new_config).to(torch_device)
# Explicitly request the eager attention implementation; otherwise `output_attentions` is not supported
model = model_class._from_config(new_config, attn_implementation="eager").to(torch_device)
model.eval()
layer_types = getattr(model.config, "layer_types", ["sliding_attention"] * config.num_hidden_layers)
attentions = model(**inputs, output_attentions=True).attentions
@@ -4370,7 +4371,8 @@ class ModelTesterMixin:
if hasattr(config, "layer_types"):
del config_dict["layer_types"]
new_config = config.__class__(**config_dict)
model = model_class(new_config).to(torch_device)
# Explicitly request the eager attention implementation; otherwise `output_attentions` is not supported
model = model_class._from_config(new_config, attn_implementation="eager").to(torch_device)
model.eval()
attentions_not_sliding = model(**inputs, output_attentions=True).attentions
for layer_attention in attentions_not_sliding: