Compile compatibility for decoder-only models (#32617)

* squash into one commit

* add qwen2-vl for rope standardization

* fix mistral compile

* fix qwen2-vl

* fix-copies
This commit is contained in:
Raushan Turganbay
2024-09-09 10:59:04 +02:00
committed by GitHub
parent eedd21b9e7
commit 65bb284448
37 changed files with 2301 additions and 1367 deletions

View File

@@ -4640,7 +4640,7 @@ class ModelTesterMixin:
if not model_class._supports_static_cache:
self.skipTest(f"{model_class.__name__} is not guaranteed to work with custom 4D attention masks")
config, _ = self.model_tester.prepare_config_and_inputs_for_common()
if getattr(config, "sliding_window", 0) > 0:
if getattr(config, "sliding_window", 0) is not None and getattr(config, "sliding_window", 0) > 0:
self.skipTest(f"{model_class.__name__} with sliding window attention is not supported by this test")
model = model_class(config).to(device=torch_device, dtype=torch.float32)
@@ -4689,7 +4689,7 @@ class ModelTesterMixin:
self.skipTest(f"{model_class.__name__} does not support cache class")
config, inputs = self.model_tester.prepare_config_and_inputs_for_common()
if getattr(config, "sliding_window", 0) > 0:
if getattr(config, "sliding_window", 0) is not None and getattr(config, "sliding_window", 0) > 0:
self.skipTest(f"{model_class.__name__} with sliding window attention is not supported by this test")
model = model_class(config).to(device=torch_device, dtype=torch.float32)