Unbreak optimum-executorch (#38646)

* Unbreak optimum-executorch

* use static cache if has layer_types but no sliding_window

* revert view on kv_arange

---------

Co-authored-by: Guang Yang <guangyang@fb.com>
This commit is contained in:
Guang Yang
2025-06-13 02:13:32 -07:00
committed by GitHub
parent 5f59a9b439
commit 7f00b325f8
9 changed files with 64 additions and 39 deletions

View File

@@ -313,7 +313,6 @@ class Gemma2IntegrationTest(unittest.TestCase):
from transformers.integrations.executorch import (
TorchExportableModuleWithStaticCache,
convert_and_export_with_cache,
)
tokenizer = AutoTokenizer.from_pretrained("google/gemma-2-2b", pad_token="</s>", padding_side="right")
@@ -363,7 +362,10 @@ class Gemma2IntegrationTest(unittest.TestCase):
max_new_tokens = max_generation_length - prompt_token_ids.shape[-1]
# Static Cache + export
exported_program = convert_and_export_with_cache(model)
from transformers.integrations.executorch import TorchExportableModuleForDecoderOnlyLM
exportable_module = TorchExportableModuleForDecoderOnlyLM(model)
exported_program = exportable_module.export()
ep_generated_ids = TorchExportableModuleWithStaticCache.generate(
exported_program=exported_program, prompt_token_ids=prompt_token_ids, max_new_tokens=max_new_tokens
)