Fix attn mask ignore logic in training-time trace (#32613)

* fix attn mask logic for training-time trace

* add test

* fix

* fix

* fix

* fix

* fix

* format

* [run-slow] llama

* avoid accelearate

* [run-slow] llama
This commit is contained in:
Longjie Zheng
2024-10-04 13:00:45 -04:00
committed by GitHub
parent 614660fdb9
commit 0d1692a49b
7 changed files with 55 additions and 5 deletions

View File

@@ -321,6 +321,9 @@ class GemmaModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMixi
# used in `test_torch_compile`
_torch_compile_test_ckpt = "google/gemma-2b"
# used in `test_torch_compile_for_training`
_torch_compile_train_cls = GemmaForCausalLM if is_torch_available() else None
# TODO (ydshieh): Check this. See https://app.circleci.com/pipelines/github/huggingface/transformers/79245/workflows/9490ef58-79c2-410d-8f51-e3495156cf9c/jobs/1012146
def is_pipeline_test_to_skip(
self, pipeline_test_casse_name, config_class, model_architecture, tokenizer_name, processor_name
@@ -808,7 +811,7 @@ class GemmaIntegrationTest(unittest.TestCase):
prompts = ["Hello I am doing", "Hi today"]
tokenizer = AutoTokenizer.from_pretrained("google/gemma-2b", pad_token="</s>", padding_side="right")
model = GemmaForCausalLM.from_pretrained("google/gemma-2b", device_map="sequential", torch_dtype=torch.float16)
model = GemmaForCausalLM.from_pretrained("google/gemma-2b", device_map=torch_device, torch_dtype=torch.float16)
inputs = tokenizer(prompts, return_tensors="pt", padding=True).to(model.device)
# Dynamic Cache