Fix attn mask ignore logic in training-time trace (#32613)

* fix attn mask logic for training-time trace * add test * fix * fix * fix * fix * fix * format * [run-slow] llama * avoid accelearate * [run-slow] llama
2024-10-04 13:00:45 -04:00
parent 614660fdb9
commit 0d1692a49b
7 changed files with 55 additions and 5 deletions
--- a/tests/models/gemma/test_modeling_gemma.py
+++ b/tests/models/gemma/test_modeling_gemma.py
@@ -321,6 +321,9 @@ class GemmaModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMixi
    # used in `test_torch_compile`
    _torch_compile_test_ckpt = "google/gemma-2b"

+    # used in `test_torch_compile_for_training`
+    _torch_compile_train_cls = GemmaForCausalLM if is_torch_available() else None
+
    # TODO (ydshieh): Check this. See https://app.circleci.com/pipelines/github/huggingface/transformers/79245/workflows/9490ef58-79c2-410d-8f51-e3495156cf9c/jobs/1012146
    def is_pipeline_test_to_skip(
        self, pipeline_test_casse_name, config_class, model_architecture, tokenizer_name, processor_name
@@ -808,7 +811,7 @@ class GemmaIntegrationTest(unittest.TestCase):

        prompts = ["Hello I am doing", "Hi today"]
        tokenizer = AutoTokenizer.from_pretrained("google/gemma-2b", pad_token="</s>", padding_side="right")
-        model = GemmaForCausalLM.from_pretrained("google/gemma-2b", device_map="sequential", torch_dtype=torch.float16)
+        model = GemmaForCausalLM.from_pretrained("google/gemma-2b", device_map=torch_device, torch_dtype=torch.float16)
        inputs = tokenizer(prompts, return_tensors="pt", padding=True).to(model.device)

        # Dynamic Cache