Fix attn mask ignore logic in training-time trace (#32613)
* fix attn mask logic for training-time trace * add test * fix * fix * fix * fix * fix * format * [run-slow] llama * avoid accelerate * [run-slow] llama
This commit is contained in:
@@ -321,6 +321,9 @@ class GemmaModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMixi
|
||||
# used in `test_torch_compile`
|
||||
_torch_compile_test_ckpt = "google/gemma-2b"
|
||||
|
||||
# used in `test_torch_compile_for_training`
|
||||
_torch_compile_train_cls = GemmaForCausalLM if is_torch_available() else None
|
||||
|
||||
# TODO (ydshieh): Check this. See https://app.circleci.com/pipelines/github/huggingface/transformers/79245/workflows/9490ef58-79c2-410d-8f51-e3495156cf9c/jobs/1012146
|
||||
def is_pipeline_test_to_skip(
|
||||
self, pipeline_test_casse_name, config_class, model_architecture, tokenizer_name, processor_name
|
||||
@@ -808,7 +811,7 @@ class GemmaIntegrationTest(unittest.TestCase):
|
||||
|
||||
prompts = ["Hello I am doing", "Hi today"]
|
||||
tokenizer = AutoTokenizer.from_pretrained("google/gemma-2b", pad_token="</s>", padding_side="right")
|
||||
model = GemmaForCausalLM.from_pretrained("google/gemma-2b", device_map="sequential", torch_dtype=torch.float16)
|
||||
model = GemmaForCausalLM.from_pretrained("google/gemma-2b", device_map=torch_device, torch_dtype=torch.float16)
|
||||
inputs = tokenizer(prompts, return_tensors="pt", padding=True).to(model.device)
|
||||
|
||||
# Dynamic Cache
|
||||
|
||||
@@ -319,6 +319,9 @@ class LlamaModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMixi
|
||||
# used in `test_torch_compile`
|
||||
_torch_compile_test_ckpt = "meta-llama/Llama-2-7b-hf"
|
||||
|
||||
# used in `test_torch_compile_for_training`
|
||||
_torch_compile_train_cls = LlamaForCausalLM if is_torch_available() else None
|
||||
|
||||
def setUp(self):
|
||||
self.model_tester = LlamaModelTester(self)
|
||||
self.config_tester = ConfigTester(self, config_class=LlamaConfig, hidden_size=37)
|
||||
@@ -874,7 +877,7 @@ class LlamaIntegrationTest(unittest.TestCase):
|
||||
]
|
||||
tokenizer = LlamaTokenizer.from_pretrained("meta-llama/Llama-2-7b-hf", pad_token="</s>", padding_side="right")
|
||||
model = LlamaForCausalLM.from_pretrained(
|
||||
"meta-llama/Llama-2-7b-hf", device_map="sequential", torch_dtype=torch.float16
|
||||
"meta-llama/Llama-2-7b-hf", device_map=torch_device, torch_dtype=torch.float16
|
||||
)
|
||||
inputs = tokenizer(prompts, return_tensors="pt", padding=True).to(model.device)
|
||||
|
||||
|
||||
@@ -677,7 +677,7 @@ class MistralIntegrationTest(unittest.TestCase):
|
||||
tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-v0.1", use_fast=False)
|
||||
tokenizer.pad_token = tokenizer.eos_token
|
||||
model = MistralForCausalLM.from_pretrained(
|
||||
"mistralai/Mistral-7B-v0.1", device_map="sequential", torch_dtype=torch.float16
|
||||
"mistralai/Mistral-7B-v0.1", device_map=torch_device, torch_dtype=torch.float16
|
||||
)
|
||||
inputs = tokenizer(prompts, return_tensors="pt", padding=True).to(model.device)
|
||||
|
||||
|
||||
@@ -94,6 +94,8 @@ class NemotronModelTest(GemmaModelTest):
|
||||
|
||||
# used in `test_torch_compile`
|
||||
_torch_compile_test_ckpt = "nvidia/nemotron-3-8b-base-4k-hf"
|
||||
# used in `test_torch_compile_for_training`
|
||||
_torch_compile_train_cls = NemotronForCausalLM if is_torch_available() else None
|
||||
|
||||
def setUp(self):
|
||||
self.model_tester = NemotronModelTester(self)
|
||||
|
||||
Reference in New Issue
Block a user