Fix attn mask ignore logic in training-time trace (#32613)
* fix attn mask logic for training-time trace * add test * fix * fix * fix * fix * fix * format * [run-slow] llama * avoid accelearate * [run-slow] llama
This commit is contained in:
@@ -677,7 +677,7 @@ class MistralIntegrationTest(unittest.TestCase):
|
||||
tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-v0.1", use_fast=False)
|
||||
tokenizer.pad_token = tokenizer.eos_token
|
||||
model = MistralForCausalLM.from_pretrained(
|
||||
"mistralai/Mistral-7B-v0.1", device_map="sequential", torch_dtype=torch.float16
|
||||
"mistralai/Mistral-7B-v0.1", device_map=torch_device, torch_dtype=torch.float16
|
||||
)
|
||||
inputs = tokenizer(prompts, return_tensors="pt", padding=True).to(model.device)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user