[FEAT] Add Neftune into transformers Trainer (#27141)
* add v1 neftune
* use `unwrap_model` instead
* add test + docs
* Apply suggestions from code review
  Co-authored-by: Zach Mueller <muellerzr@gmail.com>
* more details
* fixup
* Update docs/source/en/main_classes/trainer.md
  Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com>
* refactor a bit
* more elaborated test
* fix unwrap issue

---------

Co-authored-by: Zach Mueller <muellerzr@gmail.com>
Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com>
This commit is contained in:
@@ -838,6 +838,50 @@ class TrainerIntegrationTest(TestCasePlus, TrainerIntegrationCommon):
|
||||
train_output = trainer.train()
|
||||
self.assertEqual(train_output.global_step, 10)
|
||||
|
||||
def test_neftune(self):
    """Check that NEFTune noise is applied when activated and gone after training.

    The test has two phases:

    1. NEFTune is activated manually through ``Trainer._activate_neftune`` and two
       embedding lookups of the same input are asserted to differ.
    2. A fresh model is trained with ``neftune_noise_alpha`` set; afterwards the input
       embeddings must carry no forward hooks and, in eval mode, two lookups of the
       same input must be identical.
    """

    def make_args():
        # Both phases use the same settings; a new object is built for each Trainer
        # so the two trainers never share a TrainingArguments instance.
        return TrainingArguments(
            "./test", learning_rate=1e-9, logging_steps=5, logging_nan_inf_filter=False, neftune_noise_alpha=0.4
        )

    config = GPT2Config(vocab_size=100, n_positions=128, n_embd=32, n_layer=3, n_head=4)
    tiny_gpt2 = GPT2LMHeadModel(config)
    x = torch.randint(0, 100, (128,))
    train_dataset = RepeatDataset(x)

    # Phase 1: activate NEFTune by hand and look the same input up twice.
    trainer = Trainer(tiny_gpt2, make_args(), train_dataset=train_dataset)
    trainer.model = trainer._activate_neftune(trainer.model)

    dummy_input = torch.LongTensor([[1, 0, 1]]).to(torch_device)

    emb1 = trainer.model.get_input_embeddings()(dummy_input)
    emb2 = trainer.model.get_input_embeddings()(dummy_input)

    self.assertFalse(torch.allclose(emb1, emb2), "Neftune noise is not applied!")

    # Phase 2: redefine the model and run a real training loop.
    tiny_gpt2 = GPT2LMHeadModel(config)
    trainer = Trainer(tiny_gpt2, make_args(), train_dataset=train_dataset)

    # Check that it trains without errors
    trainer.train()

    # Make sure forward pass works fine
    _ = trainer.model(dummy_input)
    # assertEqual (rather than assertTrue(len(...) == 0)) reports the actual number
    # of leftover hooks when the check fails.
    self.assertEqual(len(trainer.model.get_input_embeddings()._forward_hooks), 0)

    trainer.model.eval()

    # Check that we get identical embeddings just in case
    emb1 = trainer.model.get_input_embeddings()(dummy_input)
    emb2 = trainer.model.get_input_embeddings()(dummy_input)

    self.assertTrue(torch.allclose(emb1, emb2), "Neftune noise is still applied!")
|
||||
|
||||
def test_logging_inf_nan_filter(self):
|
||||
config = GPT2Config(vocab_size=100, n_positions=128, n_embd=32, n_layer=3, n_head=4)
|
||||
tiny_gpt2 = GPT2LMHeadModel(config)
|
||||
|
||||
Reference in New Issue
Block a user