[FEAT] Add Neftune into transformers Trainer (#27141)

* add v1 neftune

* use `unwrap_model` instead

* add test + docs

* Apply suggestions from code review

Co-authored-by: Zach Mueller <muellerzr@gmail.com>

* more details

* fixup

* Update docs/source/en/main_classes/trainer.md

Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com>

* refactor a bit

* more elaborated test

* fix unwrap issue

---------

Co-authored-by: Zach Mueller <muellerzr@gmail.com>
Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com>
This commit is contained in:
Younes Belkada
2023-10-31 16:03:59 +01:00
committed by GitHub
parent f53041a753
commit 309a90664f
5 changed files with 154 additions and 0 deletions

View File

@@ -838,6 +838,50 @@ class TrainerIntegrationTest(TestCasePlus, TrainerIntegrationCommon):
train_output = trainer.train()
self.assertEqual(train_output.global_step, 10)
def test_neftune(self):
config = GPT2Config(vocab_size=100, n_positions=128, n_embd=32, n_layer=3, n_head=4)
tiny_gpt2 = GPT2LMHeadModel(config)
x = torch.randint(0, 100, (128,))
train_dataset = RepeatDataset(x)
# Trainer without inf/nan filter
args = TrainingArguments(
"./test", learning_rate=1e-9, logging_steps=5, logging_nan_inf_filter=False, neftune_noise_alpha=0.4
)
trainer = Trainer(tiny_gpt2, args, train_dataset=train_dataset)
trainer.model = trainer._activate_neftune(trainer.model)
dummy_input = torch.LongTensor([[1, 0, 1]]).to(torch_device)
emb1 = trainer.model.get_input_embeddings()(dummy_input)
emb2 = trainer.model.get_input_embeddings()(dummy_input)
self.assertFalse(torch.allclose(emb1, emb2), "Neftune noise is not applied!")
# redefine the model
tiny_gpt2 = GPT2LMHeadModel(config)
# Trainer without inf/nan filter
args = TrainingArguments(
"./test", learning_rate=1e-9, logging_steps=5, logging_nan_inf_filter=False, neftune_noise_alpha=0.4
)
trainer = Trainer(tiny_gpt2, args, train_dataset=train_dataset)
# Check that it trains without errors
trainer.train()
# Make sure forward pass works fine
_ = trainer.model(dummy_input)
self.assertTrue(len(trainer.model.get_input_embeddings()._forward_hooks) == 0)
trainer.model.eval()
# Check that we get identical embeddings just in case
emb1 = trainer.model.get_input_embeddings()(dummy_input)
emb2 = trainer.model.get_input_embeddings()(dummy_input)
self.assertTrue(torch.allclose(emb1, emb2), "Neftune noise is still applied!")
def test_logging_inf_nan_filter(self):
config = GPT2Config(vocab_size=100, n_positions=128, n_embd=32, n_layer=3, n_head=4)
tiny_gpt2 = GPT2LMHeadModel(config)