Support multiple validation datasets when dataloader_persistent_workers=True (#30627)
* Support multiple validation datasets when dataloader_persistent_workers=True * Test support of multiple validation datasets
This commit is contained in:
committed by
GitHub
parent
147c404fb1
commit
485fd81471
@@ -1231,6 +1231,97 @@ class TrainerIntegrationTest(TestCasePlus, TrainerIntegrationCommon):
|
||||
trainer.train()
|
||||
trainer.evaluate()
|
||||
|
||||
def test_get_eval_dataloader_without_persistent_workers(self):
|
||||
train_dataset = RegressionDataset()
|
||||
config = GPT2Config(vocab_size=100, n_positions=128, n_embd=32, n_layer=3, n_head=4)
|
||||
tiny_gpt2 = GPT2LMHeadModel(config)
|
||||
args = TrainingArguments("./test", report_to="none", dataloader_persistent_workers=False)
|
||||
|
||||
# Single evaluation dataset
|
||||
eval_dataset = RegressionDataset()
|
||||
trainer = Trainer(tiny_gpt2, args, train_dataset=train_dataset, eval_dataset=eval_dataset)
|
||||
# Mocking the prepare method to avoid the dataloader changing with each call to get_eval_dataloader
|
||||
trainer.accelerator.prepare = lambda x: x
|
||||
|
||||
default_dataloader = trainer.get_eval_dataloader()
|
||||
dataloader_with_dataset = trainer.get_eval_dataloader(eval_dataset)
|
||||
|
||||
self.assertEqual(default_dataloader.dataset, eval_dataset)
|
||||
self.assertEqual(dataloader_with_dataset.dataset, eval_dataset)
|
||||
self.assertNotEqual(default_dataloader, dataloader_with_dataset)
|
||||
|
||||
# Multiple evaluation datasets
|
||||
first_dataset = RegressionDataset()
|
||||
second_dataset = RegressionDataset()
|
||||
trainer = Trainer(
|
||||
tiny_gpt2,
|
||||
args,
|
||||
train_dataset=train_dataset,
|
||||
eval_dataset={"first": first_dataset, "second": second_dataset},
|
||||
)
|
||||
# Mocking the prepare method to avoid the dataloader changing with each call to get_eval_dataloader
|
||||
trainer.accelerator.prepare = lambda x: x
|
||||
|
||||
first_dataloader = trainer.get_eval_dataloader("first")
|
||||
first_dataloader_repeated = trainer.get_eval_dataloader("first")
|
||||
second_dataloader = trainer.get_eval_dataloader("second")
|
||||
second_dataloader_repeated = trainer.get_eval_dataloader("second")
|
||||
|
||||
self.assertEqual(first_dataset, first_dataloader.dataset)
|
||||
self.assertEqual(first_dataloader.dataset, first_dataloader_repeated.dataset)
|
||||
self.assertEqual(second_dataset, second_dataloader.dataset)
|
||||
self.assertEqual(second_dataloader.dataset, second_dataloader_repeated.dataset)
|
||||
self.assertNotEqual(first_dataloader, first_dataloader_repeated)
|
||||
self.assertNotEqual(second_dataloader, second_dataloader_repeated)
|
||||
|
||||
def test_get_eval_dataloader_with_persistent_workers(self):
|
||||
train_dataset = RegressionDataset()
|
||||
config = GPT2Config(vocab_size=100, n_positions=128, n_embd=32, n_layer=3, n_head=4)
|
||||
tiny_gpt2 = GPT2LMHeadModel(config)
|
||||
args = TrainingArguments(
|
||||
"./test",
|
||||
report_to="none",
|
||||
dataloader_persistent_workers=True,
|
||||
dataloader_num_workers=2,
|
||||
)
|
||||
|
||||
# Single evaluation dataset
|
||||
eval_dataset = RegressionDataset()
|
||||
trainer = Trainer(tiny_gpt2, args, train_dataset=train_dataset, eval_dataset=eval_dataset)
|
||||
# Mocking the prepare method to avoid the dataloader changing with each call to get_eval_dataloader
|
||||
trainer.accelerator.prepare = lambda x: x
|
||||
|
||||
default_dataloader = trainer.get_eval_dataloader()
|
||||
dataloader_with_dataset = trainer.get_eval_dataloader(eval_dataset)
|
||||
|
||||
self.assertEqual(default_dataloader.dataset, eval_dataset)
|
||||
self.assertEqual(dataloader_with_dataset.dataset, eval_dataset)
|
||||
self.assertEqual(default_dataloader, dataloader_with_dataset)
|
||||
|
||||
# Multiple evaluation datasets
|
||||
first_dataset = RegressionDataset()
|
||||
second_dataset = RegressionDataset()
|
||||
trainer = Trainer(
|
||||
tiny_gpt2,
|
||||
args,
|
||||
train_dataset=train_dataset,
|
||||
eval_dataset={"first": first_dataset, "second": second_dataset},
|
||||
)
|
||||
# Mocking the prepare method to avoid the dataloader changing with each call to get_eval_dataloader
|
||||
trainer.accelerator.prepare = lambda x: x
|
||||
|
||||
first_dataloader = trainer.get_eval_dataloader("first")
|
||||
first_dataloader_repeated = trainer.get_eval_dataloader("first")
|
||||
second_dataloader = trainer.get_eval_dataloader("second")
|
||||
second_dataloader_repeated = trainer.get_eval_dataloader("second")
|
||||
|
||||
self.assertEqual(first_dataset, first_dataloader.dataset)
|
||||
self.assertEqual(first_dataloader.dataset, first_dataloader_repeated.dataset)
|
||||
self.assertEqual(second_dataset, second_dataloader.dataset)
|
||||
self.assertEqual(second_dataloader.dataset, second_dataloader_repeated.dataset)
|
||||
self.assertEqual(first_dataloader, first_dataloader_repeated)
|
||||
self.assertEqual(second_dataloader, second_dataloader_repeated)
|
||||
|
||||
@require_lomo
|
||||
@require_torch_gpu
|
||||
def test_lomo(self):
|
||||
|
||||
Reference in New Issue
Block a user