Make Trainer evaluation handle dynamic seq_length (#8336)

* Make Trainer evaluation handle dynamic seq_length

* Document behavior.

* Fix test

* Better fix

* Fixes for realsies this time

* Address review comments

* Without forgetting to save...
This commit is contained in:
Sylvain Gugger
2020-11-05 15:13:51 -05:00
committed by GitHub
parent 27b402cab0
commit 04e442d575
3 changed files with 135 additions and 13 deletions

View File

@@ -73,6 +73,22 @@ class RegressionDataset:
return result
class DynamicShapesDataset:
    """Toy dataset whose samples change length from one batch to the next.

    Every run of ``batch_size`` consecutive samples shares one randomly drawn
    length (between 1 and 19 inclusive), so each batch can be stacked without
    padding while different batches have different sequence lengths.

    Args:
        length: Number of samples in the dataset.
        seed: Seed passed to ``np.random.seed`` before drawing sizes and values.
        batch_size: Number of consecutive samples that share the same length.
    """

    def __init__(self, length=64, seed=42, batch_size=8):
        self.length = length
        np.random.seed(seed)
        # Ceil division: a trailing partial batch still needs a size, otherwise
        # fewer than `length` samples get generated and indexing the tail fails.
        num_batches = -(-length // batch_size)
        sizes = np.random.randint(1, 20, (num_batches,))
        # For easy batching, we make every batch_size consecutive samples the same size.
        # Truncate so that exactly `length` samples exist, matching __len__.
        sample_sizes = sizes.repeat(batch_size)[:length]
        self.xs = [np.random.normal(size=(s,)) for s in sample_sizes]
        self.ys = [np.random.normal(size=(s,)) for s in sample_sizes]

    def __len__(self):
        """Return the number of samples."""
        return self.length

    def __getitem__(self, i):
        """Return sample ``i`` as a dict with ``input_x`` and ``labels`` arrays."""
        return {"input_x": self.xs[i], "labels": self.ys[i]}
class AlmostAccuracy:
def __init__(self, thresh=0.25):
self.thresh = thresh
@@ -282,7 +298,7 @@ class TrainerIntegrationTest(unittest.TestCase):
self.assertEqual(len(trainer.get_train_dataloader()), 66 // (16 * n_gpu))
self.assertEqual(len(trainer.get_eval_dataloader()), 74 // (32 * n_gpu))
# Check passing a new dataset for evaluation wors
# Check passing a new dataset for evaluation works
new_eval_dataset = RegressionDataset(length=128)
self.assertEqual(len(trainer.get_eval_dataloader(new_eval_dataset)), 128 // (32 * n_gpu))
@@ -340,6 +356,42 @@ class TrainerIntegrationTest(unittest.TestCase):
self.assertTrue(np.array_equal(labels[0], trainer.eval_dataset.ys[0]))
self.assertTrue(np.array_equal(labels[1], trainer.eval_dataset.ys[1]))
def test_dynamic_shapes(self):
    """Check evaluation and prediction on batches with differing sequence lengths.

    The same checks run twice: once with default training arguments and once
    with ``eval_accumulation_steps=2``. In both cases the gathered labels and
    predictions must match the dataset on their real length and hold -100 in
    every position past it.
    """
    eval_dataset = DynamicShapesDataset(batch_size=self.batch_size)
    model = RegressionModel(a=2, b=1)

    # First pass: default arguments. Second pass: same tests with eval accumulation.
    for extra_kwargs in ({}, {"eval_accumulation_steps": 2}):
        args = TrainingArguments("./regression", **extra_kwargs)
        trainer = Trainer(model, args, eval_dataset=eval_dataset)

        # Check evaluation can run to completion
        _ = trainer.evaluate()

        # Check predictions
        preds = trainer.predict(eval_dataset)

        for target, gathered in zip(eval_dataset.ys, preds.label_ids):
            real_len = target.shape[0]
            self.assertTrue(np.array_equal(target, gathered[:real_len]))
            self.assertTrue(np.all(gathered[real_len:] == -100))

        for inputs, gathered in zip(eval_dataset.xs, preds.predictions):
            real_len = inputs.shape[0]
            # The model was built with a=2, b=1, so the expected output is 2 * x + 1.
            self.assertTrue(np.array_equal(2 * inputs + 1, gathered[:real_len]))
            self.assertTrue(np.all(gathered[real_len:] == -100))
@require_datasets
def test_trainer_with_datasets(self):
import datasets