Make Trainer evaluation handle dynamic seq_length (#8336)
* Make Trainer evaluation handle dynamic seq_length * Document behavior. * Fix test * Better fix * Fixes for realsies this time * Address review comments * Without forgetting to save...
This commit is contained in:
@@ -73,6 +73,22 @@ class RegressionDataset:
|
||||
return result
|
||||
|
||||
|
||||
class DynamicShapesDataset:
    """Toy dataset whose samples are 1-D arrays of varying length.

    Every run of ``batch_size`` consecutive samples shares the same length, so
    a sequential, non-shuffled loader with that batch size can stack each batch
    without padding, while different batches still have different lengths.

    Args:
        length: Number of samples in the dataset.
        seed: Seed passed to ``np.random.seed`` before drawing sizes and values.
        batch_size: Number of consecutive samples that share one length.
    """

    def __init__(self, length=64, seed=42, batch_size=8):
        self.length = length
        np.random.seed(seed)
        # Ceiling division: a trailing partial batch still needs its own size,
        # otherwise fewer than `length` samples would be generated while
        # `__len__` keeps reporting `length`.
        num_batches = -(-length // batch_size)
        sizes = np.random.randint(1, 20, (num_batches,))
        # For easy batching, we make every batch_size consecutive samples the same size.
        # Truncate so that exactly `length` samples exist.
        sample_sizes = sizes.repeat(batch_size)[:length]
        self.xs = [np.random.normal(size=(s,)) for s in sample_sizes]
        self.ys = [np.random.normal(size=(s,)) for s in sample_sizes]

    def __len__(self):
        """Return the number of samples."""
        return self.length

    def __getitem__(self, i):
        """Return sample ``i`` as a dict with keys ``"input_x"`` and ``"labels"``."""
        return {"input_x": self.xs[i], "labels": self.ys[i]}
|
||||
|
||||
|
||||
class AlmostAccuracy:
|
||||
def __init__(self, thresh=0.25):
|
||||
self.thresh = thresh
|
||||
@@ -282,7 +298,7 @@ class TrainerIntegrationTest(unittest.TestCase):
|
||||
self.assertEqual(len(trainer.get_train_dataloader()), 66 // (16 * n_gpu))
|
||||
self.assertEqual(len(trainer.get_eval_dataloader()), 74 // (32 * n_gpu))
|
||||
|
||||
# Check passing a new dataset for evaluation wors
|
||||
# Check passing a new dataset for evaluation works
|
||||
new_eval_dataset = RegressionDataset(length=128)
|
||||
self.assertEqual(len(trainer.get_eval_dataloader(new_eval_dataset)), 128 // (32 * n_gpu))
|
||||
|
||||
@@ -340,6 +356,42 @@ class TrainerIntegrationTest(unittest.TestCase):
|
||||
self.assertTrue(np.array_equal(labels[0], trainer.eval_dataset.ys[0]))
|
||||
self.assertTrue(np.array_equal(labels[1], trainer.eval_dataset.ys[1]))
|
||||
|
||||
def test_dynamic_shapes(self):
    """Evaluate and predict on a dataset whose sample length varies between batches.

    The same checks run twice: once with default training arguments and once
    with ``eval_accumulation_steps=2``. For every sample, the leading entries
    of the gathered labels/predictions must match the expected values and
    every entry past the sample's own length must equal -100.
    """
    eval_dataset = DynamicShapesDataset(batch_size=self.batch_size)
    model = RegressionModel(a=2, b=1)

    # First pass: default arguments. Second pass: same tests with eval accumulation.
    for extra_kwargs in ({}, {"eval_accumulation_steps": 2}):
        args = TrainingArguments("./regression", **extra_kwargs)
        trainer = Trainer(model, args, eval_dataset=eval_dataset)

        # Check evaluation can run to completion
        trainer.evaluate()

        # Check predictions
        output = trainer.predict(eval_dataset)

        for target, gathered in zip(eval_dataset.ys, output.label_ids):
            valid = target.shape[0]
            self.assertTrue(np.array_equal(target, gathered[:valid]))
            self.assertTrue(np.all(gathered[valid:] == -100))

        for inputs, gathered in zip(eval_dataset.xs, output.predictions):
            valid = inputs.shape[0]
            # The model was built with a=2, b=1, so the expected output is 2 * x + 1.
            self.assertTrue(np.array_equal(2 * inputs + 1, gathered[:valid]))
            self.assertTrue(np.all(gathered[valid:] == -100))
|
||||
|
||||
@require_datasets
|
||||
def test_trainer_with_datasets(self):
|
||||
import datasets
|
||||
|
||||
Reference in New Issue
Block a user