Fix distributed evaluation (#10795)

* Fix distributed evaluation
* Use logger
2021-03-18 13:12:04 -04:00
parent 9352b5151a
commit 008672e6e5
2 changed files with 13 additions and 3 deletions
--- a/src/transformers/trainer.py
+++ b/src/transformers/trainer.py
@@ -690,7 +690,7 @@ class Trainer:
        """
        Helper to get number of samples in a :class:`~torch.utils.data.DataLoader` by accessing its dataset.
-        Will raise an exception if the underlying dataset dese not implement method :obj:`__len__`
+        Will raise an exception if the underlying dataset does not implement method :obj:`__len__`
        """
        return len(dataloader.dataset)
@@ -1812,8 +1812,13 @@ class Trainer:
        eval_losses_gatherer = DistributedTensorGatherer(world_size, num_examples, make_multiple_of=batch_size)
        if not prediction_loss_only:
-            preds_gatherer = DistributedTensorGatherer(world_size, num_examples, make_multiple_of=batch_size)
+            # The actual number of eval_sample can be greater than num_examples in distributed settings (when we pass
-            labels_gatherer = DistributedTensorGatherer(world_size, num_examples, make_multiple_of=batch_size)
+            # a batch size to the sampler)
+            make_multiple_of = None
+            if hasattr(dataloader, "sampler") and isinstance(dataloader.sampler, SequentialDistributedSampler):
+                make_multiple_of = dataloader.sampler.batch_size
+            preds_gatherer = DistributedTensorGatherer(world_size, num_examples, make_multiple_of=make_multiple_of)
+            labels_gatherer = DistributedTensorGatherer(world_size, num_examples, make_multiple_of=make_multiple_of)
        model.eval()
--- a/tests/test_trainer_distributed.py
+++ b/tests/test_trainer_distributed.py
@@ -97,6 +97,11 @@ if __name__ == "__main__":
        def compute_metrics(p: EvalPrediction) -> Dict:
            sequential = list(range(len(dataset)))
            success = p.predictions.tolist() == sequential and p.label_ids.tolist() == sequential
+            if not success and training_args.local_rank == 0:
+                logger.warning(
+                    "Predictions and/or labels do not match expected results:\n  - predictions: "
+                    f"{p.predictions.tolist()}\n  - labels: {p.label_ids.tolist()}\n  - expected: {sequential}"
+                )
            return {"success": success}
        trainer = Trainer(