From 286dc19a4f7a1cbae3bb101094bc8e76805de454 Mon Sep 17 00:00:00 2001 From: Jonathan Chang <31893406+cccntu@users.noreply.github.com> Date: Tue, 27 Oct 2020 21:52:35 +0800 Subject: [PATCH] Fix IterableDataset with __len__ in Trainer (#8095) --- src/transformers/trainer.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/transformers/trainer.py b/src/transformers/trainer.py index a546b731fc..942ddde04d 100755 --- a/src/transformers/trainer.py +++ b/src/transformers/trainer.py @@ -384,7 +384,9 @@ class Trainer: dataset.set_format(type=dataset.format["type"], columns=columns) def _get_train_sampler(self) -> Optional[torch.utils.data.sampler.Sampler]: - if not isinstance(self.train_dataset, collections.abc.Sized): + if isinstance(self.train_dataset, torch.utils.data.IterableDataset) or not isinstance( + self.train_dataset, collections.abc.Sized + ): return None elif is_torch_tpu_available(): return get_tpu_sampler(self.train_dataset)