Add DistributedSamplerWithLoop (#10746)

* Add DistributedSamplerWithLoop * Fix typo * Test and small fix
2021-03-16 11:22:39 -04:00
parent 1449222217
commit a0a027c2ed
5 changed files with 93 additions and 20 deletions
--- a/tests/test_trainer_utils.py
+++ b/tests/test_trainer_utils.py
@@ -27,6 +27,7 @@ if is_torch_available():
    from transformers.modeling_outputs import SequenceClassifierOutput
    from transformers.trainer_pt_utils import (
        DistributedLengthGroupedSampler,
+        DistributedSamplerWithLoop,
        DistributedTensorGatherer,
        LabelSmoother,
        LengthGroupedSampler,
@@ -141,3 +142,28 @@ class TrainerUtilsTest(unittest.TestCase):
            ['0.linear1.weight', '0.linear1.bias', '0.linear2.weight', '0.linear2.bias', '0.bias', '1.0.linear1.weight', '1.0.linear1.bias', '1.0.linear2.weight', '1.0.linear2.bias', '1.0.bias', '1.1.linear1.weight', '1.1.linear1.bias', '1.1.linear2.weight', '1.1.linear2.bias', '1.1.bias']
        )
        # fmt: on
+
+    def test_distributed_sampler_with_loop(self):  # checks batch-multiple shard lengths and dataset coverage for 2 replicas
+        batch_size = 16
+        for length in [23, 64, 123]:  # dataset sizes to exercise; 64 is the only one divisible by batch_size
+            dataset = list(range(length))
+            shard1 = DistributedSamplerWithLoop(dataset, batch_size, num_replicas=2, rank=0)
+            shard2 = DistributedSamplerWithLoop(dataset, batch_size, num_replicas=2, rank=1)
+
+            # Put both shards on the same epoch (presumably this seeds their shuffle identically; confirm in the sampler)
+            shard1.set_epoch(0)
+            shard2.set_epoch(0)
+
+            # Materialize the indices each shard yields
+            samples1 = list(shard1)
+            samples2 = list(shard2)
+
+            self.assertTrue(len(samples1) % batch_size == 0)  # each shard must yield a whole number of batches
+            self.assertTrue(len(samples2) % batch_size == 0)
+
+            total = []  # interleaved order: samples1[0], samples2[0], samples1[1], samples2[1], ...
+            for sample1, sample2 in zip(samples1, samples2):
+                total += [sample1, sample2]
+
+            self.assertEqual(set(total[:length]), set(dataset))  # the first `length` interleaved items cover the whole dataset
+            self.assertEqual(set(total[length:]), set(total[: (len(total) - length)]))  # the overflow matches, as a set, the equally long head of `total`