Add DistributedSamplerWithLoop (#10746)

* Add DistributedSamplerWithLoop

* Fix typo

* Test and small fix
This commit is contained in:
Sylvain Gugger
2021-03-16 11:22:39 -04:00
committed by GitHub
parent 1449222217
commit a0a027c2ed
5 changed files with 93 additions and 20 deletions

View File

@@ -27,6 +27,7 @@ if is_torch_available():
from transformers.modeling_outputs import SequenceClassifierOutput
from transformers.trainer_pt_utils import (
DistributedLengthGroupedSampler,
DistributedSamplerWithLoop,
DistributedTensorGatherer,
LabelSmoother,
LengthGroupedSampler,
@@ -141,3 +142,28 @@ class TrainerUtilsTest(unittest.TestCase):
['0.linear1.weight', '0.linear1.bias', '0.linear2.weight', '0.linear2.bias', '0.bias', '1.0.linear1.weight', '1.0.linear1.bias', '1.0.linear2.weight', '1.0.linear2.bias', '1.0.bias', '1.1.linear1.weight', '1.1.linear1.bias', '1.1.linear2.weight', '1.1.linear2.bias', '1.1.bias']
)
# fmt: on
def test_distributed_sampler_with_loop(self):
batch_size = 16
for length in [23, 64, 123]:
dataset = list(range(length))
shard1 = DistributedSamplerWithLoop(dataset, batch_size, num_replicas=2, rank=0)
shard2 = DistributedSamplerWithLoop(dataset, batch_size, num_replicas=2, rank=1)
# Set seeds
shard1.set_epoch(0)
shard2.set_epoch(0)
# Sample
samples1 = list(shard1)
samples2 = list(shard2)
self.assertTrue(len(samples1) % batch_size == 0)
self.assertTrue(len(samples2) % batch_size == 0)
total = []
for sample1, sample2 in zip(samples1, samples2):
total += [sample1, sample2]
self.assertEqual(set(total[:length]), set(dataset))
self.assertEqual(set(total[length:]), set(total[: (len(total) - length)]))