Compare commits
4 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| | 9f43a425fe | | |
| | 45dae78e61 | | |
| | 12b04b5003 | | |
| | 6460e9a0f3 | | |
@@ -26,7 +26,8 @@ author = u'huggingface'
|
||||
# The short X.Y version
|
||||
version = u''
|
||||
# The full version, including alpha/beta/rc tags
|
||||
release = u'4.4.1'
|
||||
release = u'4.4.2'
|
||||
|
||||
|
||||
|
||||
# Prefix link to point to master, comment this during version release and uncomment below line
|
||||
|
||||
2
setup.py
2
setup.py
@@ -278,7 +278,7 @@ install_requires = [
|
||||
|
||||
setup(
|
||||
name="transformers",
|
||||
version="4.4.1", # expected format is one of x.y.z.dev0, or x.y.z.rc1 or x.y.z (no to dashes, yes to dots)
|
||||
version="4.4.2", # expected format is one of x.y.z.dev0, or x.y.z.rc1 or x.y.z (no to dashes, yes to dots)
|
||||
author="Thomas Wolf, Lysandre Debut, Victor Sanh, Julien Chaumond, Sam Shleifer, Patrick von Platen, Sylvain Gugger, Google AI Language Team Authors, Open AI team Authors, Facebook AI Authors, Carnegie Mellon University Authors",
|
||||
author_email="thomas@huggingface.co",
|
||||
description="State-of-the-art Natural Language Processing for TensorFlow 2.0 and PyTorch",
|
||||
|
||||
@@ -22,7 +22,7 @@
|
||||
# to defer the actual importing for when the objects are requested. This way `import transformers` provides the names
|
||||
# in the namespace without actually importing anything (and especially none of the backends).
|
||||
|
||||
__version__ = "4.4.1"
|
||||
__version__ = "4.4.2"
|
||||
|
||||
# Work around to update TensorFlow's absl.logging threshold which alters the
|
||||
# default Python logging output behavior when present.
|
||||
|
||||
@@ -102,8 +102,12 @@ if USE_TF in ENV_VARS_TRUE_AND_AUTO_VALUES and USE_TORCH not in ENV_VARS_TRUE_VA
|
||||
try:
|
||||
_tf_version = importlib_metadata.version("tf-nightly-gpu")
|
||||
except importlib_metadata.PackageNotFoundError:
|
||||
_tf_version = None
|
||||
_tf_available = False
|
||||
# Support for intel-tensorflow version
|
||||
try:
|
||||
_tf_version = importlib_metadata.version("intel-tensorflow")
|
||||
except importlib_metadata.PackageNotFoundError:
|
||||
_tf_version = None
|
||||
_tf_available = False
|
||||
if _tf_available:
|
||||
if version.parse(_tf_version) < version.parse("2"):
|
||||
logger.info(f"TensorFlow found but with version {_tf_version}. Transformers requires version 2 minimum.")
|
||||
|
||||
@@ -112,7 +112,12 @@ class SageMakerTrainer(Trainer):
|
||||
|
||||
def _get_eval_sampler(self, eval_dataset: Dataset) -> Optional[torch.utils.data.sampler.Sampler]:
|
||||
if self.is_model_parallel_enabled:
|
||||
return SequentialDistributedSampler(eval_dataset, num_replicas=smp.dp_size(), rank=smp.dp_rank())
|
||||
return SequentialDistributedSampler(
|
||||
eval_dataset,
|
||||
num_replicas=smp.dp_size(),
|
||||
rank=smp.dp_rank(),
|
||||
batch_size=self.args.per_device_eval_batch_size,
|
||||
)
|
||||
else:
|
||||
return super()._get_eval_sampler(eval_dataset)
|
||||
|
||||
|
||||
@@ -670,7 +670,7 @@ class Trainer:
|
||||
"""
|
||||
Helper to get number of samples in a :class:`~torch.utils.data.DataLoader` by accessing its dataset.
|
||||
|
||||
Will raise an exception if the underlying dataset dese not implement method :obj:`__len__`
|
||||
Will raise an exception if the underlying dataset does not implement method :obj:`__len__`
|
||||
"""
|
||||
return len(dataloader.dataset)
|
||||
|
||||
@@ -1783,8 +1783,13 @@ class Trainer:
|
||||
|
||||
eval_losses_gatherer = DistributedTensorGatherer(world_size, num_examples, make_multiple_of=batch_size)
|
||||
if not prediction_loss_only:
|
||||
preds_gatherer = DistributedTensorGatherer(world_size, num_examples)
|
||||
labels_gatherer = DistributedTensorGatherer(world_size, num_examples)
|
||||
# The actual number of eval samples can be greater than num_examples in distributed settings (when we pass
|
||||
# a batch size to the sampler)
|
||||
make_multiple_of = None
|
||||
if hasattr(dataloader, "sampler") and isinstance(dataloader.sampler, SequentialDistributedSampler):
|
||||
make_multiple_of = dataloader.sampler.batch_size
|
||||
preds_gatherer = DistributedTensorGatherer(world_size, num_examples, make_multiple_of=make_multiple_of)
|
||||
labels_gatherer = DistributedTensorGatherer(world_size, num_examples, make_multiple_of=make_multiple_of)
|
||||
|
||||
model.eval()
|
||||
|
||||
|
||||
@@ -220,7 +220,7 @@ class SequentialDistributedSampler(Sampler):
|
||||
or `reduce` resulting tensors at the end of the loop.
|
||||
"""
|
||||
|
||||
def __init__(self, dataset, num_replicas=None, rank=None):
|
||||
def __init__(self, dataset, num_replicas=None, rank=None, batch_size=None):
|
||||
if num_replicas is None:
|
||||
if not dist.is_available():
|
||||
raise RuntimeError("Requires distributed package to be available")
|
||||
@@ -232,8 +232,14 @@ class SequentialDistributedSampler(Sampler):
|
||||
self.dataset = dataset
|
||||
self.num_replicas = num_replicas
|
||||
self.rank = rank
|
||||
self.num_samples = int(math.ceil(len(self.dataset) * 1.0 / self.num_replicas))
|
||||
num_samples = len(self.dataset)
|
||||
# Add extra samples to make num_samples a multiple of batch_size if passed
|
||||
if batch_size is not None:
|
||||
self.num_samples = int(math.ceil(num_samples / (batch_size * num_replicas))) * batch_size
|
||||
else:
|
||||
self.num_samples = int(math.ceil(num_samples / num_replicas))
|
||||
self.total_size = self.num_samples * self.num_replicas
|
||||
self.batch_size = batch_size
|
||||
|
||||
def __iter__(self):
|
||||
indices = list(range(len(self.dataset)))
|
||||
|
||||
@@ -97,6 +97,11 @@ if __name__ == "__main__":
|
||||
def compute_metrics(p: EvalPrediction) -> Dict:
    """Report whether predictions and labels both equal ``0 .. len(dataset) - 1`` in order.

    Args:
        p: Evaluation output; ``p.predictions`` and ``p.label_ids`` are each
            converted with ``.tolist()`` and compared against the expected
            sequence.

    Returns:
        A dict with the single key ``"success"`` holding the comparison result.
    """
    expected = list(range(len(dataset)))
    outcome = {"success": p.predictions.tolist() == expected and p.label_ids.tolist() == expected}

    # Nothing to report on success; on failure only the process whose
    # local_rank is 0 emits the warning.
    if outcome["success"] or training_args.local_rank != 0:
        return outcome

    mismatch_report = (
        "Predictions and/or labels do not match expected results:\n - predictions: "
        f"{p.predictions.tolist()}\n - labels: {p.label_ids.tolist()}\n - expected: {expected}"
    )
    logger.warning(mismatch_report)
    return outcome
|
||||
|
||||
trainer = Trainer(
|
||||
|
||||
@@ -31,6 +31,7 @@ if is_torch_available():
|
||||
DistributedTensorGatherer,
|
||||
LabelSmoother,
|
||||
LengthGroupedSampler,
|
||||
SequentialDistributedSampler,
|
||||
get_parameter_names,
|
||||
)
|
||||
|
||||
@@ -167,3 +168,35 @@ class TrainerUtilsTest(unittest.TestCase):
|
||||
|
||||
self.assertEqual(set(total[:length]), set(dataset))
|
||||
self.assertEqual(set(total[length:]), set(total[: (len(total) - length)]))
|
||||
|
||||
def test_sequential_distributed_sampler(self):
    """Check the two-replica shards of ``SequentialDistributedSampler``.

    For each dataset length, with and without ``batch_size``, the rank-0
    samples followed by the rank-1 samples must start with the whole dataset
    in order, and every extra entry must repeat the beginning of the dataset.
    When ``batch_size`` is passed, each shard's length must also be a
    multiple of it.
    """
    batch_size = 16
    for length in (23, 64, 123):
        dataset = list(range(length))

        # Default construction (no batch_size): build both shards, then draw from them.
        shards = [SequentialDistributedSampler(dataset, num_replicas=2, rank=rank) for rank in (0, 1)]
        first, second = (list(shard) for shard in shards)

        combined = first + second
        overflow = len(combined) - length
        self.assertListEqual(combined[:length], dataset)
        self.assertListEqual(combined[length:], dataset[:overflow])

        # With a batch_size passed
        shards = [
            SequentialDistributedSampler(dataset, num_replicas=2, rank=rank, batch_size=batch_size)
            for rank in (0, 1)
        ]
        first, second = (list(shard) for shard in shards)

        self.assertTrue(len(first) % batch_size == 0)
        self.assertTrue(len(second) % batch_size == 0)

        combined = first + second
        overflow = len(combined) - length
        self.assertListEqual(combined[:length], dataset)
        self.assertListEqual(combined[length:], dataset[:overflow])
|
||||
|
||||
Reference in New Issue
Block a user