Use stable functions (#9369)

This commit is contained in:
Julien Plu
2021-01-05 09:58:26 +01:00
committed by GitHub
parent 4aa8f6ad99
commit 4225740a7b
5 changed files with 12 additions and 26 deletions

View File

@@ -96,15 +96,15 @@ class TensorFlowBenchmarkArguments(BenchmarkArguments):
tf.config.experimental_connect_to_cluster(self._setup_tpu) tf.config.experimental_connect_to_cluster(self._setup_tpu)
tf.tpu.experimental.initialize_tpu_system(self._setup_tpu) tf.tpu.experimental.initialize_tpu_system(self._setup_tpu)
strategy = tf.distribute.experimental.TPUStrategy(self._setup_tpu) strategy = tf.distribute.TPUStrategy(self._setup_tpu)
else: else:
# currently no multi gpu is allowed # currently no multi gpu is allowed
if self.is_gpu: if self.is_gpu:
# TODO: Currently only single GPU is supported # TODO: Currently only single GPU is supported
tf.config.experimental.set_visible_devices(self.gpu_list[self.device_idx], "GPU") tf.config.set_visible_devices(self.gpu_list[self.device_idx], "GPU")
strategy = tf.distribute.OneDeviceStrategy(device=f"/gpu:{self.device_idx}") strategy = tf.distribute.OneDeviceStrategy(device=f"/gpu:{self.device_idx}")
else: else:
tf.config.experimental.set_visible_devices([], "GPU") # disable GPU tf.config.set_visible_devices([], "GPU") # disable GPU
strategy = tf.distribute.OneDeviceStrategy(device=f"/cpu:{self.device_idx}") strategy = tf.distribute.OneDeviceStrategy(device=f"/cpu:{self.device_idx}")
return strategy return strategy

View File

@@ -27,7 +27,6 @@ from .integrations import ( # isort: split
import numpy as np import numpy as np
import tensorflow as tf import tensorflow as tf
from packaging.version import parse
from tensorflow.python.distribute.values import PerReplica from tensorflow.python.distribute.values import PerReplica
from .modeling_tf_utils import TFPreTrainedModel from .modeling_tf_utils import TFPreTrainedModel
@@ -93,11 +92,6 @@ class TFTrainer:
None, None,
), ),
): ):
assert parse(tf.__version__).release >= (2, 2, 0), (
"You need to run the TensorFlow trainer with at least the version 2.2.0, your version is %r "
% tf.__version__
)
self.model = model self.model = model
self.args = args self.args = args
self.train_dataset = train_dataset self.train_dataset = train_dataset
@@ -141,7 +135,7 @@ class TFTrainer:
raise ValueError("Trainer: training requires a train_dataset.") raise ValueError("Trainer: training requires a train_dataset.")
self.total_train_batch_size = self.args.train_batch_size * self.args.gradient_accumulation_steps self.total_train_batch_size = self.args.train_batch_size * self.args.gradient_accumulation_steps
self.num_train_examples = tf.data.experimental.cardinality(self.train_dataset).numpy() self.num_train_examples = self.train_dataset.cardinality().numpy()
if self.num_train_examples < 0: if self.num_train_examples < 0:
raise ValueError("The training dataset must have an asserted cardinality") raise ValueError("The training dataset must have an asserted cardinality")
@@ -173,7 +167,7 @@ class TFTrainer:
raise ValueError("Trainer: evaluation requires an eval_dataset.") raise ValueError("Trainer: evaluation requires an eval_dataset.")
eval_dataset = eval_dataset if eval_dataset is not None else self.eval_dataset eval_dataset = eval_dataset if eval_dataset is not None else self.eval_dataset
num_examples = tf.data.experimental.cardinality(eval_dataset).numpy() num_examples = eval_dataset.cardinality().numpy()
if num_examples < 0: if num_examples < 0:
raise ValueError("The training dataset must have an asserted cardinality") raise ValueError("The training dataset must have an asserted cardinality")
@@ -203,7 +197,7 @@ class TFTrainer:
Subclass and override this method if you want to inject some custom behavior. Subclass and override this method if you want to inject some custom behavior.
""" """
num_examples = tf.data.experimental.cardinality(test_dataset).numpy() num_examples = test_dataset.cardinality().numpy()
if num_examples < 0: if num_examples < 0:
raise ValueError("The training dataset must have an asserted cardinality") raise ValueError("The training dataset must have an asserted cardinality")

View File

@@ -188,7 +188,7 @@ class TFTrainingArguments(TrainingArguments):
tf.config.experimental_connect_to_cluster(tpu) tf.config.experimental_connect_to_cluster(tpu)
tf.tpu.experimental.initialize_tpu_system(tpu) tf.tpu.experimental.initialize_tpu_system(tpu)
strategy = tf.distribute.experimental.TPUStrategy(tpu) strategy = tf.distribute.TPUStrategy(tpu)
elif len(gpus) == 0: elif len(gpus) == 0:
strategy = tf.distribute.OneDeviceStrategy(device="/cpu:0") strategy = tf.distribute.OneDeviceStrategy(device="/cpu:0")

View File

@@ -51,10 +51,10 @@ if is_tf_available():
for gpu in gpus: for gpu in gpus:
# Restrict TensorFlow to only allocate x GB of memory on the GPUs # Restrict TensorFlow to only allocate x GB of memory on the GPUs
try: try:
tf.config.experimental.set_virtual_device_configuration( tf.config.set_logical_device_configuration(
gpu, [tf.config.experimental.VirtualDeviceConfiguration(memory_limit=_tf_gpu_memory_limit)] gpu, [tf.config.LogicalDeviceConfiguration(memory_limit=_tf_gpu_memory_limit)]
) )
logical_gpus = tf.config.experimental.list_logical_devices("GPU") logical_gpus = tf.config.list_logical_devices("GPU")
print("Logical GPUs", logical_gpus) print("Logical GPUs", logical_gpus)
except RuntimeError as e: except RuntimeError as e:
# Virtual devices must be set before GPUs have been initialized # Virtual devices must be set before GPUs have been initialized

View File

@@ -14,8 +14,6 @@
import unittest import unittest
from packaging import version
from transformers import is_tf_available from transformers import is_tf_available
from transformers.testing_utils import require_tf from transformers.testing_utils import require_tf
@@ -78,18 +76,12 @@ class OptimizationFTest(unittest.TestCase):
local_variables = strategy.experimental_local_results(gradient_placeholder) local_variables = strategy.experimental_local_results(gradient_placeholder)
local_variables[0].assign(grad1) local_variables[0].assign(grad1)
local_variables[1].assign(grad2) local_variables[1].assign(grad2)
if version.parse(tf.version.VERSION) >= version.parse("2.2"): strategy.run(accumulate_on_replica, args=(gradient_placeholder,))
strategy.run(accumulate_on_replica, args=(gradient_placeholder,))
else:
strategy.experimental_run_v2(accumulate_on_replica, args=(gradient_placeholder,))
@tf.function @tf.function
def apply_grad(): def apply_grad():
with strategy.scope(): with strategy.scope():
if version.parse(tf.version.VERSION) >= version.parse("2.2"): strategy.run(apply_on_replica)
strategy.run(apply_on_replica)
else:
strategy.experimental_run_v2(apply_on_replica)
def _check_local_values(grad1, grad2): def _check_local_values(grad1, grad2):
values = strategy.experimental_local_results(accumulator._gradients[0]) values = strategy.experimental_local_results(accumulator._gradients[0])