Use stable functions (#9369)
This commit is contained in:
@@ -96,15 +96,15 @@ class TensorFlowBenchmarkArguments(BenchmarkArguments):
|
|||||||
tf.config.experimental_connect_to_cluster(self._setup_tpu)
|
tf.config.experimental_connect_to_cluster(self._setup_tpu)
|
||||||
tf.tpu.experimental.initialize_tpu_system(self._setup_tpu)
|
tf.tpu.experimental.initialize_tpu_system(self._setup_tpu)
|
||||||
|
|
||||||
strategy = tf.distribute.experimental.TPUStrategy(self._setup_tpu)
|
strategy = tf.distribute.TPUStrategy(self._setup_tpu)
|
||||||
else:
|
else:
|
||||||
# currently no multi gpu is allowed
|
# currently no multi gpu is allowed
|
||||||
if self.is_gpu:
|
if self.is_gpu:
|
||||||
# TODO: Currently only single GPU is supported
|
# TODO: Currently only single GPU is supported
|
||||||
tf.config.experimental.set_visible_devices(self.gpu_list[self.device_idx], "GPU")
|
tf.config.set_visible_devices(self.gpu_list[self.device_idx], "GPU")
|
||||||
strategy = tf.distribute.OneDeviceStrategy(device=f"/gpu:{self.device_idx}")
|
strategy = tf.distribute.OneDeviceStrategy(device=f"/gpu:{self.device_idx}")
|
||||||
else:
|
else:
|
||||||
tf.config.experimental.set_visible_devices([], "GPU") # disable GPU
|
tf.config.set_visible_devices([], "GPU") # disable GPU
|
||||||
strategy = tf.distribute.OneDeviceStrategy(device=f"/cpu:{self.device_idx}")
|
strategy = tf.distribute.OneDeviceStrategy(device=f"/cpu:{self.device_idx}")
|
||||||
|
|
||||||
return strategy
|
return strategy
|
||||||
|
|||||||
@@ -27,7 +27,6 @@ from .integrations import ( # isort: split
|
|||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import tensorflow as tf
|
import tensorflow as tf
|
||||||
from packaging.version import parse
|
|
||||||
from tensorflow.python.distribute.values import PerReplica
|
from tensorflow.python.distribute.values import PerReplica
|
||||||
|
|
||||||
from .modeling_tf_utils import TFPreTrainedModel
|
from .modeling_tf_utils import TFPreTrainedModel
|
||||||
@@ -93,11 +92,6 @@ class TFTrainer:
|
|||||||
None,
|
None,
|
||||||
),
|
),
|
||||||
):
|
):
|
||||||
assert parse(tf.__version__).release >= (2, 2, 0), (
|
|
||||||
"You need to run the TensorFlow trainer with at least the version 2.2.0, your version is %r "
|
|
||||||
% tf.__version__
|
|
||||||
)
|
|
||||||
|
|
||||||
self.model = model
|
self.model = model
|
||||||
self.args = args
|
self.args = args
|
||||||
self.train_dataset = train_dataset
|
self.train_dataset = train_dataset
|
||||||
@@ -141,7 +135,7 @@ class TFTrainer:
|
|||||||
raise ValueError("Trainer: training requires a train_dataset.")
|
raise ValueError("Trainer: training requires a train_dataset.")
|
||||||
|
|
||||||
self.total_train_batch_size = self.args.train_batch_size * self.args.gradient_accumulation_steps
|
self.total_train_batch_size = self.args.train_batch_size * self.args.gradient_accumulation_steps
|
||||||
self.num_train_examples = tf.data.experimental.cardinality(self.train_dataset).numpy()
|
self.num_train_examples = self.train_dataset.cardinality(self.train_dataset).numpy()
|
||||||
|
|
||||||
if self.num_train_examples < 0:
|
if self.num_train_examples < 0:
|
||||||
raise ValueError("The training dataset must have an asserted cardinality")
|
raise ValueError("The training dataset must have an asserted cardinality")
|
||||||
@@ -173,7 +167,7 @@ class TFTrainer:
|
|||||||
raise ValueError("Trainer: evaluation requires an eval_dataset.")
|
raise ValueError("Trainer: evaluation requires an eval_dataset.")
|
||||||
|
|
||||||
eval_dataset = eval_dataset if eval_dataset is not None else self.eval_dataset
|
eval_dataset = eval_dataset if eval_dataset is not None else self.eval_dataset
|
||||||
num_examples = tf.data.experimental.cardinality(eval_dataset).numpy()
|
num_examples = eval_dataset.cardinality(eval_dataset).numpy()
|
||||||
|
|
||||||
if num_examples < 0:
|
if num_examples < 0:
|
||||||
raise ValueError("The training dataset must have an asserted cardinality")
|
raise ValueError("The training dataset must have an asserted cardinality")
|
||||||
@@ -203,7 +197,7 @@ class TFTrainer:
|
|||||||
Subclass and override this method if you want to inject some custom behavior.
|
Subclass and override this method if you want to inject some custom behavior.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
num_examples = tf.data.experimental.cardinality(test_dataset).numpy()
|
num_examples = test_dataset.cardinality(test_dataset).numpy()
|
||||||
|
|
||||||
if num_examples < 0:
|
if num_examples < 0:
|
||||||
raise ValueError("The training dataset must have an asserted cardinality")
|
raise ValueError("The training dataset must have an asserted cardinality")
|
||||||
|
|||||||
@@ -188,7 +188,7 @@ class TFTrainingArguments(TrainingArguments):
|
|||||||
tf.config.experimental_connect_to_cluster(tpu)
|
tf.config.experimental_connect_to_cluster(tpu)
|
||||||
tf.tpu.experimental.initialize_tpu_system(tpu)
|
tf.tpu.experimental.initialize_tpu_system(tpu)
|
||||||
|
|
||||||
strategy = tf.distribute.experimental.TPUStrategy(tpu)
|
strategy = tf.distribute.TPUStrategy(tpu)
|
||||||
|
|
||||||
elif len(gpus) == 0:
|
elif len(gpus) == 0:
|
||||||
strategy = tf.distribute.OneDeviceStrategy(device="/cpu:0")
|
strategy = tf.distribute.OneDeviceStrategy(device="/cpu:0")
|
||||||
|
|||||||
@@ -51,10 +51,10 @@ if is_tf_available():
|
|||||||
for gpu in gpus:
|
for gpu in gpus:
|
||||||
# Restrict TensorFlow to only allocate x GB of memory on the GPUs
|
# Restrict TensorFlow to only allocate x GB of memory on the GPUs
|
||||||
try:
|
try:
|
||||||
tf.config.experimental.set_virtual_device_configuration(
|
tf.config.set_logical_device_configuration(
|
||||||
gpu, [tf.config.experimental.VirtualDeviceConfiguration(memory_limit=_tf_gpu_memory_limit)]
|
gpu, [tf.config.LogicalDeviceConfiguration(memory_limit=_tf_gpu_memory_limit)]
|
||||||
)
|
)
|
||||||
logical_gpus = tf.config.experimental.list_logical_devices("GPU")
|
logical_gpus = tf.config.list_logical_devices("GPU")
|
||||||
print("Logical GPUs", logical_gpus)
|
print("Logical GPUs", logical_gpus)
|
||||||
except RuntimeError as e:
|
except RuntimeError as e:
|
||||||
# Virtual devices must be set before GPUs have been initialized
|
# Virtual devices must be set before GPUs have been initialized
|
||||||
|
|||||||
@@ -14,8 +14,6 @@
|
|||||||
|
|
||||||
import unittest
|
import unittest
|
||||||
|
|
||||||
from packaging import version
|
|
||||||
|
|
||||||
from transformers import is_tf_available
|
from transformers import is_tf_available
|
||||||
from transformers.testing_utils import require_tf
|
from transformers.testing_utils import require_tf
|
||||||
|
|
||||||
@@ -78,18 +76,12 @@ class OptimizationFTest(unittest.TestCase):
|
|||||||
local_variables = strategy.experimental_local_results(gradient_placeholder)
|
local_variables = strategy.experimental_local_results(gradient_placeholder)
|
||||||
local_variables[0].assign(grad1)
|
local_variables[0].assign(grad1)
|
||||||
local_variables[1].assign(grad2)
|
local_variables[1].assign(grad2)
|
||||||
if version.parse(tf.version.VERSION) >= version.parse("2.2"):
|
|
||||||
strategy.run(accumulate_on_replica, args=(gradient_placeholder,))
|
strategy.run(accumulate_on_replica, args=(gradient_placeholder,))
|
||||||
else:
|
|
||||||
strategy.experimental_run_v2(accumulate_on_replica, args=(gradient_placeholder,))
|
|
||||||
|
|
||||||
@tf.function
|
@tf.function
|
||||||
def apply_grad():
|
def apply_grad():
|
||||||
with strategy.scope():
|
with strategy.scope():
|
||||||
if version.parse(tf.version.VERSION) >= version.parse("2.2"):
|
|
||||||
strategy.run(apply_on_replica)
|
strategy.run(apply_on_replica)
|
||||||
else:
|
|
||||||
strategy.experimental_run_v2(apply_on_replica)
|
|
||||||
|
|
||||||
def _check_local_values(grad1, grad2):
|
def _check_local_values(grad1, grad2):
|
||||||
values = strategy.experimental_local_results(accumulator._gradients[0])
|
values = strategy.experimental_local_results(accumulator._gradients[0])
|
||||||
|
|||||||
Reference in New Issue
Block a user