* Add QA trainer example for TF * Make data_dir optional * Fix parameter logic * Fix feature convert * Update the READMEs to add the question-answering task * Apply style * Change 'sequence-classification' to 'text-classification' and prefix with 'eval' all the metric names * Apply style * Apply style
87 lines
3.0 KiB
Python
87 lines
3.0 KiB
Python
import logging
|
|
from dataclasses import dataclass, field
|
|
from typing import Tuple
|
|
|
|
from .file_utils import cached_property, is_tf_available, tf_required
|
|
from .training_args import TrainingArguments
|
|
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
if is_tf_available():
|
|
import tensorflow as tf
|
|
|
|
|
|
@dataclass
class TFTrainingArguments(TrainingArguments):
    """
    Training arguments specific to the TensorFlow trainer.

    Extends ``TrainingArguments`` with the optimizer, loss, task type, TPU and
    evaluation settings declared below, and exposes the ``tf.distribute``
    strategy selected for the current environment through :attr:`strategy`.
    Each field's ``help`` metadata describes its purpose.
    """

    optimizer_name: str = field(
        default="adam",
        metadata={
            "help": 'Name of a Tensorflow optimizer among "adadelta, adagrad, adam, adamax, ftrl, nadam, rmsprop, sgd, adamw"'
        },
    )
    mode: str = field(
        default="text-classification",
        metadata={"help": 'Type of task, one of "text-classification", "token-classification", "question-answering"'},
    )
    loss_name: str = field(
        default="SparseCategoricalCrossentropy",
        metadata={
            "help": "Name of a Tensorflow loss. For the list see: https://www.tensorflow.org/api_docs/python/tf/keras/losses"
        },
    )
    # NOTE(review): annotated as `str` but defaults to None; a falsy value makes
    # `_setup_strategy` call the TPU resolver without an explicit name.
    tpu_name: str = field(
        default=None,
        metadata={"help": "Name of TPU"},
    )
    end_lr: float = field(
        default=0,
        metadata={"help": "End learning rate for optimizer"},
    )
    eval_steps: int = field(default=1000, metadata={"help": "Run an evaluation every X steps."})
    debug: bool = field(
        default=False,
        metadata={"help": "Activate the trace to record computation graphs and profiling information"},
    )

    @cached_property
    @tf_required
    def _setup_strategy(self) -> "tf.distribute.Strategy":
        """
        Select the ``tf.distribute`` strategy matching the current environment.

        Selection order:

        1. ``self.no_cuda`` set (attribute not declared in this class; expected
           to come from ``TrainingArguments``): single-device strategy on CPU.
        2. A TPU cluster resolver can be created (using ``self.tpu_name`` when
           it is set): the TPU system is connected and initialized, and a TPU
           strategy is returned.
        3. No visible GPU: single-device strategy on CPU.
        4. Exactly one visible GPU: single-device strategy on that GPU.
        5. Several visible GPUs: mirrored strategy.

        Returns:
            The selected ``tf.distribute.Strategy`` instance.
        """
        logger.info("Tensorflow: setting up strategy")
        gpus = tf.config.list_physical_devices("GPU")

        if self.no_cuda:
            return tf.distribute.OneDeviceStrategy(device="/cpu:0")

        # The resolver raising ValueError is treated as "no TPU available",
        # in which case selection falls through to the GPU/CPU cases below.
        try:
            if self.tpu_name:
                tpu = tf.distribute.cluster_resolver.TPUClusterResolver(self.tpu_name)
            else:
                tpu = tf.distribute.cluster_resolver.TPUClusterResolver()
        except ValueError:
            tpu = None

        if tpu:
            tf.config.experimental_connect_to_cluster(tpu)
            tf.tpu.experimental.initialize_tpu_system(tpu)
            return tf.distribute.experimental.TPUStrategy(tpu)

        if len(gpus) == 0:
            return tf.distribute.OneDeviceStrategy(device="/cpu:0")

        if len(gpus) == 1:
            return tf.distribute.OneDeviceStrategy(device="/gpu:0")

        # More than one GPU is visible.
        # If you only want to use a specific subset of GPUs use `CUDA_VISIBLE_DEVICES=0`
        return tf.distribute.MirroredStrategy()

    @property
    @tf_required
    def strategy(self) -> "tf.distribute.Strategy":
        """The ``tf.distribute`` strategy selected by ``_setup_strategy``."""
        return self._setup_strategy

    @property
    @tf_required
    def n_gpu(self) -> int:
        """
        Value of ``num_replicas_in_sync`` on the selected strategy.

        Despite the name, this is the replica count of whichever strategy was
        chosen (CPU, GPU or TPU), not necessarily a number of GPUs.
        """
        return self._setup_strategy.num_replicas_in_sync
|