Files
HuggingFace_transformer/src/transformers/training_args_tf.py
Julien Plu ca13618681 Question Answering for TF trainer (#4320)
* Add QA trainer example for TF

* Make data_dir optional

* Fix parameter logic

* Fix feature convert

* Update the READMEs to add the question-answering task

* Apply style

* Change 'sequence-classification' to 'text-classification' and prefix with 'eval' all the metric names

* Apply style

* Apply style
2020-05-13 09:22:31 -04:00

87 lines
3.0 KiB
Python

import logging
from dataclasses import dataclass, field
from typing import Tuple
from .file_utils import cached_property, is_tf_available, tf_required
from .training_args import TrainingArguments
logger = logging.getLogger(__name__)
if is_tf_available():
import tensorflow as tf
@dataclass
class TFTrainingArguments(TrainingArguments):
optimizer_name: str = field(
default="adam",
metadata={
"help": 'Name of a Tensorflow optimizer among "adadelta, adagrad, adam, adamax, ftrl, nadam, rmsprop, sgd, adamw"'
},
)
mode: str = field(
default="text-classification",
metadata={"help": 'Type of task, one of "text-classification", "token-classification", "question-answering"'},
)
loss_name: str = field(
default="SparseCategoricalCrossentropy",
metadata={
"help": "Name of a Tensorflow loss. For the list see: https://www.tensorflow.org/api_docs/python/tf/keras/losses"
},
)
tpu_name: str = field(
default=None, metadata={"help": "Name of TPU"},
)
end_lr: float = field(
default=0, metadata={"help": "End learning rate for optimizer"},
)
eval_steps: int = field(default=1000, metadata={"help": "Run an evaluation every X steps."})
debug: bool = field(
default=False, metadata={"help": "Activate the trace to record computation graphs and profiling information"}
)
@cached_property
@tf_required
def _setup_strategy(self) -> Tuple["tf.distribute.Strategy", int]:
logger.info("Tensorflow: setting up strategy")
gpus = tf.config.list_physical_devices("GPU")
if self.no_cuda:
strategy = tf.distribute.OneDeviceStrategy(device="/cpu:0")
else:
try:
if self.tpu_name:
tpu = tf.distribute.cluster_resolver.TPUClusterResolver(self.tpu_name)
else:
tpu = tf.distribute.cluster_resolver.TPUClusterResolver()
except ValueError:
tpu = None
if tpu:
tf.config.experimental_connect_to_cluster(tpu)
tf.tpu.experimental.initialize_tpu_system(tpu)
strategy = tf.distribute.experimental.TPUStrategy(tpu)
elif len(gpus) == 0:
strategy = tf.distribute.OneDeviceStrategy(device="/cpu:0")
elif len(gpus) == 1:
strategy = tf.distribute.OneDeviceStrategy(device="/gpu:0")
elif len(gpus) > 1:
# If you only want to use a specific subset of GPUs use `CUDA_VISIBLE_DEVICES=0`
strategy = tf.distribute.MirroredStrategy()
else:
raise ValueError("Cannot find the proper strategy please check your environment properties.")
return strategy
@property
@tf_required
def strategy(self) -> "tf.distribute.Strategy":
return self._setup_strategy
@property
@tf_required
def n_gpu(self) -> int:
return self._setup_strategy.num_replicas_in_sync