Add MultipleChoice to TFTrainer [WIP] (#4270)
* catch gpu len 1 set to gpu0 * Add mpc to trainer * Add MPC for TF * fix TF automodel for MPC and add Albert * Apply style * Fix import * Note to self: double check * Make shape None, None for datasetgenerator output shapes * Add from_pt bool which doesnt seem to work * Original checkpoint dir * Fix docstrings for automodel * Update readme and apply style * Colab should probably not be from users * Colabs should probably not be from users * Add colab * Update README.md * Update README.md * Cleanup __intit__ * Cleanup flake8 trailing comma * Update src/transformers/training_args_tf.py * Update src/transformers/modeling_tf_auto.py Co-authored-by: Viktor Alm <viktoralm@pop-os.localdomain> Co-authored-by: Julien Chaumond <chaumond@gmail.com>
This commit is contained in:
@@ -359,6 +359,7 @@ if is_tf_available():
|
||||
from .modeling_tf_auto import (
|
||||
TFAutoModel,
|
||||
TFAutoModelForPreTraining,
|
||||
TFAutoModelForMultipleChoice,
|
||||
TFAutoModelForSequenceClassification,
|
||||
TFAutoModelForQuestionAnswering,
|
||||
TFAutoModelWithLMHead,
|
||||
@@ -493,6 +494,7 @@ if is_tf_available():
|
||||
TFAlbertModel,
|
||||
TFAlbertForPreTraining,
|
||||
TFAlbertForMaskedLM,
|
||||
TFAlbertForMultipleChoice,
|
||||
TFAlbertForSequenceClassification,
|
||||
TFAlbertForQuestionAnswering,
|
||||
TF_ALBERT_PRETRAINED_MODEL_ARCHIVE_MAP,
|
||||
|
||||
@@ -21,7 +21,7 @@ import logging
|
||||
import tensorflow as tf
|
||||
|
||||
from .configuration_albert import AlbertConfig
|
||||
from .file_utils import add_start_docstrings, add_start_docstrings_to_callable
|
||||
from .file_utils import MULTIPLE_CHOICE_DUMMY_INPUTS, add_start_docstrings, add_start_docstrings_to_callable
|
||||
from .modeling_tf_bert import ACT2FN, TFBertSelfAttention
|
||||
from .modeling_tf_utils import TFPreTrainedModel, get_initializer, keras_serializable, shape_list
|
||||
from .tokenization_utils import BatchEncoding
|
||||
@@ -957,3 +957,127 @@ class TFAlbertForQuestionAnswering(TFAlbertPreTrainedModel):
|
||||
outputs = (start_logits, end_logits,) + outputs[2:]
|
||||
|
||||
return outputs # start_logits, end_logits, (hidden_states), (attentions)
|
||||
|
||||
|
||||
@add_start_docstrings(
|
||||
"""Albert Model with a multiple choice classification head on top (a linear layer on top of
|
||||
the pooled output and a softmax) e.g. for RocStories/SWAG tasks. """,
|
||||
ALBERT_START_DOCSTRING,
|
||||
)
|
||||
class TFAlbertForMultipleChoice(TFAlbertPreTrainedModel):
|
||||
def __init__(self, config, *inputs, **kwargs):
|
||||
super().__init__(config, *inputs, **kwargs)
|
||||
|
||||
self.albert = TFAlbertMainLayer(config, name="albert")
|
||||
self.dropout = tf.keras.layers.Dropout(config.hidden_dropout_prob)
|
||||
self.classifier = tf.keras.layers.Dense(
|
||||
1, kernel_initializer=get_initializer(config.initializer_range), name="classifier"
|
||||
)
|
||||
|
||||
@property
|
||||
def dummy_inputs(self):
|
||||
""" Dummy inputs to build the network.
|
||||
|
||||
Returns:
|
||||
tf.Tensor with dummy inputs
|
||||
"""
|
||||
return {"input_ids": tf.constant(MULTIPLE_CHOICE_DUMMY_INPUTS)}
|
||||
|
||||
@add_start_docstrings_to_callable(ALBERT_INPUTS_DOCSTRING)
|
||||
def call(
|
||||
self,
|
||||
inputs,
|
||||
attention_mask=None,
|
||||
token_type_ids=None,
|
||||
position_ids=None,
|
||||
head_mask=None,
|
||||
inputs_embeds=None,
|
||||
training=False,
|
||||
):
|
||||
r"""
|
||||
Return:
|
||||
:obj:`tuple(tf.Tensor)` comprising various elements depending on the configuration (:class:`~transformers.BertConfig`) and inputs:
|
||||
classification_scores (:obj:`Numpy array` or :obj:`tf.Tensor` of shape :obj:`(batch_size, num_choices)`:
|
||||
`num_choices` is the size of the second dimension of the input tensors. (see `input_ids` above).
|
||||
|
||||
Classification scores (before SoftMax).
|
||||
hidden_states (:obj:`tuple(tf.Tensor)`, `optional`, returned when :obj:`config.output_hidden_states=True`):
|
||||
tuple of :obj:`tf.Tensor` (one for the output of the embeddings + one for the output of each layer)
|
||||
of shape :obj:`(batch_size, sequence_length, hidden_size)`.
|
||||
|
||||
Hidden-states of the model at the output of each layer plus the initial embedding outputs.
|
||||
attentions (:obj:`tuple(tf.Tensor)`, `optional`, returned when ``config.output_attentions=True``):
|
||||
tuple of :obj:`tf.Tensor` (one for each layer) of shape
|
||||
:obj:`(batch_size, num_heads, sequence_length, sequence_length)`:
|
||||
|
||||
Attentions weights after the attention softmax, used to compute the weighted average in the self-attention heads.
|
||||
|
||||
Examples::
|
||||
|
||||
import tensorflow as tf
|
||||
from transformers import AlbertTokenizer, TFAlbertForMultipleChoice
|
||||
|
||||
tokenizer = AlbertTokenizer.from_pretrained('albert-base-v2')
|
||||
model = TFAlbertForMultipleChoice.from_pretrained('albert-base-v2')
|
||||
|
||||
example1 = ["This is a context", "Is it a context? Yes"]
|
||||
example2 = ["This is a context", "Is it a context? No"]
|
||||
encoding = tokenizer.batch_encode_plus([example1, example2], return_tensors='tf', truncation_strategy="only_first", pad_to_max_length=True, max_length=128)
|
||||
outputs = model(encoding["input_ids"][None, :])
|
||||
logits = outputs[0]
|
||||
|
||||
"""
|
||||
if isinstance(inputs, (tuple, list)):
|
||||
input_ids = inputs[0]
|
||||
attention_mask = inputs[1] if len(inputs) > 1 else attention_mask
|
||||
token_type_ids = inputs[2] if len(inputs) > 2 else token_type_ids
|
||||
position_ids = inputs[3] if len(inputs) > 3 else position_ids
|
||||
head_mask = inputs[4] if len(inputs) > 4 else head_mask
|
||||
inputs_embeds = inputs[5] if len(inputs) > 5 else inputs_embeds
|
||||
assert len(inputs) <= 6, "Too many inputs."
|
||||
elif isinstance(inputs, dict):
|
||||
print("isdict(1)")
|
||||
input_ids = inputs.get("input_ids")
|
||||
print(input_ids)
|
||||
|
||||
attention_mask = inputs.get("attention_mask", attention_mask)
|
||||
token_type_ids = inputs.get("token_type_ids", token_type_ids)
|
||||
position_ids = inputs.get("position_ids", position_ids)
|
||||
head_mask = inputs.get("head_mask", head_mask)
|
||||
inputs_embeds = inputs.get("inputs_embeds", inputs_embeds)
|
||||
assert len(inputs) <= 6, "Too many inputs."
|
||||
else:
|
||||
input_ids = inputs
|
||||
|
||||
if input_ids is not None:
|
||||
num_choices = shape_list(input_ids)[1]
|
||||
seq_length = shape_list(input_ids)[2]
|
||||
else:
|
||||
num_choices = shape_list(inputs_embeds)[1]
|
||||
seq_length = shape_list(inputs_embeds)[2]
|
||||
|
||||
flat_input_ids = tf.reshape(input_ids, (-1, seq_length)) if input_ids is not None else None
|
||||
flat_attention_mask = tf.reshape(attention_mask, (-1, seq_length)) if attention_mask is not None else None
|
||||
flat_token_type_ids = tf.reshape(token_type_ids, (-1, seq_length)) if token_type_ids is not None else None
|
||||
flat_position_ids = tf.reshape(position_ids, (-1, seq_length)) if position_ids is not None else None
|
||||
|
||||
flat_inputs = [
|
||||
flat_input_ids,
|
||||
flat_attention_mask,
|
||||
flat_token_type_ids,
|
||||
flat_position_ids,
|
||||
head_mask,
|
||||
inputs_embeds,
|
||||
]
|
||||
|
||||
outputs = self.albert(flat_inputs, training=training)
|
||||
|
||||
pooled_output = outputs[1]
|
||||
|
||||
pooled_output = self.dropout(pooled_output, training=training)
|
||||
logits = self.classifier(pooled_output)
|
||||
reshaped_logits = tf.reshape(logits, (-1, num_choices))
|
||||
|
||||
outputs = (reshaped_logits,) + outputs[2:] # add hidden states and attention if they are here
|
||||
|
||||
return outputs # reshaped_logits, (hidden_states), (attentions)
|
||||
|
||||
@@ -36,6 +36,7 @@ from .configuration_utils import PretrainedConfig
|
||||
from .modeling_tf_albert import (
|
||||
TF_ALBERT_PRETRAINED_MODEL_ARCHIVE_MAP,
|
||||
TFAlbertForMaskedLM,
|
||||
TFAlbertForMultipleChoice,
|
||||
TFAlbertForPreTraining,
|
||||
TFAlbertForQuestionAnswering,
|
||||
TFAlbertForSequenceClassification,
|
||||
@@ -44,6 +45,7 @@ from .modeling_tf_albert import (
|
||||
from .modeling_tf_bert import (
|
||||
TF_BERT_PRETRAINED_MODEL_ARCHIVE_MAP,
|
||||
TFBertForMaskedLM,
|
||||
TFBertForMultipleChoice,
|
||||
TFBertForPreTraining,
|
||||
TFBertForQuestionAnswering,
|
||||
TFBertForSequenceClassification,
|
||||
@@ -172,6 +174,10 @@ TF_MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING = OrderedDict(
|
||||
]
|
||||
)
|
||||
|
||||
TF_MODEL_FOR_MULTIPLE_CHOICE_MAPPING = OrderedDict(
|
||||
[(BertConfig, TFBertForMultipleChoice), (AlbertConfig, TFAlbertForMultipleChoice)]
|
||||
)
|
||||
|
||||
TF_MODEL_FOR_QUESTION_ANSWERING_MAPPING = OrderedDict(
|
||||
[
|
||||
(DistilBertConfig, TFDistilBertForQuestionAnswering),
|
||||
@@ -662,6 +668,153 @@ class TFAutoModelWithLMHead(object):
|
||||
)
|
||||
|
||||
|
||||
class TFAutoModelForMultipleChoice:
|
||||
r"""
|
||||
:class:`~transformers.TFAutoModelForMultipleChoice` is a generic model class
|
||||
that will be instantiated as one of the multiple choice model classes of the library
|
||||
when created with the `TFAutoModelForMultipleChoice.from_pretrained(pretrained_model_name_or_path)`
|
||||
class method.
|
||||
|
||||
The `from_pretrained()` method takes care of returning the correct model class instance
|
||||
based on the `model_type` property of the config object, or when it's missing,
|
||||
falling back to using pattern matching on the `pretrained_model_name_or_path` string.
|
||||
|
||||
The model class to instantiate is selected as the first pattern matching
|
||||
in the `pretrained_model_name_or_path` string (in the following order):
|
||||
- contains `albert`: TFAlbertForMultipleChoice (Albert model)
|
||||
- contains `bert`: TFBertForMultipleChoice (Bert model)
|
||||
|
||||
This class cannot be instantiated using `__init__()` (throws an error).
|
||||
"""
|
||||
|
||||
def __init__(self):
|
||||
raise EnvironmentError(
|
||||
"TFAutoModelForMultipleChoice is designed to be instantiated "
|
||||
"using the `TFAutoModelForMultipleChoice.from_pretrained(pretrained_model_name_or_path)` or "
|
||||
"`TFAutoModelForMultipleChoice.from_config(config)` methods."
|
||||
)
|
||||
|
||||
@classmethod
|
||||
def from_config(cls, config):
|
||||
r""" Instantiates one of the base model classes of the library
|
||||
from a configuration.
|
||||
|
||||
config: (`optional`) instance of a class derived from :class:`~transformers.PretrainedConfig`:
|
||||
The model class to instantiate is selected based on the configuration class:
|
||||
- isInstance of `albert` configuration class: AlbertModel (Albert model)
|
||||
- isInstance of `bert` configuration class: BertModel (Bert model)
|
||||
|
||||
Examples::
|
||||
|
||||
config = BertConfig.from_pretrained('bert-base-uncased') # Download configuration from S3 and cache.
|
||||
model = AutoModelForMulitpleChoice.from_config(config) # E.g. model was saved using `save_pretrained('./test/saved_model/')`
|
||||
"""
|
||||
for config_class, model_class in TF_MODEL_FOR_MULTIPLE_CHOICE_MAPPING.items():
|
||||
if isinstance(config, config_class):
|
||||
return model_class(config)
|
||||
raise ValueError(
|
||||
"Unrecognized configuration class {} for this kind of TFAutoModel: {}.\n"
|
||||
"Model type should be one of {}.".format(
|
||||
config.__class__,
|
||||
cls.__name__,
|
||||
", ".join(c.__name__ for c in TF_MODEL_FOR_MULTIPLE_CHOICE_MAPPING.keys()),
|
||||
)
|
||||
)
|
||||
|
||||
@classmethod
|
||||
def from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs):
|
||||
r""" Instantiates one of the multiple choice model classes of the library
|
||||
from a pre-trained model configuration.
|
||||
|
||||
The `from_pretrained()` method takes care of returning the correct model class instance
|
||||
based on the `model_type` property of the config object, or when it's missing,
|
||||
falling back to using pattern matching on the `pretrained_model_name_or_path` string.
|
||||
|
||||
The model class to instantiate is selected as the first pattern matching
|
||||
in the `pretrained_model_name_or_path` string (in the following order):
|
||||
- contains `albert`: TFRobertaForMultiple (Albert model)
|
||||
- contains `bert`: TFBertForMultipleChoice (Bert model)
|
||||
|
||||
The model is set in evaluation mode by default using `model.eval()` (Dropout modules are deactivated)
|
||||
To train the model, you should first set it back in training mode with `model.train()`
|
||||
|
||||
Params:
|
||||
pretrained_model_name_or_path: either:
|
||||
|
||||
- a string with the `shortcut name` of a pre-trained model to load from cache or download, e.g.: ``bert-base-uncased``.
|
||||
- a string with the `identifier name` of a pre-trained model that was user-uploaded to our S3, e.g.: ``dbmdz/bert-base-german-cased``.
|
||||
- a path to a `directory` containing model weights saved using :func:`~transformers.PreTrainedModel.save_pretrained`, e.g.: ``./my_model_directory/``.
|
||||
- a path or url to a `PyTorch, TF 1.X or TF 2.0 checkpoint file` (e.g. `./tf_model/model.ckpt.index`). In the case of a PyTorch checkpoint, ``from_pt`` should be set to True and a configuration object should be provided as ``config`` argument.
|
||||
|
||||
from_pt: (`Optional`) Boolean
|
||||
Set to True if the Checkpoint is a PyTorch checkpoint.
|
||||
|
||||
model_args: (`optional`) Sequence of positional arguments:
|
||||
All remaning positional arguments will be passed to the underlying model's ``__init__`` method
|
||||
|
||||
config: (`optional`) instance of a class derived from :class:`~transformers.PretrainedConfig`:
|
||||
Configuration for the model to use instead of an automatically loaded configuation. Configuration can be automatically loaded when:
|
||||
|
||||
- the model is a model provided by the library (loaded with the ``shortcut-name`` string of a pretrained model), or
|
||||
- the model was saved using :func:`~transformers.PreTrainedModel.save_pretrained` and is reloaded by suppling the save directory.
|
||||
- the model is loaded by suppling a local directory as ``pretrained_model_name_or_path`` and a configuration JSON file named `config.json` is found in the directory.
|
||||
|
||||
state_dict: (`optional`) dict:
|
||||
an optional state dictionnary for the model to use instead of a state dictionary loaded from saved weights file.
|
||||
This option can be used if you want to create a model from a pretrained configuration but load your own weights.
|
||||
In this case though, you should check if using :func:`~transformers.PreTrainedModel.save_pretrained` and :func:`~transformers.PreTrainedModel.from_pretrained` is not a simpler option.
|
||||
|
||||
cache_dir: (`optional`) string:
|
||||
Path to a directory in which a downloaded pre-trained model
|
||||
configuration should be cached if the standard cache should not be used.
|
||||
|
||||
force_download: (`optional`) boolean, default False:
|
||||
Force to (re-)download the model weights and configuration files and override the cached versions if they exists.
|
||||
|
||||
resume_download: (`optional`) boolean, default False:
|
||||
Do not delete incompletely recieved file. Attempt to resume the download if such a file exists.
|
||||
|
||||
proxies: (`optional`) dict, default None:
|
||||
A dictionary of proxy servers to use by protocol or endpoint, e.g.: {'http': 'foo.bar:3128', 'http://hostname': 'foo.bar:4012'}.
|
||||
The proxies are used on each request.
|
||||
|
||||
output_loading_info: (`optional`) boolean:
|
||||
Set to ``True`` to also return a dictionnary containing missing keys, unexpected keys and error messages.
|
||||
|
||||
kwargs: (`optional`) Remaining dictionary of keyword arguments:
|
||||
Can be used to update the configuration object (after it being loaded) and initiate the model. (e.g. ``output_attention=True``). Behave differently depending on whether a `config` is provided or automatically loaded:
|
||||
|
||||
- If a configuration is provided with ``config``, ``**kwargs`` will be directly passed to the underlying model's ``__init__`` method (we assume all relevant updates to the configuration have already been done)
|
||||
- If a configuration is not provided, ``kwargs`` will be first passed to the configuration class initialization function (:func:`~transformers.PretrainedConfig.from_pretrained`). Each key of ``kwargs`` that corresponds to a configuration attribute will be used to override said attribute with the supplied ``kwargs`` value. Remaining keys that do not correspond to any configuration attribute will be passed to the underlying model's ``__init__`` function.
|
||||
|
||||
Examples::
|
||||
|
||||
model = TFAutoModelFormultipleChoice.from_pretrained('bert-base-uncased') # Download model and configuration from S3 and cache.
|
||||
model = TFAutoModelFormultipleChoice.from_pretrained('./test/bert_model/') # E.g. model was saved using `save_pretrained('./test/saved_model/')`
|
||||
model = TFAutoModelFormultipleChoice.from_pretrained('bert-base-uncased', output_attention=True) # Update configuration during loading
|
||||
assert model.config.output_attention == True
|
||||
# Loading from a TF checkpoint file instead of a PyTorch model (slower)
|
||||
config = AutoConfig.from_json_file('./tf_model/bert_tf_model_config.json')
|
||||
model = TFAutoModelFormultipleChoice.from_pretrained('./pt_model/bert_pytorch_model.bin', from_pt=True, config=config)
|
||||
|
||||
"""
|
||||
config = kwargs.pop("config", None)
|
||||
if not isinstance(config, PretrainedConfig):
|
||||
config = AutoConfig.from_pretrained(pretrained_model_name_or_path, **kwargs)
|
||||
|
||||
for config_class, model_class in TF_MODEL_FOR_MULTIPLE_CHOICE_MAPPING.items():
|
||||
if isinstance(config, config_class):
|
||||
return model_class.from_pretrained(pretrained_model_name_or_path, *model_args, config=config, **kwargs)
|
||||
raise ValueError(
|
||||
"Unrecognized configuration class {} for this kind of TFAutoModel: {}.\n"
|
||||
"Model type should be one of {}.".format(
|
||||
config.__class__,
|
||||
cls.__name__,
|
||||
", ".join(c.__name__ for c in TF_MODEL_FOR_MULTIPLE_CHOICE_MAPPING.keys()),
|
||||
)
|
||||
)
|
||||
|
||||
|
||||
class TFAutoModelForSequenceClassification(object):
|
||||
r"""
|
||||
:class:`~transformers.TFAutoModelForSequenceClassification` is a generic model class
|
||||
|
||||
@@ -125,7 +125,9 @@ class TFTrainer:
|
||||
in the Tensorflow documentation and those contained in the transformers library.
|
||||
"""
|
||||
if self.args.optimizer_name == "adamw":
|
||||
self.optimizer = create_optimizer(self.args.learning_rate, self.train_steps, self.args.warmup_steps)
|
||||
self.optimizer = create_optimizer(
|
||||
self.args.learning_rate, self.train_steps, self.args.warmup_steps, self.args.end_lr
|
||||
)
|
||||
else:
|
||||
try:
|
||||
self.optimizer = tf.keras.optimizers.get(
|
||||
@@ -139,6 +141,7 @@ class TFTrainer:
|
||||
self.optimizer = tf.keras.optimizers.get(
|
||||
{"class_name": self.args.optimizer_name, "config": {"learning_rate": self.args.learning_rate}}
|
||||
)
|
||||
logger.info("Created an/a {} optimizer".format(self.optimizer))
|
||||
|
||||
def _create_checkpoint_manager(self, max_to_keep: int = 5, load_model: bool = True) -> None:
|
||||
"""
|
||||
@@ -149,6 +152,7 @@ class TFTrainer:
|
||||
load_model: if we want to start the training from the latest checkpoint.
|
||||
"""
|
||||
ckpt = tf.train.Checkpoint(optimizer=self.optimizer, model=self.model)
|
||||
|
||||
self.model.ckpt_manager = tf.train.CheckpointManager(ckpt, PREFIX_CHECKPOINT_DIR, max_to_keep=max_to_keep)
|
||||
|
||||
if load_model:
|
||||
@@ -425,5 +429,6 @@ class TFTrainer:
|
||||
|
||||
path = os.path.join(self.args.output_dir, "saved_model")
|
||||
|
||||
logger.info("Saving model in {}".format(path))
|
||||
os.makedirs(path, exist_ok=True)
|
||||
self.model.save_pretrained(self.args.output_dir)
|
||||
|
||||
@@ -30,6 +30,12 @@ class TFTrainingArguments(TrainingArguments):
|
||||
"help": "Name of a Tensorflow loss. For the list see: https://www.tensorflow.org/api_docs/python/tf/keras/losses"
|
||||
},
|
||||
)
|
||||
tpu_name: str = field(
|
||||
default=None, metadata={"help": "Name of TPU"},
|
||||
)
|
||||
end_lr: float = field(
|
||||
default=0, metadata={"help": "End learning rate for optimizer"},
|
||||
)
|
||||
eval_steps: int = field(default=1000, metadata={"help": "Run an evaluation every X steps."})
|
||||
debug: bool = field(
|
||||
default=False, metadata={"help": "Activate the trace to record computation graphs and profiling information"}
|
||||
@@ -45,7 +51,10 @@ class TFTrainingArguments(TrainingArguments):
|
||||
strategy = tf.distribute.OneDeviceStrategy(device="/cpu:0")
|
||||
else:
|
||||
try:
|
||||
tpu = tf.distribute.cluster_resolver.TPUClusterResolver()
|
||||
if self.tpu_name:
|
||||
tpu = tf.distribute.cluster_resolver.TPUClusterResolver(self.tpu_name)
|
||||
else:
|
||||
tpu = tf.distribute.cluster_resolver.TPUClusterResolver()
|
||||
except ValueError:
|
||||
tpu = None
|
||||
|
||||
|
||||
Reference in New Issue
Block a user