Black 20 release

This commit is contained in:
Lysandre
2020-08-26 17:20:22 +02:00
parent e78c110338
commit a75c64d80c
191 changed files with 4807 additions and 3503 deletions

View File

@@ -703,10 +703,10 @@ def write_predictions_extended(
tokenizer,
verbose_logging,
):
""" XLNet write prediction logic (more complex than Bert's).
Write final predictions to the json file and log-odds of null if needed.
"""XLNet write prediction logic (more complex than Bert's).
Write final predictions to the json file and log-odds of null if needed.
Requires utils_squad_evaluate.py
Requires utils_squad_evaluate.py
"""
_PrelimPrediction = collections.namedtuple( # pylint: disable=invalid-name
"PrelimPrediction", ["feature_index", "start_index", "end_index", "start_log_prob", "end_log_prob"]

View File

@@ -31,47 +31,47 @@ XXX_PRETRAINED_CONFIG_ARCHIVE_MAP = {
class XxxConfig(PretrainedConfig):
r"""
This is the configuration class to store the configuration of a :class:`~transformers.XXXModel`.
It is used to instantiate a XXX model according to the specified arguments, defining the model
architecture. Instantiating a configuration with the defaults will yield a similar configuration to that of
the XXX `xxx-base-uncased <https://huggingface.co/xxx/xxx-base-uncased>`__ architecture.
This is the configuration class to store the configuration of a :class:`~transformers.XXXModel`.
It is used to instantiate a XXX model according to the specified arguments, defining the model
architecture. Instantiating a configuration with the defaults will yield a similar configuration to that of
the XXX `xxx-base-uncased <https://huggingface.co/xxx/xxx-base-uncased>`__ architecture.
Configuration objects inherit from :class:`~transformers.PretrainedConfig` and can be used
to control the model outputs. Read the documentation from :class:`~transformers.PretrainedConfig`
for more information.
Configuration objects inherit from :class:`~transformers.PretrainedConfig` and can be used
to control the model outputs. Read the documentation from :class:`~transformers.PretrainedConfig`
for more information.
Args:
vocab_size (:obj:`int`, optional, defaults to 30522):
Vocabulary size of the XXX model. Defines the different tokens that
can be represented by the `inputs_ids` passed to the forward method of :class:`~transformers.XXXModel`.
hidden_size (:obj:`int`, optional, defaults to 768):
Dimensionality of the encoder layers and the pooler layer.
num_hidden_layers (:obj:`int`, optional, defaults to 12):
Number of hidden layers in the Transformer encoder.
num_attention_heads (:obj:`int`, optional, defaults to 12):
Number of attention heads for each attention layer in the Transformer encoder.
hidden_act (:obj:`str` or :obj:`function`, optional, defaults to :obj:`"gelu"`):
The non-linear activation function (function or string) in the encoder and pooler.
Args:
vocab_size (:obj:`int`, optional, defaults to 30522):
Vocabulary size of the XXX model. Defines the different tokens that
can be represented by the `inputs_ids` passed to the forward method of :class:`~transformers.XXXModel`.
hidden_size (:obj:`int`, optional, defaults to 768):
Dimensionality of the encoder layers and the pooler layer.
num_hidden_layers (:obj:`int`, optional, defaults to 12):
Number of hidden layers in the Transformer encoder.
num_attention_heads (:obj:`int`, optional, defaults to 12):
Number of attention heads for each attention layer in the Transformer encoder.
hidden_act (:obj:`str` or :obj:`function`, optional, defaults to :obj:`"gelu"`):
The non-linear activation function (function or string) in the encoder and pooler.
If string, :obj:`"gelu"`, :obj:`"relu"`, :obj:`"swish"` and :obj:`"gelu_new"` are supported.
hidden_dropout_prob (:obj:`float`, optional, defaults to 0.1):
The dropout probabilitiy for all fully connected layers in the embeddings, encoder, and pooler.
attention_probs_dropout_prob (:obj:`float`, optional, defaults to 0.1):
The dropout ratio for the attention probabilities.
max_position_embeddings (:obj:`int`, optional, defaults to 512):
The maximum sequence length that this model might ever be used with.
Typically set this to something large just in case (e.g., 512 or 1024 or 2048).
type_vocab_size (:obj:`int`, optional, defaults to 2):
The vocabulary size of the `token_type_ids` passed into :class:`~transformers.BertModel`.
initializer_range (:obj:`float`, optional, defaults to 0.02):
The standard deviation of the :obj:`truncated_normal_initializer` for initializing all weight matrices.
layer_norm_eps (:obj:`float`, optional, defaults to 1e-5):
The epsilon used by the layer normalization layers.
gradient_checkpointing (:obj:`bool`, optional, defaults to :obj:`False`):
If :obj:`True`, use gradient checkpointing to save memory at the expense of slower backward pass.
kwargs:
Additional arguments for common configurations, passed to :class:`~transformers.PretrainedConfig`.
If string, :obj:`"gelu"`, :obj:`"relu"`, :obj:`"swish"` and :obj:`"gelu_new"` are supported.
hidden_dropout_prob (:obj:`float`, optional, defaults to 0.1):
The dropout probabilitiy for all fully connected layers in the embeddings, encoder, and pooler.
attention_probs_dropout_prob (:obj:`float`, optional, defaults to 0.1):
The dropout ratio for the attention probabilities.
max_position_embeddings (:obj:`int`, optional, defaults to 512):
The maximum sequence length that this model might ever be used with.
Typically set this to something large just in case (e.g., 512 or 1024 or 2048).
type_vocab_size (:obj:`int`, optional, defaults to 2):
The vocabulary size of the `token_type_ids` passed into :class:`~transformers.BertModel`.
initializer_range (:obj:`float`, optional, defaults to 0.02):
The standard deviation of the :obj:`truncated_normal_initializer` for initializing all weight matrices.
layer_norm_eps (:obj:`float`, optional, defaults to 1e-5):
The epsilon used by the layer normalization layers.
gradient_checkpointing (:obj:`bool`, optional, defaults to :obj:`False`):
If :obj:`True`, use gradient checkpointing to save memory at the expense of slower backward pass.
kwargs:
Additional arguments for common configurations, passed to :class:`~transformers.PretrainedConfig`.
"""
model_type = "xxx"

View File

@@ -223,7 +223,10 @@ class TFXxxMainLayer(tf.keras.layers.Layer):
pooled_output = self.pooler(sequence_output)
if not return_dict:
return (sequence_output, pooled_output,) + encoder_outputs[1:]
return (
sequence_output,
pooled_output,
) + encoder_outputs[1:]
return TFBaseModelOutputWithPooling(
last_hidden_state=sequence_output,
@@ -241,8 +244,8 @@ class TFXxxMainLayer(tf.keras.layers.Layer):
# pointers for your model.
####################################################
class TFXxxPreTrainedModel(TFPreTrainedModel):
""" An abstract class to handle weights initialization and
a simple interface for downloading and loading pretrained models.
"""An abstract class to handle weights initialization and
a simple interface for downloading and loading pretrained models.
"""
config_class = XxxConfig
@@ -422,7 +425,10 @@ class TFXxxForMaskedLM(TFXxxPreTrainedModel, TFMaskedLanguageModelingLoss):
return ((loss,) + output) if loss is not None else output
return TFMaskedLMOutput(
loss=loss, logits=prediction_scores, hidden_states=outputs.hidden_states, attentions=outputs.attentions,
loss=loss,
logits=prediction_scores,
hidden_states=outputs.hidden_states,
attentions=outputs.attentions,
)
@@ -503,7 +509,10 @@ class TFXxxForSequenceClassification(TFXxxPreTrainedModel, TFSequenceClassificat
return ((loss,) + output) if loss is not None else output
return TFSequenceClassifierOutput(
loss=loss, logits=logits, hidden_states=outputs.hidden_states, attentions=outputs.attentions,
loss=loss,
logits=logits,
hidden_states=outputs.hidden_states,
attentions=outputs.attentions,
)
@@ -524,7 +533,7 @@ class TFXxxForMultipleChoice(TFXxxPreTrainedModel, TFMultipleChoiceLoss):
@property
def dummy_inputs(self):
""" Dummy inputs to build the network.
"""Dummy inputs to build the network.
Returns:
tf.Tensor with dummy inputs
@@ -631,7 +640,10 @@ class TFXxxForMultipleChoice(TFXxxPreTrainedModel, TFMultipleChoiceLoss):
return ((loss,) + output) if loss is not None else output
return TFMultipleChoiceModelOutput(
loss=loss, logits=reshaped_logits, hidden_states=outputs.hidden_states, attentions=outputs.attentions,
loss=loss,
logits=reshaped_logits,
hidden_states=outputs.hidden_states,
attentions=outputs.attentions,
)
@@ -710,7 +722,10 @@ class TFXxxForTokenClassification(TFXxxPreTrainedModel, TFTokenClassificationLos
return ((loss,) + output) if loss is not None else output
return TFTokenClassifierOutput(
loss=loss, logits=logits, hidden_states=outputs.hidden_states, attentions=outputs.attentions,
loss=loss,
logits=logits,
hidden_states=outputs.hidden_states,
attentions=outputs.attentions,
)

View File

@@ -59,8 +59,7 @@ XXX_PRETRAINED_MODEL_ARCHIVE_LIST = [
# More details: https://medium.com/huggingface/from-tensorflow-to-pytorch-265f40ef2a28
####################################################
def load_tf_weights_in_xxx(model, config, tf_checkpoint_path):
""" Load tf checkpoints in a pytorch model.
"""
"""Load tf checkpoints in a pytorch model."""
try:
import re
@@ -189,8 +188,8 @@ XxxPooler = nn.Module
class XxxPreTrainedModel(PreTrainedModel):
""" An abstract class to handle weights initialization and
a simple interface for downloading and loading pretrained models.
"""An abstract class to handle weights initialization and
a simple interface for downloading and loading pretrained models.
"""
config_class = XxxConfig
@@ -290,9 +289,9 @@ class XxxModel(XxxPreTrainedModel):
self.embeddings.word_embeddings = new_embeddings
def _prune_heads(self, heads_to_prune):
""" Prunes heads of the model.
heads_to_prune: dict of {layer_num: list of heads to prune in this layer}
See base class PreTrainedModel
"""Prunes heads of the model.
heads_to_prune: dict of {layer_num: list of heads to prune in this layer}
See base class PreTrainedModel
"""
for layer, heads in heads_to_prune.items():
self.encoder.layer[layer].attention.prune_heads(heads)
@@ -517,7 +516,10 @@ class XxxForSequenceClassification(XxxPreTrainedModel):
return ((loss,) + output) if loss is not None else output
return SequenceClassifierOutput(
loss=loss, logits=logits, hidden_states=outputs.hidden_states, attentions=outputs.attentions,
loss=loss,
logits=logits,
hidden_states=outputs.hidden_states,
attentions=outputs.attentions,
)
@@ -603,7 +605,10 @@ class XxxForMultipleChoice(XxxPreTrainedModel):
return ((loss,) + output) if loss is not None else output
return MultipleChoiceModelOutput(
loss=loss, logits=reshaped_logits, hidden_states=outputs.hidden_states, attentions=outputs.attentions,
loss=loss,
logits=reshaped_logits,
hidden_states=outputs.hidden_states,
attentions=outputs.attentions,
)
@@ -686,7 +691,10 @@ class XxxForTokenClassification(XxxPreTrainedModel):
return ((loss,) + output) if loss is not None else output
return TokenClassifierOutput(
loss=loss, logits=logits, hidden_states=outputs.hidden_states, attentions=outputs.attentions,
loss=loss,
logits=logits,
hidden_states=outputs.hidden_states,
attentions=outputs.attentions,
)