Black 20 release
This commit is contained in:
@@ -703,10 +703,10 @@ def write_predictions_extended(
|
||||
tokenizer,
|
||||
verbose_logging,
|
||||
):
|
||||
""" XLNet write prediction logic (more complex than Bert's).
|
||||
Write final predictions to the json file and log-odds of null if needed.
|
||||
"""XLNet write prediction logic (more complex than Bert's).
|
||||
Write final predictions to the json file and log-odds of null if needed.
|
||||
|
||||
Requires utils_squad_evaluate.py
|
||||
Requires utils_squad_evaluate.py
|
||||
"""
|
||||
_PrelimPrediction = collections.namedtuple( # pylint: disable=invalid-name
|
||||
"PrelimPrediction", ["feature_index", "start_index", "end_index", "start_log_prob", "end_log_prob"]
|
||||
|
||||
@@ -31,47 +31,47 @@ XXX_PRETRAINED_CONFIG_ARCHIVE_MAP = {
|
||||
|
||||
class XxxConfig(PretrainedConfig):
|
||||
r"""
|
||||
This is the configuration class to store the configuration of a :class:`~transformers.XXXModel`.
|
||||
It is used to instantiate a XXX model according to the specified arguments, defining the model
|
||||
architecture. Instantiating a configuration with the defaults will yield a similar configuration to that of
|
||||
the XXX `xxx-base-uncased <https://huggingface.co/xxx/xxx-base-uncased>`__ architecture.
|
||||
This is the configuration class to store the configuration of a :class:`~transformers.XXXModel`.
|
||||
It is used to instantiate a XXX model according to the specified arguments, defining the model
|
||||
architecture. Instantiating a configuration with the defaults will yield a similar configuration to that of
|
||||
the XXX `xxx-base-uncased <https://huggingface.co/xxx/xxx-base-uncased>`__ architecture.
|
||||
|
||||
Configuration objects inherit from :class:`~transformers.PretrainedConfig` and can be used
|
||||
to control the model outputs. Read the documentation from :class:`~transformers.PretrainedConfig`
|
||||
for more information.
|
||||
Configuration objects inherit from :class:`~transformers.PretrainedConfig` and can be used
|
||||
to control the model outputs. Read the documentation from :class:`~transformers.PretrainedConfig`
|
||||
for more information.
|
||||
|
||||
|
||||
Args:
|
||||
vocab_size (:obj:`int`, optional, defaults to 30522):
|
||||
Vocabulary size of the XXX model. Defines the different tokens that
|
||||
can be represented by the `input_ids` passed to the forward method of :class:`~transformers.XXXModel`.
|
||||
hidden_size (:obj:`int`, optional, defaults to 768):
|
||||
Dimensionality of the encoder layers and the pooler layer.
|
||||
num_hidden_layers (:obj:`int`, optional, defaults to 12):
|
||||
Number of hidden layers in the Transformer encoder.
|
||||
num_attention_heads (:obj:`int`, optional, defaults to 12):
|
||||
Number of attention heads for each attention layer in the Transformer encoder.
|
||||
hidden_act (:obj:`str` or :obj:`function`, optional, defaults to :obj:`"gelu"`):
|
||||
The non-linear activation function (function or string) in the encoder and pooler.
|
||||
Args:
|
||||
vocab_size (:obj:`int`, optional, defaults to 30522):
|
||||
Vocabulary size of the XXX model. Defines the different tokens that
|
||||
can be represented by the `input_ids` passed to the forward method of :class:`~transformers.XXXModel`.
|
||||
hidden_size (:obj:`int`, optional, defaults to 768):
|
||||
Dimensionality of the encoder layers and the pooler layer.
|
||||
num_hidden_layers (:obj:`int`, optional, defaults to 12):
|
||||
Number of hidden layers in the Transformer encoder.
|
||||
num_attention_heads (:obj:`int`, optional, defaults to 12):
|
||||
Number of attention heads for each attention layer in the Transformer encoder.
|
||||
hidden_act (:obj:`str` or :obj:`function`, optional, defaults to :obj:`"gelu"`):
|
||||
The non-linear activation function (function or string) in the encoder and pooler.
|
||||
|
||||
If string, :obj:`"gelu"`, :obj:`"relu"`, :obj:`"swish"` and :obj:`"gelu_new"` are supported.
|
||||
hidden_dropout_prob (:obj:`float`, optional, defaults to 0.1):
|
||||
The dropout probability for all fully connected layers in the embeddings, encoder, and pooler.
|
||||
attention_probs_dropout_prob (:obj:`float`, optional, defaults to 0.1):
|
||||
The dropout ratio for the attention probabilities.
|
||||
max_position_embeddings (:obj:`int`, optional, defaults to 512):
|
||||
The maximum sequence length that this model might ever be used with.
|
||||
Typically set this to something large just in case (e.g., 512 or 1024 or 2048).
|
||||
type_vocab_size (:obj:`int`, optional, defaults to 2):
|
||||
The vocabulary size of the `token_type_ids` passed into :class:`~transformers.XXXModel`.
|
||||
initializer_range (:obj:`float`, optional, defaults to 0.02):
|
||||
The standard deviation of the :obj:`truncated_normal_initializer` for initializing all weight matrices.
|
||||
layer_norm_eps (:obj:`float`, optional, defaults to 1e-5):
|
||||
The epsilon used by the layer normalization layers.
|
||||
gradient_checkpointing (:obj:`bool`, optional, defaults to :obj:`False`):
|
||||
If :obj:`True`, use gradient checkpointing to save memory at the expense of slower backward pass.
|
||||
kwargs:
|
||||
Additional arguments for common configurations, passed to :class:`~transformers.PretrainedConfig`.
|
||||
If string, :obj:`"gelu"`, :obj:`"relu"`, :obj:`"swish"` and :obj:`"gelu_new"` are supported.
|
||||
hidden_dropout_prob (:obj:`float`, optional, defaults to 0.1):
|
||||
The dropout probability for all fully connected layers in the embeddings, encoder, and pooler.
|
||||
attention_probs_dropout_prob (:obj:`float`, optional, defaults to 0.1):
|
||||
The dropout ratio for the attention probabilities.
|
||||
max_position_embeddings (:obj:`int`, optional, defaults to 512):
|
||||
The maximum sequence length that this model might ever be used with.
|
||||
Typically set this to something large just in case (e.g., 512 or 1024 or 2048).
|
||||
type_vocab_size (:obj:`int`, optional, defaults to 2):
|
||||
The vocabulary size of the `token_type_ids` passed into :class:`~transformers.XXXModel`.
|
||||
initializer_range (:obj:`float`, optional, defaults to 0.02):
|
||||
The standard deviation of the :obj:`truncated_normal_initializer` for initializing all weight matrices.
|
||||
layer_norm_eps (:obj:`float`, optional, defaults to 1e-5):
|
||||
The epsilon used by the layer normalization layers.
|
||||
gradient_checkpointing (:obj:`bool`, optional, defaults to :obj:`False`):
|
||||
If :obj:`True`, use gradient checkpointing to save memory at the expense of slower backward pass.
|
||||
kwargs:
|
||||
Additional arguments for common configurations, passed to :class:`~transformers.PretrainedConfig`.
|
||||
"""
|
||||
model_type = "xxx"
|
||||
|
||||
|
||||
@@ -223,7 +223,10 @@ class TFXxxMainLayer(tf.keras.layers.Layer):
|
||||
pooled_output = self.pooler(sequence_output)
|
||||
|
||||
if not return_dict:
|
||||
return (sequence_output, pooled_output,) + encoder_outputs[1:]
|
||||
return (
|
||||
sequence_output,
|
||||
pooled_output,
|
||||
) + encoder_outputs[1:]
|
||||
|
||||
return TFBaseModelOutputWithPooling(
|
||||
last_hidden_state=sequence_output,
|
||||
@@ -241,8 +244,8 @@ class TFXxxMainLayer(tf.keras.layers.Layer):
|
||||
# pointers for your model.
|
||||
####################################################
|
||||
class TFXxxPreTrainedModel(TFPreTrainedModel):
|
||||
""" An abstract class to handle weights initialization and
|
||||
a simple interface for downloading and loading pretrained models.
|
||||
"""An abstract class to handle weights initialization and
|
||||
a simple interface for downloading and loading pretrained models.
|
||||
"""
|
||||
|
||||
config_class = XxxConfig
|
||||
@@ -422,7 +425,10 @@ class TFXxxForMaskedLM(TFXxxPreTrainedModel, TFMaskedLanguageModelingLoss):
|
||||
return ((loss,) + output) if loss is not None else output
|
||||
|
||||
return TFMaskedLMOutput(
|
||||
loss=loss, logits=prediction_scores, hidden_states=outputs.hidden_states, attentions=outputs.attentions,
|
||||
loss=loss,
|
||||
logits=prediction_scores,
|
||||
hidden_states=outputs.hidden_states,
|
||||
attentions=outputs.attentions,
|
||||
)
|
||||
|
||||
|
||||
@@ -503,7 +509,10 @@ class TFXxxForSequenceClassification(TFXxxPreTrainedModel, TFSequenceClassificat
|
||||
return ((loss,) + output) if loss is not None else output
|
||||
|
||||
return TFSequenceClassifierOutput(
|
||||
loss=loss, logits=logits, hidden_states=outputs.hidden_states, attentions=outputs.attentions,
|
||||
loss=loss,
|
||||
logits=logits,
|
||||
hidden_states=outputs.hidden_states,
|
||||
attentions=outputs.attentions,
|
||||
)
|
||||
|
||||
|
||||
@@ -524,7 +533,7 @@ class TFXxxForMultipleChoice(TFXxxPreTrainedModel, TFMultipleChoiceLoss):
|
||||
|
||||
@property
|
||||
def dummy_inputs(self):
|
||||
""" Dummy inputs to build the network.
|
||||
"""Dummy inputs to build the network.
|
||||
|
||||
Returns:
|
||||
tf.Tensor with dummy inputs
|
||||
@@ -631,7 +640,10 @@ class TFXxxForMultipleChoice(TFXxxPreTrainedModel, TFMultipleChoiceLoss):
|
||||
return ((loss,) + output) if loss is not None else output
|
||||
|
||||
return TFMultipleChoiceModelOutput(
|
||||
loss=loss, logits=reshaped_logits, hidden_states=outputs.hidden_states, attentions=outputs.attentions,
|
||||
loss=loss,
|
||||
logits=reshaped_logits,
|
||||
hidden_states=outputs.hidden_states,
|
||||
attentions=outputs.attentions,
|
||||
)
|
||||
|
||||
|
||||
@@ -710,7 +722,10 @@ class TFXxxForTokenClassification(TFXxxPreTrainedModel, TFTokenClassificationLos
|
||||
return ((loss,) + output) if loss is not None else output
|
||||
|
||||
return TFTokenClassifierOutput(
|
||||
loss=loss, logits=logits, hidden_states=outputs.hidden_states, attentions=outputs.attentions,
|
||||
loss=loss,
|
||||
logits=logits,
|
||||
hidden_states=outputs.hidden_states,
|
||||
attentions=outputs.attentions,
|
||||
)
|
||||
|
||||
|
||||
|
||||
@@ -59,8 +59,7 @@ XXX_PRETRAINED_MODEL_ARCHIVE_LIST = [
|
||||
# More details: https://medium.com/huggingface/from-tensorflow-to-pytorch-265f40ef2a28
|
||||
####################################################
|
||||
def load_tf_weights_in_xxx(model, config, tf_checkpoint_path):
|
||||
""" Load tf checkpoints in a pytorch model.
|
||||
"""
|
||||
"""Load tf checkpoints in a pytorch model."""
|
||||
try:
|
||||
import re
|
||||
|
||||
@@ -189,8 +188,8 @@ XxxPooler = nn.Module
|
||||
|
||||
|
||||
class XxxPreTrainedModel(PreTrainedModel):
|
||||
""" An abstract class to handle weights initialization and
|
||||
a simple interface for downloading and loading pretrained models.
|
||||
"""An abstract class to handle weights initialization and
|
||||
a simple interface for downloading and loading pretrained models.
|
||||
"""
|
||||
|
||||
config_class = XxxConfig
|
||||
@@ -290,9 +289,9 @@ class XxxModel(XxxPreTrainedModel):
|
||||
self.embeddings.word_embeddings = new_embeddings
|
||||
|
||||
def _prune_heads(self, heads_to_prune):
|
||||
""" Prunes heads of the model.
|
||||
heads_to_prune: dict of {layer_num: list of heads to prune in this layer}
|
||||
See base class PreTrainedModel
|
||||
"""Prunes heads of the model.
|
||||
heads_to_prune: dict of {layer_num: list of heads to prune in this layer}
|
||||
See base class PreTrainedModel
|
||||
"""
|
||||
for layer, heads in heads_to_prune.items():
|
||||
self.encoder.layer[layer].attention.prune_heads(heads)
|
||||
@@ -517,7 +516,10 @@ class XxxForSequenceClassification(XxxPreTrainedModel):
|
||||
return ((loss,) + output) if loss is not None else output
|
||||
|
||||
return SequenceClassifierOutput(
|
||||
loss=loss, logits=logits, hidden_states=outputs.hidden_states, attentions=outputs.attentions,
|
||||
loss=loss,
|
||||
logits=logits,
|
||||
hidden_states=outputs.hidden_states,
|
||||
attentions=outputs.attentions,
|
||||
)
|
||||
|
||||
|
||||
@@ -603,7 +605,10 @@ class XxxForMultipleChoice(XxxPreTrainedModel):
|
||||
return ((loss,) + output) if loss is not None else output
|
||||
|
||||
return MultipleChoiceModelOutput(
|
||||
loss=loss, logits=reshaped_logits, hidden_states=outputs.hidden_states, attentions=outputs.attentions,
|
||||
loss=loss,
|
||||
logits=reshaped_logits,
|
||||
hidden_states=outputs.hidden_states,
|
||||
attentions=outputs.attentions,
|
||||
)
|
||||
|
||||
|
||||
@@ -686,7 +691,10 @@ class XxxForTokenClassification(XxxPreTrainedModel):
|
||||
return ((loss,) + output) if loss is not None else output
|
||||
|
||||
return TokenClassifierOutput(
|
||||
loss=loss, logits=logits, hidden_states=outputs.hidden_states, attentions=outputs.attentions,
|
||||
loss=loss,
|
||||
logits=logits,
|
||||
hidden_states=outputs.hidden_states,
|
||||
attentions=outputs.attentions,
|
||||
)
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user