Clean RAG docs and template docs (#7348)
* Clean RAG docs and template docs * Fix typo * Better doc
This commit is contained in:
@@ -31,10 +31,10 @@ XXX_PRETRAINED_CONFIG_ARCHIVE_MAP = {
|
||||
|
||||
class XxxConfig(PretrainedConfig):
|
||||
r"""
|
||||
This is the configuration class to store the configuration of a :class:`~transformers.XXXModel`.
|
||||
It is used to instantiate a XXX model according to the specified arguments, defining the model
|
||||
architecture. Instantiating a configuration with the defaults will yield a similar configuration to that of
|
||||
the XXX `xxx-base-uncased <https://huggingface.co/xxx/xxx-base-uncased>`__ architecture.
|
||||
This is the configuration class to store the configuration of a :class:`~transformers.XxxModel` or a
|
||||
:class:`~transformers.TFXxxModel`. It is used to instantiate a XXX model according to the specified
|
||||
arguments, defining the model architecture. Instantiating a configuration with the defaults will yield a similar
|
||||
configuration to that of the XXX `xxx-base-uncased <https://huggingface.co/xxx/xxx-base-uncased>`__ architecture.
|
||||
|
||||
Configuration objects inherit from :class:`~transformers.PretrainedConfig` and can be used
|
||||
to control the model outputs. Read the documentation from :class:`~transformers.PretrainedConfig`
|
||||
@@ -42,33 +42,35 @@ class XxxConfig(PretrainedConfig):
|
||||
|
||||
|
||||
Args:
|
||||
vocab_size (:obj:`int`, optional, defaults to 30522):
|
||||
Vocabulary size of the XXX model. Defines the different tokens that
|
||||
can be represented by the `inputs_ids` passed to the forward method of :class:`~transformers.XXXModel`.
|
||||
hidden_size (:obj:`int`, optional, defaults to 768):
|
||||
vocab_size (:obj:`int`, `optional`, defaults to 30522):
|
||||
Vocabulary size of the XXX model. Defines the number of different tokens that can be represented by the
|
||||
:obj:`inputs_ids` passed when calling :class:`~transformers.XxxModel` or
|
||||
:class:`~transformers.TFXxxModel`.
|
||||
hidden_size (:obj:`int`, `optional`, defaults to 768):
|
||||
Dimensionality of the encoder layers and the pooler layer.
|
||||
num_hidden_layers (:obj:`int`, optional, defaults to 12):
|
||||
num_hidden_layers (:obj:`int`, `optional`, defaults to 12):
|
||||
Number of hidden layers in the Transformer encoder.
|
||||
num_attention_heads (:obj:`int`, optional, defaults to 12):
|
||||
num_attention_heads (:obj:`int`, `optional`, defaults to 12):
|
||||
Number of attention heads for each attention layer in the Transformer encoder.
|
||||
hidden_act (:obj:`str` or :obj:`function`, optional, defaults to :obj:`"gelu"`):
|
||||
hidden_act (:obj:`str` or :obj:`Callable`, `optional`, defaults to :obj:`"gelu"`):
|
||||
The non-linear activation function (function or string) in the encoder and pooler.
|
||||
|
||||
If string, :obj:`"gelu"`, :obj:`"relu"`, :obj:`"swish"` and :obj:`"gelu_new"` are supported.
|
||||
hidden_dropout_prob (:obj:`float`, optional, defaults to 0.1):
|
||||
hidden_dropout_prob (:obj:`float`, `optional`, defaults to 0.1):
|
||||
The dropout probabilitiy for all fully connected layers in the embeddings, encoder, and pooler.
|
||||
attention_probs_dropout_prob (:obj:`float`, optional, defaults to 0.1):
|
||||
attention_probs_dropout_prob (:obj:`float`, `optional`, defaults to 0.1):
|
||||
The dropout ratio for the attention probabilities.
|
||||
max_position_embeddings (:obj:`int`, optional, defaults to 512):
|
||||
max_position_embeddings (:obj:`int`, `optional`, defaults to 512):
|
||||
The maximum sequence length that this model might ever be used with.
|
||||
Typically set this to something large just in case (e.g., 512 or 1024 or 2048).
|
||||
type_vocab_size (:obj:`int`, optional, defaults to 2):
|
||||
The vocabulary size of the `token_type_ids` passed into :class:`~transformers.BertModel`.
|
||||
initializer_range (:obj:`float`, optional, defaults to 0.02):
|
||||
type_vocab_size (:obj:`int`, `optional`, defaults to 2):
|
||||
The vocabulary size of the :obj:`token_type_ids` passed when calling :class:`~transformers.XxxModel` or
|
||||
:class:`~transformers.TFXxxModel`.
|
||||
initializer_range (:obj:`float`, `optional`, defaults to 0.02):
|
||||
The standard deviation of the :obj:`truncated_normal_initializer` for initializing all weight matrices.
|
||||
layer_norm_eps (:obj:`float`, optional, defaults to 1e-5):
|
||||
layer_norm_eps (:obj:`float`, `optional`, defaults to 1e-5):
|
||||
The epsilon used by the layer normalization layers.
|
||||
gradient_checkpointing (:obj:`bool`, optional, defaults to :obj:`False`):
|
||||
gradient_checkpointing (:obj:`bool`, `optional`, defaults to :obj:`False`):
|
||||
If :obj:`True`, use gradient checkpointing to save memory at the expense of slower backward pass.
|
||||
kwargs:
|
||||
Additional arguments for common configurations, passed to :class:`~transformers.PretrainedConfig`.
|
||||
|
||||
@@ -257,32 +257,37 @@ class TFXxxPreTrainedModel(TFPreTrainedModel):
|
||||
|
||||
|
||||
XXX_START_DOCSTRING = r"""
|
||||
|
||||
The XXX model was proposed in
|
||||
`XXX: Pre-training of Deep Bidirectional Transformers for Language Understanding
|
||||
<https://arxiv.org/abs/1810.04805>`__ by....
|
||||
|
||||
This model is a `tf.keras.Model <https://www.tensorflow.org/api_docs/python/tf/keras/Model>`__ sub-class.
|
||||
Use it as a regular TF 2.0 Keras Model and
|
||||
refer to the TF 2.0 documentation for all matter related to general usage and behavior.
|
||||
This model inherits from :class:`~transformers.TFPreTrainedModel`. Check the superclass documentation for the
|
||||
generic methods the library implements for all its model (such as downloading or saving, resizing the input
|
||||
embeddings, pruning heads etc.)
|
||||
|
||||
This model is also a `tf.keras.Model <https://www.tensorflow.org/api_docs/python/tf/keras/Model>`__ subclass.
|
||||
Use it as a regular TF 2.0 Keras Model and refer to the TF 2.0 documentation for all matter related to general
|
||||
usage and behavior.
|
||||
|
||||
.. note::
|
||||
|
||||
TF 2.0 models accepts two formats as inputs:
|
||||
|
||||
- having all inputs as keyword arguments (like PyTorch models), or
|
||||
- having all inputs as a list, tuple or dict in the first positional arguments.
|
||||
- having all inputs as keyword arguments (like PyTorch models), or
|
||||
- having all inputs as a list, tuple or dict in the first positional arguments.
|
||||
|
||||
This second option is useful when using :obj:`tf.keras.Model.fit()` method which currently requires having
|
||||
This second option is useful when using :meth:`tf.keras.Model.fit` method which currently requires having
|
||||
all the tensors in the first argument of the model call function: :obj:`model(inputs)`.
|
||||
|
||||
If you choose this second option, there are three possibilities you can use to gather all the input Tensors
|
||||
in the first positional argument :
|
||||
|
||||
- a single Tensor with input_ids only and nothing else: :obj:`model(inputs_ids)`
|
||||
- a single Tensor with :obj:`input_ids` only and nothing else: :obj:`model(inputs_ids)`
|
||||
- a list of varying length with one or several input Tensors IN THE ORDER given in the docstring:
|
||||
:obj:`model([input_ids, attention_mask])` or :obj:`model([input_ids, attention_mask, token_type_ids])`
|
||||
- a dictionary with one or several input Tensors associated to the input names given in the docstring:
|
||||
:obj:`model({'input_ids': input_ids, 'token_type_ids': token_type_ids})`
|
||||
:obj:`model({"input_ids": input_ids, "token_type_ids": token_type_ids})`
|
||||
|
||||
Parameters:
|
||||
config (:class:`~transformers.XxxConfig`): Model configuration class with all the parameters of the model.
|
||||
@@ -292,27 +297,31 @@ XXX_START_DOCSTRING = r"""
|
||||
|
||||
XXX_INPUTS_DOCSTRING = r"""
|
||||
Args:
|
||||
input_ids (:obj:`Numpy array` or :obj:`tf.Tensor` of shape :obj:`{0}`):
|
||||
input_ids (:obj:`Numpy array` or :obj:`tf.Tensor` of shape :obj:`({0})`):
|
||||
Indices of input sequence tokens in the vocabulary.
|
||||
|
||||
Indices can be obtained using :class:`transformers.XxxTokenizer`.
|
||||
See :func:`transformers.PreTrainedTokenizer.encode` and
|
||||
:func:`transformers.PreTrainedTokenizer.__call__` for details.
|
||||
Indices can be obtained using :class:`~transformers.BertTokenizer`.
|
||||
See :func:`transformers.PreTrainedTokenizer.__call__` and
|
||||
:func:`transformers.PreTrainedTokenizer.encode` for details.
|
||||
|
||||
`What are input IDs? <../glossary.html#input-ids>`__
|
||||
attention_mask (:obj:`Numpy array` or :obj:`tf.Tensor` of shape :obj:`{0}`, `optional`):
|
||||
attention_mask (:obj:`Numpy array` or :obj:`tf.Tensor` of shape :obj:`({0})`, `optional`):
|
||||
Mask to avoid performing attention on padding token indices.
|
||||
Mask values selected in ``[0, 1]``:
|
||||
``1`` for tokens that are NOT MASKED, ``0`` for MASKED tokens.
|
||||
|
||||
- 1 for tokens that are **not masked**,
|
||||
- 0 for tokens that are **maked**.
|
||||
|
||||
`What are attention masks? <../glossary.html#attention-mask>`__
|
||||
token_type_ids (:obj:`Numpy array` or :obj:`tf.Tensor` of shape :obj:`{0}`, `optional`):
|
||||
token_type_ids (:obj:`Numpy array` or :obj:`tf.Tensor` of shape :obj:`({0})`, `optional`):
|
||||
Segment token indices to indicate first and second portions of the inputs.
|
||||
Indices are selected in ``[0, 1]``: ``0`` corresponds to a `sentence A` token, ``1``
|
||||
corresponds to a `sentence B` token
|
||||
Indices are selected in ``[0, 1]``:
|
||||
|
||||
- 0 corresponds to a `sentence A` token,
|
||||
- 1 corresponds to a `sentence B` token.
|
||||
|
||||
`What are token type IDs? <../glossary.html#token-type-ids>`__
|
||||
position_ids (:obj:`Numpy array` or :obj:`tf.Tensor` of shape :obj:`{0}`, `optional`):
|
||||
position_ids (:obj:`Numpy array` or :obj:`tf.Tensor` of shape :obj:`({0})`, `optional`):
|
||||
Indices of positions of each input sequence tokens in the position embeddings.
|
||||
Selected in the range ``[0, config.max_position_embeddings - 1]``.
|
||||
|
||||
@@ -320,21 +329,25 @@ XXX_INPUTS_DOCSTRING = r"""
|
||||
head_mask (:obj:`Numpy array` or :obj:`tf.Tensor` of shape :obj:`(num_heads,)` or :obj:`(num_layers, num_heads)`, `optional`):
|
||||
Mask to nullify selected heads of the self-attention modules.
|
||||
Mask values selected in ``[0, 1]``:
|
||||
:obj:`1` indicates the head is **not masked**, :obj:`0` indicates the head is **masked**.
|
||||
inputs_embeds (:obj:`Numpy array` or :obj:`tf.Tensor` of shape :obj:`(batch_size, sequence_length, embedding_dim)`, `optional`):
|
||||
|
||||
- 1 indicates the head is **not masked**,
|
||||
- 0 indicates the head is **masked**.
|
||||
|
||||
inputs_embeds (:obj:`tf.Tensor` of shape :obj:`({0}, hidden_size)`, `optional`):
|
||||
Optionally, instead of passing :obj:`input_ids` you can choose to directly pass an embedded representation.
|
||||
This is useful if you want more control over how to convert `input_ids` indices into associated vectors
|
||||
than the model's internal embedding lookup matrix.
|
||||
training (:obj:`boolean`, `optional`, defaults to :obj:`False`):
|
||||
Whether to activate dropout modules (if set to :obj:`True`) during training or to de-activate them
|
||||
(if set to :obj:`False`) for evaluation.
|
||||
This is useful if you want more control over how to convert :obj:`input_ids` indices into associated
|
||||
vectors than the model's internal embedding lookup matrix.
|
||||
output_attentions (:obj:`bool`, `optional`):
|
||||
If set to ``True``, the attentions tensors of all attention layers are returned. See ``attentions`` under returned tensors for more detail.
|
||||
Whether or not to return the attentions tensors of all attention layers. See ``attentions`` under returned
|
||||
tensors for more detail.
|
||||
output_hidden_states (:obj:`bool`, `optional`):
|
||||
If set to ``True``, the hidden states of all layers are returned. See ``hidden_states`` under returned tensors for more detail.
|
||||
Whether or not to return the hidden states of all layers. See ``hidden_states`` under returned tensors for
|
||||
more detail.
|
||||
return_dict (:obj:`bool`, `optional`):
|
||||
If set to ``True``, the model will return a :class:`~transformers.file_utils.ModelOutput` instead of a
|
||||
plain tuple.
|
||||
Whether or not to return a :class:`~transformers.file_utils.ModelOutput` instead of a plain tuple.
|
||||
training (:obj:`bool`, `optional`, defaults to :obj:`False`):
|
||||
Whether or not to use the model in training mode (some modules like dropout modules have different
|
||||
behaviors between training and evaluation).
|
||||
"""
|
||||
|
||||
|
||||
@@ -347,7 +360,7 @@ class TFXxxModel(TFXxxPreTrainedModel):
|
||||
super().__init__(config, *inputs, **kwargs)
|
||||
self.transformer = TFXxxMainLayer(config, name="transformer")
|
||||
|
||||
@add_start_docstrings_to_callable(XXX_INPUTS_DOCSTRING.format("(batch_size, sequence_length)"))
|
||||
@add_start_docstrings_to_callable(XXX_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
|
||||
@add_code_sample_docstrings(
|
||||
tokenizer_class=_TOKENIZER_FOR_DOC,
|
||||
checkpoint="xxx-base-cased",
|
||||
@@ -370,7 +383,7 @@ class TFXxxForMaskedLM(TFXxxPreTrainedModel, TFMaskedLanguageModelingLoss):
|
||||
self.transformer = TFXxxMainLayer(config, name="transformer")
|
||||
self.mlm = TFXxxMLMHead(config, self.transformer.embeddings, name="mlm")
|
||||
|
||||
@add_start_docstrings_to_callable(XXX_INPUTS_DOCSTRING.format("(batch_size, sequence_length)"))
|
||||
@add_start_docstrings_to_callable(XXX_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
|
||||
@add_code_sample_docstrings(
|
||||
tokenizer_class=_TOKENIZER_FOR_DOC,
|
||||
checkpoint="xxx-base-cased",
|
||||
@@ -452,7 +465,7 @@ class TFXxxForSequenceClassification(TFXxxPreTrainedModel, TFSequenceClassificat
|
||||
config.num_labels, kernel_initializer=get_initializer(config.initializer_range), name="classifier"
|
||||
)
|
||||
|
||||
@add_start_docstrings_to_callable(XXX_INPUTS_DOCSTRING)
|
||||
@add_start_docstrings_to_callable(XXX_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
|
||||
@add_code_sample_docstrings(
|
||||
tokenizer_class=_TOKENIZER_FOR_DOC,
|
||||
checkpoint="xxx-base-cased",
|
||||
@@ -544,7 +557,7 @@ class TFXxxForMultipleChoice(TFXxxPreTrainedModel, TFMultipleChoiceLoss):
|
||||
"""
|
||||
return {"input_ids": tf.constant(MULTIPLE_CHOICE_DUMMY_INPUTS)}
|
||||
|
||||
@add_start_docstrings_to_callable(XXX_INPUTS_DOCSTRING.format("(batch_size, num_choices, sequence_length)"))
|
||||
@add_start_docstrings_to_callable(XXX_INPUTS_DOCSTRING.format("batch_size, num_choices, sequence_length"))
|
||||
@add_code_sample_docstrings(
|
||||
tokenizer_class=_TOKENIZER_FOR_DOC,
|
||||
checkpoint="xxx-base-cased",
|
||||
@@ -568,8 +581,8 @@ class TFXxxForMultipleChoice(TFXxxPreTrainedModel, TFMultipleChoiceLoss):
|
||||
r"""
|
||||
labels (:obj:`tf.Tensor` of shape :obj:`(batch_size,)`, `optional`):
|
||||
Labels for computing the multiple choice classification loss.
|
||||
Indices should be in ``[0, ..., num_choices]`` where `num_choices` is the size of the second dimension
|
||||
of the input tensors. (see `input_ids` above)s after the attention softmax, used to compute the weighted average in the self-attention
|
||||
Indices should be in ``[0, ..., num_choices]`` where :obj:`num_choices` is the size of the second dimension
|
||||
of the input tensors. (See :obj:`input_ids` above)
|
||||
heads.
|
||||
"""
|
||||
if isinstance(inputs, (tuple, list)):
|
||||
@@ -667,7 +680,7 @@ class TFXxxForTokenClassification(TFXxxPreTrainedModel, TFTokenClassificationLos
|
||||
config.num_labels, kernel_initializer=get_initializer(config.initializer_range), name="classifier"
|
||||
)
|
||||
|
||||
@add_start_docstrings_to_callable(XXX_INPUTS_DOCSTRING)
|
||||
@add_start_docstrings_to_callable(XXX_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
|
||||
@add_code_sample_docstrings(
|
||||
tokenizer_class=_TOKENIZER_FOR_DOC,
|
||||
checkpoint="xxx-base-cased",
|
||||
@@ -734,8 +747,8 @@ class TFXxxForTokenClassification(TFXxxPreTrainedModel, TFTokenClassificationLos
|
||||
|
||||
|
||||
@add_start_docstrings(
|
||||
"""XXX Model with a span classification head on top for extractive question-answering tasks like SQuAD (a linear layers on top of
|
||||
the hidden-states output to compute `span start logits` and `span end logits`). """,
|
||||
"""XXX Model with a span classification head on top for extractive question-answering tasks like SQuAD (a linear
|
||||
layer on top of the hidden-states output to compute `span start logits` and `span end logits`). """,
|
||||
XXX_START_DOCSTRING,
|
||||
)
|
||||
class TFXxxForQuestionAnswering(TFXxxPreTrainedModel, TFQuestionAnsweringLoss):
|
||||
@@ -748,7 +761,7 @@ class TFXxxForQuestionAnswering(TFXxxPreTrainedModel, TFQuestionAnsweringLoss):
|
||||
config.num_labels, kernel_initializer=get_initializer(config.initializer_range), name="qa_outputs"
|
||||
)
|
||||
|
||||
@add_start_docstrings_to_callable(XXX_INPUTS_DOCSTRING)
|
||||
@add_start_docstrings_to_callable(XXX_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
|
||||
@add_code_sample_docstrings(
|
||||
tokenizer_class=_TOKENIZER_FOR_DOC,
|
||||
checkpoint="xxx-base-cased",
|
||||
@@ -773,11 +786,11 @@ class TFXxxForQuestionAnswering(TFXxxPreTrainedModel, TFQuestionAnsweringLoss):
|
||||
r"""
|
||||
start_positions (:obj:`tf.Tensor` of shape :obj:`(batch_size,)`, `optional`):
|
||||
Labels for position (index) of the start of the labelled span for computing the token classification loss.
|
||||
Positions are clamped to the length of the sequence (`sequence_length`).
|
||||
Positions are clamped to the length of the sequence (:obj:`sequence_length`).
|
||||
Position outside of the sequence are not taken into account for computing the loss.
|
||||
end_positions (:obj:`tf.Tensor` of shape :obj:`(batch_size,)`, `optional`):
|
||||
Labels for position (index) of the end of the labelled span for computing the token classification loss.
|
||||
Positions are clamped to the length of the sequence (`sequence_length`).
|
||||
Positions are clamped to the length of the sequence (:obj:`sequence_length`).
|
||||
Position outside of the sequence are not taken into account for computing the loss.
|
||||
"""
|
||||
return_dict = return_dict if return_dict is not None else self.transformer.return_dict
|
||||
|
||||
@@ -209,11 +209,16 @@ class XxxPreTrainedModel(PreTrainedModel):
|
||||
module.bias.data.zero_()
|
||||
|
||||
|
||||
XXX_START_DOCSTRING = r""" The XXX model was proposed in
|
||||
`XXX: Pre-training of Deep Bidirectional Transformers for Language Understanding
|
||||
XXX_START_DOCSTRING = r"""
|
||||
|
||||
The XXX model was proposed in `XXX: Pre-training of Deep Bidirectional Transformers for Language Understanding
|
||||
<https://arxiv.org/abs/1810.04805>`__ by....
|
||||
|
||||
This model is a PyTorch `torch.nn.Module <https://pytorch.org/docs/stable/nn.html#torch.nn.Module>`_ sub-class.
|
||||
This model inherits from :class:`~transformers.PreTrainedModel`. Check the superclass documentation for the generic
|
||||
methods the library implements for all its model (such as downloading or saving, resizing the input embeddings,
|
||||
pruning heads etc.)
|
||||
|
||||
This model is also a PyTorch `torch.nn.Module <https://pytorch.org/docs/stable/nn.html#torch.nn.Module>`__ subclass.
|
||||
Use it as a regular PyTorch Module and refer to the PyTorch documentation for all matter related to general
|
||||
usage and behavior.
|
||||
|
||||
@@ -225,27 +230,31 @@ XXX_START_DOCSTRING = r""" The XXX model was proposed in
|
||||
|
||||
XXX_INPUTS_DOCSTRING = r"""
|
||||
Inputs:
|
||||
input_ids (:obj:`torch.LongTensor` of shape :obj:`{0}`):
|
||||
input_ids (:obj:`torch.LongTensor` of shape :obj:`({0})`):
|
||||
Indices of input sequence tokens in the vocabulary.
|
||||
|
||||
Indices can be obtained using :class:`transformers.XxxTokenizer`.
|
||||
See :func:`transformers.PreTrainedTokenizer.encode` and
|
||||
:func:`transformers.PreTrainedTokenizer.__call__` for details.
|
||||
Indices can be obtained using :class:`~transformers.XxxTokenizer`.
|
||||
See :meth:`transformers.PreTrainedTokenizer.encode` and
|
||||
:meth:`transformers.PreTrainedTokenizer.__call__` for details.
|
||||
|
||||
`What are input IDs? <../glossary.html#input-ids>`__
|
||||
attention_mask (:obj:`torch.FloatTensor` of shape :obj:`{0}`, `optional`):
|
||||
attention_mask (:obj:`torch.FloatTensor` of shape :obj:`({0})`, `optional`):
|
||||
Mask to avoid performing attention on padding token indices.
|
||||
Mask values selected in ``[0, 1]``:
|
||||
``1`` for tokens that are NOT MASKED, ``0`` for MASKED tokens.
|
||||
|
||||
- 1 for tokens that are **not masked**,
|
||||
- 0 for tokens that are **maked**.
|
||||
|
||||
`What are attention masks? <../glossary.html#attention-mask>`__
|
||||
token_type_ids (:obj:`torch.LongTensor` of shape :obj:`{0}`, `optional`):
|
||||
token_type_ids (:obj:`torch.LongTensor` of shape :obj:`({0})`, `optional`):
|
||||
Segment token indices to indicate first and second portions of the inputs.
|
||||
Indices are selected in ``[0, 1]``: ``0`` corresponds to a `sentence A` token, ``1``
|
||||
corresponds to a `sentence B` token
|
||||
Indices are selected in ``[0, 1]``:
|
||||
|
||||
- 0 corresponds to a `sentence A` token,
|
||||
- 1 corresponds to a `sentence B` token.
|
||||
|
||||
`What are token type IDs? <../glossary.html#token-type-ids>`_
|
||||
position_ids (:obj:`torch.LongTensor` of shape :obj:`{0}`, `optional`):
|
||||
position_ids (:obj:`torch.LongTensor` of shape :obj:`({0})`, `optional`):
|
||||
Indices of positions of each input sequence tokens in the position embeddings.
|
||||
Selected in the range ``[0, config.max_position_embeddings - 1]``.
|
||||
|
||||
@@ -253,18 +262,22 @@ XXX_INPUTS_DOCSTRING = r"""
|
||||
head_mask (:obj:`torch.FloatTensor` of shape :obj:`(num_heads,)` or :obj:`(num_layers, num_heads)`, `optional`):
|
||||
Mask to nullify selected heads of the self-attention modules.
|
||||
Mask values selected in ``[0, 1]``:
|
||||
:obj:`1` indicates the head is **not masked**, :obj:`0` indicates the head is **masked**.
|
||||
inputs_embeds (:obj:`torch.FloatTensor` of shape :obj:`(batch_size, sequence_length, hidden_size)`, `optional`):
|
||||
|
||||
- 1 indicates the head is **not masked**,
|
||||
- 0 indicates the head is **masked**.
|
||||
|
||||
inputs_embeds (:obj:`torch.FloatTensor` of shape :obj:`({0}, hidden_size)`, `optional`):
|
||||
Optionally, instead of passing :obj:`input_ids` you can choose to directly pass an embedded representation.
|
||||
This is useful if you want more control over how to convert `input_ids` indices into associated vectors
|
||||
than the model's internal embedding lookup matrix.
|
||||
This is useful if you want more control over how to convert :obj:`input_ids` indices into associated
|
||||
vectors than the model's internal embedding lookup matrix.
|
||||
output_attentions (:obj:`bool`, `optional`):
|
||||
If set to ``True``, the attentions tensors of all attention layers are returned. See ``attentions`` under returned tensors for more detail.
|
||||
Whether or not to return the attentions tensors of all attention layers. See ``attentions`` under returned
|
||||
tensors for more detail.
|
||||
output_hidden_states (:obj:`bool`, `optional`):
|
||||
If set to ``True``, the hidden states of all layers are returned. See ``hidden_states`` under returned tensors for more detail.
|
||||
Whether or not to return the hidden states of all layers. See ``hidden_states`` under returned tensors for
|
||||
more detail.
|
||||
return_dict (:obj:`bool`, `optional`):
|
||||
If set to ``True``, the model will return a :class:`~transformers.file_utils.ModelOutput` instead of a
|
||||
plain tuple.
|
||||
Whether or not to return a :class:`~transformers.file_utils.ModelOutput` instead of a plain tuple.
|
||||
"""
|
||||
|
||||
|
||||
@@ -296,7 +309,7 @@ class XxxModel(XxxPreTrainedModel):
|
||||
for layer, heads in heads_to_prune.items():
|
||||
self.encoder.layer[layer].attention.prune_heads(heads)
|
||||
|
||||
@add_start_docstrings_to_callable(XXX_INPUTS_DOCSTRING.format("(batch_size, sequence_length)"))
|
||||
@add_start_docstrings_to_callable(XXX_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
|
||||
@add_code_sample_docstrings(
|
||||
tokenizer_class=_TOKENIZER_FOR_DOC,
|
||||
checkpoint="xxx-base-uncased",
|
||||
@@ -378,7 +391,7 @@ class XxxForMaskedLM(XxxPreTrainedModel):
|
||||
def get_output_embeddings(self):
|
||||
return self.lm_head
|
||||
|
||||
@add_start_docstrings_to_callable(XXX_INPUTS_DOCSTRING.format("(batch_size, sequence_length)"))
|
||||
@add_start_docstrings_to_callable(XXX_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
|
||||
@add_code_sample_docstrings(
|
||||
tokenizer_class=_TOKENIZER_FOR_DOC,
|
||||
checkpoint="xxx-base-uncased",
|
||||
@@ -455,7 +468,7 @@ class XxxForSequenceClassification(XxxPreTrainedModel):
|
||||
|
||||
self.init_weights()
|
||||
|
||||
@add_start_docstrings_to_callable(XXX_INPUTS_DOCSTRING.format("(batch_size, sequence_length)"))
|
||||
@add_start_docstrings_to_callable(XXX_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
|
||||
@add_code_sample_docstrings(
|
||||
tokenizer_class=_TOKENIZER_FOR_DOC,
|
||||
checkpoint="xxx-base-uncased",
|
||||
@@ -538,7 +551,7 @@ class XxxForMultipleChoice(XxxPreTrainedModel):
|
||||
|
||||
self.init_weights()
|
||||
|
||||
@add_start_docstrings_to_callable(XXX_INPUTS_DOCSTRING.format("(batch_size, num_choices, sequence_length)"))
|
||||
@add_start_docstrings_to_callable(XXX_INPUTS_DOCSTRING.format("batch_size, num_choices, sequence_length"))
|
||||
@add_code_sample_docstrings(
|
||||
tokenizer_class=_TOKENIZER_FOR_DOC,
|
||||
checkpoint="xxx-base-uncased",
|
||||
@@ -561,8 +574,8 @@ class XxxForMultipleChoice(XxxPreTrainedModel):
|
||||
r"""
|
||||
labels (:obj:`torch.LongTensor` of shape :obj:`(batch_size,)`, `optional`):
|
||||
Labels for computing the multiple choice classification loss.
|
||||
Indices should be in ``[0, ..., num_choices-1]`` where `num_choices` is the size of the second dimension
|
||||
of the input tensors. (see `input_ids` above)
|
||||
Indices should be in ``[0, ..., num_choices-1]`` where :obj:`num_choices` is the size of the second dimension
|
||||
of the input tensors. (See :obj:`input_ids` above)
|
||||
"""
|
||||
return_dict = return_dict if return_dict is not None else self.config.use_return_dict
|
||||
num_choices = input_ids.shape[1] if input_ids is not None else inputs_embeds.shape[1]
|
||||
@@ -628,7 +641,7 @@ class XxxForTokenClassification(XxxPreTrainedModel):
|
||||
|
||||
self.init_weights()
|
||||
|
||||
@add_start_docstrings_to_callable(XXX_INPUTS_DOCSTRING.format("(batch_size, sequence_length)"))
|
||||
@add_start_docstrings_to_callable(XXX_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
|
||||
@add_code_sample_docstrings(
|
||||
tokenizer_class=_TOKENIZER_FOR_DOC,
|
||||
checkpoint="xxx-base-uncased",
|
||||
@@ -713,7 +726,7 @@ class XxxForQuestionAnswering(XxxPreTrainedModel):
|
||||
|
||||
self.init_weights()
|
||||
|
||||
@add_start_docstrings_to_callable(XXX_INPUTS_DOCSTRING.format("(batch_size, sequence_length)"))
|
||||
@add_start_docstrings_to_callable(XXX_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
|
||||
@add_code_sample_docstrings(
|
||||
tokenizer_class=_TOKENIZER_FOR_DOC,
|
||||
checkpoint="xxx-base-uncased",
|
||||
@@ -737,11 +750,11 @@ class XxxForQuestionAnswering(XxxPreTrainedModel):
|
||||
r"""
|
||||
start_positions (:obj:`torch.LongTensor` of shape :obj:`(batch_size,)`, `optional`):
|
||||
Labels for position (index) of the start of the labelled span for computing the token classification loss.
|
||||
Positions are clamped to the length of the sequence (`sequence_length`).
|
||||
Positions are clamped to the length of the sequence (:obj:`sequence_length`).
|
||||
Position outside of the sequence are not taken into account for computing the loss.
|
||||
end_positions (:obj:`torch.LongTensor` of shape :obj:`(batch_size,)`, `optional`):
|
||||
Labels for position (index) of the end of the labelled span for computing the token classification loss.
|
||||
Positions are clamped to the length of the sequence (`sequence_length`).
|
||||
Positions are clamped to the length of the sequence (:obj:`sequence_length`).
|
||||
Position outside of the sequence are not taken into account for computing the loss.
|
||||
"""
|
||||
return_dict = return_dict if return_dict is not None else self.config.use_return_dict
|
||||
|
||||
@@ -80,16 +80,16 @@ class XxxTokenizer(PreTrainedTokenizer):
|
||||
r"""
|
||||
Constructs a XXX tokenizer. Based on XXX.
|
||||
|
||||
This tokenizer inherits from :class:`~transformers.PreTrainedTokenizer` which contains most of the methods. Users
|
||||
should refer to the superclass for more information regarding methods.
|
||||
This tokenizer inherits from :class:`~transformers.PreTrainedTokenizer` which contains most of the main methods.
|
||||
Users should refer to this superclass for more information regarding those methods.
|
||||
|
||||
Args:
|
||||
vocab_file (:obj:`str`):
|
||||
File containing the vocabulary.
|
||||
do_lower_case (:obj:`bool`, `optional`, defaults to :obj:`True`):
|
||||
Whether to lowercase the input when tokenizing.
|
||||
Whether or not to lowercase the input when tokenizing.
|
||||
do_basic_tokenize (:obj:`bool`, `optional`, defaults to :obj:`True`):
|
||||
Whether to do basic tokenization before WordPiece.
|
||||
Whether ot not to do basic tokenization before WordPiece.
|
||||
never_split (:obj:`Iterable`, `optional`):
|
||||
Collection of tokens which will never be split during tokenization. Only has an effect when
|
||||
:obj:`do_basic_tokenize=True`
|
||||
@@ -194,19 +194,19 @@ class XxxTokenizer(PreTrainedTokenizer):
|
||||
"""
|
||||
Build model inputs from a sequence or a pair of sequence for sequence classification tasks
|
||||
by concatenating and adding special tokens.
|
||||
A BERT sequence has the following format:
|
||||
A XXX sequence has the following format:
|
||||
|
||||
- single sequence: ``[CLS] X [SEP]``
|
||||
- pair of sequences: ``[CLS] A [SEP] B [SEP]``
|
||||
|
||||
Args:
|
||||
token_ids_0 (:obj:`List[int]`):
|
||||
List of IDs to which the special tokens will be added
|
||||
List of IDs to which the special tokens will be added.
|
||||
token_ids_1 (:obj:`List[int]`, `optional`):
|
||||
Optional second list of IDs for sequence pairs.
|
||||
|
||||
Returns:
|
||||
:obj:`List[int]`: list of `input IDs <../glossary.html#input-ids>`__ with the appropriate special tokens.
|
||||
:obj:`List[int]`: List of `input IDs <../glossary.html#input-ids>`__ with the appropriate special tokens.
|
||||
"""
|
||||
if token_ids_1 is None:
|
||||
return [self.cls_token_id] + token_ids_0 + [self.sep_token_id]
|
||||
@@ -218,16 +218,16 @@ class XxxTokenizer(PreTrainedTokenizer):
|
||||
self, token_ids_0: List[int], token_ids_1: Optional[List[int]] = None, already_has_special_tokens: bool = False
|
||||
) -> List[int]:
|
||||
"""
|
||||
Retrieves sequence ids from a token list that has no special tokens added. This method is called when adding
|
||||
Retrieve sequence ids from a token list that has no special tokens added. This method is called when adding
|
||||
special tokens using the tokenizer ``prepare_for_model`` method.
|
||||
|
||||
Args:
|
||||
token_ids_0 (:obj:`List[int]`):
|
||||
List of ids.
|
||||
List of IDs.
|
||||
token_ids_1 (:obj:`List[int]`, `optional`):
|
||||
Optional second list of IDs for sequence pairs.
|
||||
already_has_special_tokens (:obj:`bool`, `optional`, defaults to :obj:`False`):
|
||||
Set to True if the token list is already formatted with special tokens for the model
|
||||
Whether or not the token list is already formatted with special tokens for the model.
|
||||
|
||||
Returns:
|
||||
:obj:`List[int]`: A list of integers in the range [0, 1]: 1 for a special token, 0 for a sequence token.
|
||||
@@ -249,7 +249,7 @@ class XxxTokenizer(PreTrainedTokenizer):
|
||||
self, token_ids_0: List[int], token_ids_1: Optional[List[int]] = None
|
||||
) -> List[int]:
|
||||
"""
|
||||
Creates a mask from the two sequences passed to be used in a sequence-pair classification task.
|
||||
Create a mask from the two sequences passed to be used in a sequence-pair classification task.
|
||||
A BERT sequence pair mask has the following format:
|
||||
|
||||
::
|
||||
@@ -257,11 +257,11 @@ class XxxTokenizer(PreTrainedTokenizer):
|
||||
0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1
|
||||
| first sequence | second sequence |
|
||||
|
||||
if token_ids_1 is None, only returns the first portion of the mask (0's).
|
||||
If :obj:`token_ids_1` is :obj:`None`, this method only returns the first portion of the mask (0s).
|
||||
|
||||
Args:
|
||||
token_ids_0 (:obj:`List[int]`):
|
||||
List of ids.
|
||||
List of IDs.
|
||||
token_ids_1 (:obj:`List[int]`, `optional`):
|
||||
Optional second list of IDs for sequence pairs.
|
||||
|
||||
@@ -277,7 +277,7 @@ class XxxTokenizer(PreTrainedTokenizer):
|
||||
|
||||
def save_vocabulary(self, vocab_path):
|
||||
"""
|
||||
Save the sentencepiece vocabulary (copy original file) and special tokens file to a directory.
|
||||
Save the vocabulary (copy original file) and special tokens file to a directory.
|
||||
|
||||
Args:
|
||||
vocab_path (:obj:`str`):
|
||||
|
||||
Reference in New Issue
Block a user