Doc styling fixes (#8074)
* Fix a few docstrings * More fixes * Styling
This commit is contained in:
@@ -66,30 +66,31 @@ class CamembertTokenizerFast(PreTrainedTokenizerFast):
|
||||
This tokenizer inherits from :class:`~transformers.PreTrainedTokenizerFast` which contains most of the main
|
||||
methods. Users should refer to this superclass for more information regarding those methods.
|
||||
|
||||
vocab_file (:obj:`str`): `SentencePiece <https://github.com/google/sentencepiece>`__ file (generally has a `.spm`
|
||||
extension) that contains the vocabulary necessary to instantiate a tokenizer. bos_token (:obj:`str`, `optional`,
|
||||
defaults to :obj:`"<s>"`): The beginning of sequence token that was used during pretraining. Can be used as a sequence
|
||||
classifier token.
|
||||
Args:
|
||||
vocab_file (:obj:`str`):
|
||||
`SentencePiece <https://github.com/google/sentencepiece>`__ file (generally has a `.spm` extension) that
|
||||
contains the vocabulary necessary to instantiate a tokenizer.
|
||||
bos_token (:obj:`str`, `optional`, defaults to :obj:`"<s>"`):
|
||||
The beginning of sequence token that was used during pretraining. Can be used as a sequence classifier token.
|
||||
|
||||
.. note::
|
||||
|
||||
When building a sequence using special tokens, this is not the token that is used for the beginning
|
||||
of sequence. The token used is the :obj:`cls_token`.
|
||||
When building a sequence using special tokens, this is not the token that is used for the beginning of
|
||||
sequence. The token used is the :obj:`cls_token`.
|
||||
eos_token (:obj:`str`, `optional`, defaults to :obj:`"</s>"`):
|
||||
The end of sequence token.
|
||||
|
||||
.. note::
|
||||
|
||||
When building a sequence using special tokens, this is not the token that is used for the end
|
||||
of sequence. The token used is the :obj:`sep_token`.
|
||||
When building a sequence using special tokens, this is not the token that is used for the end of
|
||||
sequence. The token used is the :obj:`sep_token`.
|
||||
sep_token (:obj:`str`, `optional`, defaults to :obj:`"</s>"`):
|
||||
The separator token, which is used when building a sequence from multiple sequences, e.g. two sequences
|
||||
for sequence classification or for a text and a question for question answering.
|
||||
It is also used as the last token of a sequence built with special tokens.
|
||||
The separator token, which is used when building a sequence from multiple sequences, e.g. two sequences for
|
||||
sequence classification or for a text and a question for question answering. It is also used as the last
|
||||
token of a sequence built with special tokens.
|
||||
cls_token (:obj:`str`, `optional`, defaults to :obj:`"<s>"`):
|
||||
The classifier token which is used when doing sequence classification (classification of the whole
|
||||
sequence instead of per-token classification). It is the first token of the sequence when built with
|
||||
special tokens.
|
||||
The classifier token which is used when doing sequence classification (classification of the whole sequence
|
||||
instead of per-token classification). It is the first token of the sequence when built with special tokens.
|
||||
unk_token (:obj:`str`, `optional`, defaults to :obj:`"<unk>"`):
|
||||
The unknown token. A token that is not in the vocabulary cannot be converted to an ID and is set to be this
|
||||
token instead.
|
||||
|
||||
@@ -129,10 +129,10 @@ DPRReaderOutput = collections.namedtuple("DPRReaderOutput", ["start_logits", "en
|
||||
|
||||
|
||||
CUSTOM_DPR_READER_DOCSTRING = r"""
|
||||
Return a dictionary with the token ids of the input strings and other information to give to
|
||||
:obj:`.decode_best_spans`. It converts the strings of a question and different passages (title and text) into a
|
||||
sequence of IDs (integers), using the tokenizer and vocabulary. The resulting :obj:`input_ids` is a matrix of
|
||||
size :obj:`(n_passages, sequence_length)` with the format:
|
||||
Return a dictionary with the token ids of the input strings and other information to give to
|
||||
:obj:`.decode_best_spans`. It converts the strings of a question and different passages (title and text) into a
|
||||
sequence of IDs (integers), using the tokenizer and vocabulary. The resulting :obj:`input_ids` is a matrix of size
|
||||
:obj:`(n_passages, sequence_length)` with the format:
|
||||
|
||||
::
|
||||
|
||||
@@ -189,12 +189,12 @@ CUSTOM_DPR_READER_DOCSTRING = r"""
|
||||
|
||||
`What are attention masks? <../glossary.html#attention-mask>`__
|
||||
|
||||
Return:
|
||||
Returns:
|
||||
:obj:`Dict[str, List[List[int]]]`: A dictionary with the following keys:
|
||||
|
||||
- ``input_ids``: List of token ids to be fed to a model.
|
||||
- ``attention_mask``: List of indices specifying which tokens should be attended to by the model.
|
||||
"""
|
||||
"""
|
||||
|
||||
|
||||
@add_start_docstrings(CUSTOM_DPR_READER_DOCSTRING)
|
||||
|
||||
@@ -132,12 +132,12 @@ DPRReaderOutput = collections.namedtuple("DPRReaderOutput", ["start_logits", "en
|
||||
|
||||
|
||||
CUSTOM_DPR_READER_DOCSTRING = r"""
|
||||
Return a dictionary with the token ids of the input strings and other information to give to
|
||||
:obj:`.decode_best_spans`. It converts the strings of a question and different passages (title and text) into a
|
||||
sequence of IDs (integers), using the tokenizer and vocabulary. The resulting :obj:`input_ids` is a matrix of
|
||||
size :obj:`(n_passages, sequence_length)` with the format:
|
||||
Return a dictionary with the token ids of the input strings and other information to give to
|
||||
:obj:`.decode_best_spans`. It converts the strings of a question and different passages (title and text) into a
|
||||
sequence of IDs (integers), using the tokenizer and vocabulary. The resulting :obj:`input_ids` is a matrix of size
|
||||
:obj:`(n_passages, sequence_length)` with the format:
|
||||
|
||||
[CLS] <question token ids> [SEP] <titles ids> [SEP] <texts ids>
|
||||
[CLS] <question token ids> [SEP] <titles ids> [SEP] <texts ids>
|
||||
|
||||
Args:
|
||||
questions (:obj:`str` or :obj:`List[str]`):
|
||||
@@ -195,7 +195,7 @@ CUSTOM_DPR_READER_DOCSTRING = r"""
|
||||
|
||||
- ``input_ids``: List of token ids to be fed to a model.
|
||||
- ``attention_mask``: List of indices specifying which tokens should be attended to by the model.
|
||||
"""
|
||||
"""
|
||||
|
||||
|
||||
@add_start_docstrings(CUSTOM_DPR_READER_DOCSTRING)
|
||||
|
||||
@@ -194,18 +194,21 @@ class Trainer:
|
||||
|
||||
The function may have zero arguments, or a single one containing the optuna/Ray Tune trial object, to be
|
||||
able to choose different architectures according to hyper parameters (such as layer count, sizes of inner
|
||||
layers, dropout probabilities etc). compute_metrics (:obj:`Callable[[EvalPrediction], Dict]`, `optional`):
|
||||
layers, dropout probabilities etc).
|
||||
compute_metrics (:obj:`Callable[[EvalPrediction], Dict]`, `optional`):
|
||||
The function that will be used to compute metrics at evaluation. Must take a
|
||||
:class:`~transformers.EvalPrediction` and return a dictionary string to metric values. callbacks (List of
|
||||
:obj:`~transformers.TrainerCallback`, `optional`): A list of callbacks to customize the training loop. Will
|
||||
add those to the list of default callbacks detailed in :doc:`here <callback>`.
|
||||
:class:`~transformers.EvalPrediction` and return a dictionary mapping strings to metric values.
|
||||
callbacks (List of :obj:`~transformers.TrainerCallback`, `optional`):
|
||||
A list of callbacks to customize the training loop. Will add those to the list of default callbacks
|
||||
detailed in :doc:`here <callback>`.
|
||||
|
||||
If you want to remove one of the default callbacks used, use the :meth:`Trainer.remove_callback` method.
|
||||
optimizers (:obj:`Tuple[torch.optim.Optimizer, torch.optim.lr_scheduler.LambdaLR]`, `optional`): A tuple
|
||||
optimizers (:obj:`Tuple[torch.optim.Optimizer, torch.optim.lr_scheduler.LambdaLR]`, `optional`): A tuple
|
||||
containing the optimizer and the scheduler to use. Will default to an instance of
|
||||
:class:`~transformers.AdamW` on your model and a scheduler given by
|
||||
:func:`~transformers.get_linear_schedule_with_warmup` controlled by :obj:`args`. kwargs: Deprecated keyword
|
||||
arguments.
|
||||
:func:`~transformers.get_linear_schedule_with_warmup` controlled by :obj:`args`.
|
||||
kwargs:
|
||||
Deprecated keyword arguments.
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
|
||||
@@ -144,29 +144,31 @@ class TrainingArguments:
|
||||
If using `nlp.Dataset` datasets, whether or not to automatically remove the columns unused by the model
|
||||
forward method.
|
||||
|
||||
(Note that this behavior is not implemented for :class:`~transformers.TFTrainer` yet.) label_names
|
||||
(:obj:`List[str]`, `optional`): The list of keys in your dictionary of inputs that correspond to the
|
||||
labels.
|
||||
(Note that this behavior is not implemented for :class:`~transformers.TFTrainer` yet.)
|
||||
label_names (:obj:`List[str]`, `optional`):
|
||||
The list of keys in your dictionary of inputs that correspond to the labels.
|
||||
|
||||
Will eventually default to :obj:`["labels"]` except if the model used is one of the
|
||||
:obj:`XxxForQuestionAnswering` in which case it will default to :obj:`["start_positions",
|
||||
"end_positions"]`. load_best_model_at_end (:obj:`bool`, `optional`, defaults to :obj:`False`): Whether or
|
||||
not to load the best model found during training at the end of training.
|
||||
"end_positions"]`.
|
||||
load_best_model_at_end (:obj:`bool`, `optional`, defaults to :obj:`False`):
|
||||
Whether or not to load the best model found during training at the end of training.
|
||||
|
||||
.. note::
|
||||
|
||||
When set to :obj:`True`, the parameter :obj:`save_steps` will be ignored and the model will be saved
|
||||
after each evaluation.
|
||||
metric_for_best_model (:obj:`str`, `optional`)
|
||||
metric_for_best_model (:obj:`str`, `optional`):
|
||||
Use in conjunction with :obj:`load_best_model_at_end` to specify the metric to use to compare two different
|
||||
models. Must be the name of a metric returned by the evaluation with or without the prefix :obj:`"eval_"`.
|
||||
Will default to :obj:`"loss"` if unspecified and :obj:`load_best_model_at_end=True` (to use the evaluation
|
||||
loss).
|
||||
|
||||
If you set this value, :obj:`greater_is_better` will default to :obj:`True`. Don't forget to set it to
|
||||
:obj:`False` if your metric is better when lower. greater_is_better (:obj:`bool`, `optional`) Use in
|
||||
conjunction with :obj:`load_best_model_at_end` and :obj:`metric_for_best_model` to specify if better models
|
||||
should have a greater metric or not. Will default to:
|
||||
:obj:`False` if your metric is better when lower.
|
||||
greater_is_better (:obj:`bool`, `optional`):
|
||||
Use in conjunction with :obj:`load_best_model_at_end` and :obj:`metric_for_best_model` to specify if better
|
||||
models should have a greater metric or not. Will default to:
|
||||
|
||||
- :obj:`True` if :obj:`metric_for_best_model` is set to a value that isn't :obj:`"loss"` or
|
||||
:obj:`"eval_loss"`.
|
||||
|
||||
@@ -312,10 +312,11 @@ class DocstringStyler(CodeStyler):
|
||||
"""Class to style docstrings that take the main method from `CodeStyler`."""
|
||||
|
||||
def is_no_style_block(self, line):
|
||||
if _re_textual_blocks.search(line) is not None:
|
||||
return False
|
||||
if _re_example.search(line) is not None:
|
||||
return True
|
||||
return _re_code_block.search(line) is not None
|
||||
# return super().is_no_style_block(line) is not None
|
||||
|
||||
def is_comment_or_textual_block(self, line):
|
||||
if _re_return.search(line) is not None:
|
||||
|
||||
Reference in New Issue
Block a user