From d406a2729af35d97eeca6dafa411a1322d56a9f8 Mon Sep 17 00:00:00 2001 From: Stas Bekman Date: Wed, 2 Jun 2021 09:21:05 -0700 Subject: [PATCH] [docs] fix xref to `PreTrainedModel.generate` (#11049) * fix xref to generate * do the same for search methods * style * style --- docs/source/internal/generation_utils.rst | 14 ++++++++------ docs/source/model_doc/bart.rst | 2 +- docs/source/model_doc/t5.rst | 8 ++++---- docs/source/task_summary.rst | 8 ++++---- src/transformers/models/rag/modeling_rag.py | 17 ++++++++++------- src/transformers/models/rag/modeling_tf_rag.py | 17 ++++++++++------- 6 files changed, 37 insertions(+), 29 deletions(-) diff --git a/docs/source/internal/generation_utils.rst b/docs/source/internal/generation_utils.rst index fe066e456d..04543a48be 100644 --- a/docs/source/internal/generation_utils.rst +++ b/docs/source/internal/generation_utils.rst @@ -13,19 +13,21 @@ Utilities for Generation ----------------------------------------------------------------------------------------------------------------------- -This page lists all the utility functions used by :meth:`~transformers.PreTrainedModel.generate`, -:meth:`~transformers.PreTrainedModel.greedy_search`, :meth:`~transformers.PreTrainedModel.sample`, -:meth:`~transformers.PreTrainedModel.beam_search`, :meth:`~transformers.PreTrainedModel.beam_sample`, and -:meth:`~transformers.PreTrainedModel.group_beam_search`. +This page lists all the utility functions used by :meth:`~transformers.generation_utils.GenerationMixin.generate`, +:meth:`~transformers.generation_utils.GenerationMixin.greedy_search`, +:meth:`~transformers.generation_utils.GenerationMixin.sample`, +:meth:`~transformers.generation_utils.GenerationMixin.beam_search`, +:meth:`~transformers.generation_utils.GenerationMixin.beam_sample`, and +:meth:`~transformers.generation_utils.GenerationMixin.group_beam_search`. Most of those are only useful if you are studying the code of the generate methods in the library. 
Generate Outputs ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -The output of :meth:`~transformers.PreTrainedModel.generate` is an instance of a subclass of +The output of :meth:`~transformers.generation_utils.GenerationMixin.generate` is an instance of a subclass of :class:`~transformers.file_utils.ModelOutput`. This output is a data structure containing all the information returned -by :meth:`~transformers.PreTrainedModel.generate`, but that can also be used as tuple or dictionary. +by :meth:`~transformers.generation_utils.GenerationMixin.generate`, but that can also be used as tuple or dictionary. Here's an example: diff --git a/docs/source/model_doc/bart.rst b/docs/source/model_doc/bart.rst index f863fe997f..c96e57e29e 100644 --- a/docs/source/model_doc/bart.rst +++ b/docs/source/model_doc/bart.rst @@ -61,7 +61,7 @@ Implementation Notes - Model predictions are intended to be identical to the original implementation when :obj:`force_bos_token_to_be_generated=True`. This only works, however, if the string you pass to :func:`fairseq.encode` starts with a space. -- :meth:`~transformers.BartForConditionalGeneration.generate` should be used for conditional generation tasks like +- :meth:`~transformers.generation_utils.GenerationMixin.generate` should be used for conditional generation tasks like summarization, see the example in that docstrings. - Models that load the `facebook/bart-large-cnn` weights will not have a :obj:`mask_token_id`, or be able to perform mask-filling tasks. diff --git a/docs/source/model_doc/t5.rst b/docs/source/model_doc/t5.rst index fe8d2c4053..7defbdbb74 100644 --- a/docs/source/model_doc/t5.rst +++ b/docs/source/model_doc/t5.rst @@ -1,4 +1,4 @@ -.. +.. Copyright 2020 The HuggingFace Team. All rights reserved. 
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with @@ -44,9 +44,9 @@ Tips: For more information about which prefix to use, it is easiest to look into Appendix D of the `paper `__. - For sequence-to-sequence generation, it is recommended to use - :obj:`T5ForConditionalGeneration.generate()`. This method takes care of feeding the encoded input via cross-attention - layers to the decoder and auto-regressively generates the decoder output. - T5 uses relative scalar embeddings. - Encoder input padding can be done on the left and on the right. + :meth:`~transformers.generation_utils.GenerationMixin.generate`. This method takes care of feeding the encoded input + via cross-attention layers to the decoder and auto-regressively generates the decoder output. - T5 uses relative + scalar embeddings. Encoder input padding can be done on the left and on the right. This model was contributed by `thomwolf `__. The original code can be found `here `__. diff --git a/docs/source/task_summary.rst b/docs/source/task_summary.rst index aaee0d988f..93a6716b65 100644 --- a/docs/source/task_summary.rst +++ b/docs/source/task_summary.rst @@ -1,4 +1,4 @@ -.. +.. Copyright 2020 The HuggingFace Team. All rights reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with @@ -505,8 +505,8 @@ This outputs a (hopefully) coherent next token following the original sequence, >>> print(resulting_string) Hugging Face is based in DUMBO, New York City, and has -In the next section, we show how :func:`~transformers.PreTrainedModel.generate` can be used to generate multiple tokens -up to a specified length instead of one token at a time. +In the next section, we show how :func:`~transformers.generation_utils.GenerationMixin.generate` can be used to +generate multiple tokens up to a specified length instead of one token at a time. 
Text Generation ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -629,7 +629,7 @@ It leverages a fine-tuned model on CoNLL-2003, fine-tuned by `@stefan-it >> ner_pipe = pipeline("ner") - >>> sequence = """Hugging Face Inc. is a company based in New York City. Its headquarters are in DUMBO, + >>> sequence = """Hugging Face Inc. is a company based in New York City. Its headquarters are in DUMBO, ... therefore very close to the Manhattan Bridge which is visible from the window.""" diff --git a/src/transformers/models/rag/modeling_rag.py b/src/transformers/models/rag/modeling_rag.py index 8caf9ecdd9..02c4a2a28f 100644 --- a/src/transformers/models/rag/modeling_rag.py +++ b/src/transformers/models/rag/modeling_rag.py @@ -906,8 +906,9 @@ class RagSequenceForGeneration(RagPreTrainedModel): **model_kwargs ): """ - Implements RAG sequence "thorough" decoding. Read the :meth:`~transformers.PreTrainedModel.generate`` - documentation for more information on how to set other generate input parameters. + Implements RAG sequence "thorough" decoding. Read the + :meth:`~transformers.generation_utils.GenerationMixin.generate` documentation for more information on how to + set other generate input parameters. Args: input_ids (:obj:`torch.LongTensor` of shape :obj:`(batch_size, sequence_length)`, `optional`): @@ -942,14 +943,15 @@ class RagSequenceForGeneration(RagPreTrainedModel): to be set to :obj:`False` if used while training with distributed backend. num_return_sequences(:obj:`int`, `optional`, defaults to 1): The number of independently computed returned sequences for each element in the batch. Note that this - is not the value we pass to the ``generator``'s `:func:`~transformers.PreTrainedModel.generate`` - function, where we set ``num_return_sequences`` to :obj:`num_beams`. 
+ is not the value we pass to the ``generator``'s + :func:`~transformers.generation_utils.GenerationMixin.generate` function, where we set + ``num_return_sequences`` to :obj:`num_beams`. num_beams (:obj:`int`, `optional`, defaults to 1): Number of beams for beam search. 1 means no beam search. n_docs (:obj:`int`, `optional`, defaults to :obj:`config.n_docs`) Number of documents to retrieve and/or number of documents for which to generate an answer. kwargs: - Additional kwargs will be passed to :meth:`~transformers.PreTrainedModel.generate`. + Additional kwargs will be passed to :meth:`~transformers.generation_utils.GenerationMixin.generate`. Return: :obj:`torch.LongTensor` of shape :obj:`(batch_size * num_return_sequences, sequence_length)`: The generated @@ -1452,8 +1454,9 @@ class RagTokenForGeneration(RagPreTrainedModel): enabled. num_return_sequences(:obj:`int`, `optional`, defaults to 1): The number of independently computed returned sequences for each element in the batch. Note that this - is not the value we pass to the ``generator``'s `:func:`~transformers.PreTrainedModel.generate` - function, where we set ``num_return_sequences`` to :obj:`num_beams`. + is not the value we pass to the ``generator``'s + :func:`~transformers.generation_utils.GenerationMixin.generate` function, where we set + ``num_return_sequences`` to :obj:`num_beams`. decoder_start_token_id (:obj:`int`, `optional`): If an encoder-decoder model starts decoding with a different token than `bos`, the id of that token. n_docs (:obj:`int`, `optional`, defaults to :obj:`config.n_docs`) diff --git a/src/transformers/models/rag/modeling_tf_rag.py b/src/transformers/models/rag/modeling_tf_rag.py index 4d452b6359..00e4690da9 100644 --- a/src/transformers/models/rag/modeling_tf_rag.py +++ b/src/transformers/models/rag/modeling_tf_rag.py @@ -1130,8 +1130,9 @@ class TFRagTokenForGeneration(TFRagPreTrainedModel, TFCausalLanguageModelingLoss Number of beams for beam search. 1 means no beam search. 
num_return_sequences(:obj:`int`, `optional`, defaults to 1): The number of independently computed returned sequences for each element in the batch. Note that this - is not the value we pass to the ``generator``'s `:func:`~transformers.PreTrainedModel.generate` - function, where we set ``num_return_sequences`` to :obj:`num_beams`. + is not the value we pass to the ``generator``'s + :func:`~transformers.generation_utils.GenerationMixin.generate` function, where we set + ``num_return_sequences`` to :obj:`num_beams`. decoder_start_token_id (:obj:`int`, `optional`): If an encoder-decoder model starts decoding with a different token than `bos`, the id of that token. n_docs (:obj:`int`, `optional`, defaults to :obj:`config.n_docs`) @@ -1682,8 +1683,9 @@ class TFRagSequenceForGeneration(TFRagPreTrainedModel, TFCausalLanguageModelingL **model_kwargs ): """ - Implements RAG sequence "thorough" decoding. Read the :meth:`~transformers.PreTrainedModel.generate`` - documentation for more information on how to set other generate input parameters + Implements RAG sequence "thorough" decoding. Read the + :meth:`~transformers.generation_utils.GenerationMixin.generate` documentation for more information on how to + set other generate input parameters. Args: input_ids (:obj:`tf.Tensor` of shape :obj:`(batch_size, sequence_length)`, `optional`): @@ -1711,14 +1713,15 @@ class TFRagSequenceForGeneration(TFRagPreTrainedModel, TFCausalLanguageModelingL to be set to :obj:`False` if used while training with distributed backend. num_return_sequences(:obj:`int`, `optional`, defaults to 1): The number of independently computed returned sequences for each element in the batch. Note that this - is not the value we pass to the ``generator``'s `:func:`~transformers.PreTrainedModel.generate`` - function, where we set ``num_return_sequences`` to :obj:`num_beams`. 
+ is not the value we pass to the ``generator``'s + :func:`~transformers.generation_utils.GenerationMixin.generate` function, where we set + ``num_return_sequences`` to :obj:`num_beams`. num_beams (:obj:`int`, `optional`, defaults to 1): Number of beams for beam search. 1 means no beam search. n_docs (:obj:`int`, `optional`, defaults to :obj:`config.n_docs`) Number of documents to retrieve and/or number of documents for which to generate an answer. kwargs: - Additional kwargs will be passed to :meth:`~transformers.PreTrainedModel.generate` + Additional kwargs will be passed to :meth:`~transformers.generation_utils.GenerationMixin.generate`. Return: :obj:`tf.Tensor` of shape :obj:`(batch_size * num_return_sequences, sequence_length)`: The generated