Update Perceiver code examples (#14783)

* Fix code examples * Fix code example
2021-12-15 17:06:38 +01:00
parent 48d4827697
commit 50bc57cef8
2 changed files with 122 additions and 19 deletions
--- a/docs/source/model_doc/perceiver.mdx
+++ b/docs/source/model_doc/perceiver.mdx
@@ -81,9 +81,10 @@ Tips:
 - The quickest way to get started with the Perceiver is by checking the [tutorial
  notebooks](https://github.com/NielsRogge/Transformers-Tutorials/tree/master/Perceiver).
- Note that the models available in the library only showcase some examples of what you can do with the Perceiver.
+- Refer to the [blog post](https://huggingface.co/blog/perceiver) if you want to fully understand how the model works and
-  There are many more use cases, including question answering,
+is implemented in the library. Note that the models available in the library only showcase some examples of what you can do
-named-entity recognition, object detection, audio classification, video classification, etc.
+with the Perceiver. There are many more use cases, including question answering, named-entity recognition, object detection, 
 audio classification, video classification, etc. 
 ## Perceiver specific outputs
@@ -102,10 +103,7 @@ named-entity recognition, object detection, audio classification, video classifi
 ## PerceiverTokenizer
 [[autodoc]] PerceiverTokenizer
-    - build_inputs_with_special_tokens
+    - __call__
    - get_special_tokens_mask
    - create_token_type_ids_from_sequences
    - save_vocabulary
 ## PerceiverFeatureExtractor
--- a/src/transformers/models/perceiver/modeling_perceiver.py
+++ b/src/transformers/models/perceiver/modeling_perceiver.py
@@ -757,12 +757,7 @@ class PerceiverModel(PerceiverPreTrainedModel):
            self.encoder.layer[layer].attention.prune_heads(heads)
    @add_start_docstrings_to_model_forward(PERCEIVER_INPUTS_DOCSTRING.format("(batch_size, sequence_length)"))
-    @add_code_sample_docstrings(
+    @replace_return_docstrings(output_type=PerceiverModelOutput, config_class=_CONFIG_FOR_DOC)
        processor_class=_TOKENIZER_FOR_DOC,
        checkpoint=_CHECKPOINT_FOR_DOC,
        output_type=PerceiverModelOutput,
        config_class=_CONFIG_FOR_DOC,
    )
    def forward(
        self,
        inputs,
@@ -773,6 +768,85 @@ class PerceiverModel(PerceiverPreTrainedModel):
        output_hidden_states=None,
        return_dict=None,
    ):
        r"""
        Returns:
        Examples::
            >>> from transformers import PerceiverConfig, PerceiverTokenizer, PerceiverFeatureExtractor, PerceiverModel
            >>> from transformers.models.perceiver.modeling_perceiver import PerceiverTextPreprocessor, PerceiverImagePreprocessor, PerceiverClassificationDecoder
            >>> import torch
            >>> import requests
            >>> from PIL import Image
            >>> # EXAMPLE 1: using the Perceiver to classify texts
            >>> # - we define a TextPreprocessor, which can be used to embed tokens
            >>> # - we define a ClassificationDecoder, which can be used to decode the
            >>> # final hidden states of the latents to classification logits
            >>> # using trainable position embeddings
            >>> config = PerceiverConfig()
            >>> preprocessor = PerceiverTextPreprocessor(config)
            >>> decoder = PerceiverClassificationDecoder(config,
            ...                                          num_channels=config.d_latents,
            ...                                          trainable_position_encoding_kwargs=dict(num_channels=config.d_latents, index_dims=1),
            ...                                          use_query_residual=True)
            >>> model = PerceiverModel(config, input_preprocessor=preprocessor, decoder=decoder)
            >>> # you can then do a forward pass as follows:
            >>> tokenizer = PerceiverTokenizer()
            >>> text = "hello world"
            >>> inputs = tokenizer(text, return_tensors="pt").input_ids
            >>> with torch.no_grad():
            ...     outputs = model(inputs=inputs)
            >>> logits = outputs.logits
            >>> # to train, one can train the model using standard cross-entropy:
            >>> criterion = torch.nn.CrossEntropyLoss()
            >>> labels = torch.tensor([1])
            >>> loss = criterion(logits, labels)
            >>> # EXAMPLE 2: using the Perceiver to classify images
            >>> # - we define an ImagePreprocessor, which can be used to embed images
            >>> preprocessor = PerceiverImagePreprocessor(
            ...     config,
            ...     prep_type="conv1x1",
            ...     spatial_downsample=1,
            ...     out_channels=256,
            ...     position_encoding_type="trainable",
            ...     concat_or_add_pos="concat",
            ...     project_pos_dim=256,
            ...     trainable_position_encoding_kwargs=dict(num_channels=256, index_dims=config.image_size ** 2),
            ... )
            >>> model = PerceiverModel(
            ...         config,
            ...         input_preprocessor=preprocessor,
            ...         decoder=PerceiverClassificationDecoder(
            ...              config,
            ...              num_channels=config.d_latents,
            ...              trainable_position_encoding_kwargs=dict(num_channels=config.d_latents, index_dims=1),
            ...              use_query_residual=True,
            ...          ),
            ... )
            >>> # you can then do a forward pass as follows:
            >>> feature_extractor = PerceiverFeatureExtractor()
            >>> url = 'http://images.cocodataset.org/val2017/000000039769.jpg'
            >>> image = Image.open(requests.get(url, stream=True).raw)
            >>> inputs = feature_extractor(image, return_tensors="pt").pixel_values
            >>> with torch.no_grad():
            ...     outputs = model(inputs=inputs)
            >>> logits = outputs.logits
            >>> # to train, one can train the model using standard cross-entropy:
            >>> criterion = torch.nn.CrossEntropyLoss()
            >>> labels = torch.tensor([1])
            >>> loss = criterion(logits, labels)
        """
        output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
        output_hidden_states = (
            output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
@@ -901,12 +975,7 @@ class PerceiverForMaskedLM(PerceiverPreTrainedModel):
        self.post_init()
    @add_start_docstrings_to_model_forward(PERCEIVER_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
-    @add_code_sample_docstrings(
+    @replace_return_docstrings(output_type=PerceiverMaskedLMOutput, config_class=_CONFIG_FOR_DOC)
        processor_class=_TOKENIZER_FOR_DOC,
        checkpoint=_CHECKPOINT_FOR_DOC,
        output_type=PerceiverMaskedLMOutput,
        config_class=_CONFIG_FOR_DOC,
    )
    def forward(
        self,
        inputs=None,
@@ -923,6 +992,42 @@ class PerceiverForMaskedLM(PerceiverPreTrainedModel):
            Labels for computing the masked language modeling loss. Indices should be in ``[-100, 0, ...,
            config.vocab_size]`` (see ``input_ids`` docstring) Tokens with indices set to ``-100`` are ignored
            (masked), the loss is only computed for the tokens with labels in ``[0, ..., config.vocab_size]``
        Returns:
        Examples::
            >>> from transformers import PerceiverTokenizer, PerceiverForMaskedLM
            >>> import torch
            >>> tokenizer = PerceiverTokenizer.from_pretrained('deepmind/language-perceiver')
            >>> model = PerceiverForMaskedLM.from_pretrained('deepmind/language-perceiver')
            >>> # training
            >>> text = "This is an incomplete sentence where some words are missing."
            >>> inputs = tokenizer(text, padding="max_length", return_tensors="pt")
            >>> # mask " missing."
            >>> inputs['input_ids'][0, 52:61] = tokenizer.mask_token_id
            >>> labels = tokenizer(text, padding="max_length", return_tensors="pt").input_ids
            >>> outputs = model(**inputs, labels=labels)
            >>> loss = outputs.loss
            >>> logits = outputs.logits
            >>> # inference
            >>> text = "This is an incomplete sentence where some words are missing."
            >>> encoding = tokenizer(text, padding="max_length", return_tensors="pt")
            >>> # mask bytes corresponding to " missing.". Note that the model performs much better if the masked span starts with a space.
            >>> encoding['input_ids'][0, 52:61] = tokenizer.mask_token_id
            >>> # forward pass
            >>> with torch.no_grad():
            ...     outputs = model(**encoding)
            >>> logits = outputs.logits
            >>> masked_tokens_predictions = logits[0, 52:61].argmax(dim=-1).tolist()
            >>> tokenizer.decode(masked_tokens_predictions)
            ' missing.'
        """
        if inputs is not None and input_ids is not None:
            raise ValueError("You cannot use both `inputs` and `input_ids`")