Update Perceiver code examples (#14783)
* Fix code examples * Fix code example
This commit is contained in:
@@ -81,9 +81,10 @@ Tips:
|
|||||||
|
|
||||||
- The quickest way to get started with the Perceiver is by checking the [tutorial
|
- The quickest way to get started with the Perceiver is by checking the [tutorial
|
||||||
notebooks](https://github.com/NielsRogge/Transformers-Tutorials/tree/master/Perceiver).
|
notebooks](https://github.com/NielsRogge/Transformers-Tutorials/tree/master/Perceiver).
|
||||||
- Note that the models available in the library only showcase some examples of what you can do with the Perceiver.
|
- Refer to the [blog post](https://huggingface.co/blog/perceiver) if you want to fully understand how the model works and
|
||||||
There are many more use cases, including question answering,
|
is implemented in the library. Note that the models available in the library only showcase some examples of what you can do
|
||||||
named-entity recognition, object detection, audio classification, video classification, etc.
|
with the Perceiver. There are many more use cases, including question answering, named-entity recognition, object detection,
|
||||||
|
audio classification, video classification, etc.
|
||||||
|
|
||||||
## Perceiver specific outputs
|
## Perceiver specific outputs
|
||||||
|
|
||||||
@@ -102,10 +103,7 @@ named-entity recognition, object detection, audio classification, video classifi
|
|||||||
## PerceiverTokenizer
|
## PerceiverTokenizer
|
||||||
|
|
||||||
[[autodoc]] PerceiverTokenizer
|
[[autodoc]] PerceiverTokenizer
|
||||||
- build_inputs_with_special_tokens
|
- __call__
|
||||||
- get_special_tokens_mask
|
|
||||||
- create_token_type_ids_from_sequences
|
|
||||||
- save_vocabulary
|
|
||||||
|
|
||||||
## PerceiverFeatureExtractor
|
## PerceiverFeatureExtractor
|
||||||
|
|
||||||
|
|||||||
@@ -757,12 +757,7 @@ class PerceiverModel(PerceiverPreTrainedModel):
|
|||||||
self.encoder.layer[layer].attention.prune_heads(heads)
|
self.encoder.layer[layer].attention.prune_heads(heads)
|
||||||
|
|
||||||
@add_start_docstrings_to_model_forward(PERCEIVER_INPUTS_DOCSTRING.format("(batch_size, sequence_length)"))
|
@add_start_docstrings_to_model_forward(PERCEIVER_INPUTS_DOCSTRING.format("(batch_size, sequence_length)"))
|
||||||
@add_code_sample_docstrings(
|
@replace_return_docstrings(output_type=PerceiverModelOutput, config_class=_CONFIG_FOR_DOC)
|
||||||
processor_class=_TOKENIZER_FOR_DOC,
|
|
||||||
checkpoint=_CHECKPOINT_FOR_DOC,
|
|
||||||
output_type=PerceiverModelOutput,
|
|
||||||
config_class=_CONFIG_FOR_DOC,
|
|
||||||
)
|
|
||||||
def forward(
|
def forward(
|
||||||
self,
|
self,
|
||||||
inputs,
|
inputs,
|
||||||
@@ -773,6 +768,85 @@ class PerceiverModel(PerceiverPreTrainedModel):
|
|||||||
output_hidden_states=None,
|
output_hidden_states=None,
|
||||||
return_dict=None,
|
return_dict=None,
|
||||||
):
|
):
|
||||||
|
r"""
|
||||||
|
Returns:
|
||||||
|
|
||||||
|
Examples::
|
||||||
|
|
||||||
|
>>> from transformers import PerceiverConfig, PerceiverTokenizer, PerceiverFeatureExtractor, PerceiverModel
|
||||||
|
>>> from transformers.models.perceiver.modeling_perceiver import PerceiverTextPreprocessor, PerceiverImagePreprocessor, PerceiverClassificationDecoder
|
||||||
|
>>> import torch
|
||||||
|
>>> import requests
|
||||||
|
>>> from PIL import Image
|
||||||
|
|
||||||
|
>>> # EXAMPLE 1: using the Perceiver to classify texts
|
||||||
|
>>> # - we define a TextPreprocessor, which can be used to embed tokens
|
||||||
|
>>> # - we define a ClassificationDecoder, which can be used to decode the
|
||||||
|
>>> # final hidden states of the latents to classification logits
|
||||||
|
>>> # using trainable position embeddings
|
||||||
|
>>> config = PerceiverConfig()
|
||||||
|
>>> preprocessor = PerceiverTextPreprocessor(config)
|
||||||
|
>>> decoder = PerceiverClassificationDecoder(config,
|
||||||
|
... num_channels=config.d_latents,
|
||||||
|
... trainable_position_encoding_kwargs=dict(num_channels=config.d_latents, index_dims=1),
|
||||||
|
... use_query_residual=True)
|
||||||
|
>>> model = PerceiverModel(config, input_preprocessor=preprocessor, decoder=decoder)
|
||||||
|
|
||||||
|
>>> # you can then do a forward pass as follows:
|
||||||
|
>>> tokenizer = PerceiverTokenizer()
|
||||||
|
>>> text = "hello world"
|
||||||
|
>>> inputs = tokenizer(text, return_tensors="pt").input_ids
|
||||||
|
|
||||||
|
>>> with torch.no_grad():
|
||||||
|
...     outputs = model(inputs=inputs)
|
||||||
|
>>> logits = outputs.logits
|
||||||
|
|
||||||
|
>>> # to train the model, one can use the standard cross-entropy loss:
|
||||||
|
>>> criterion = torch.nn.CrossEntropyLoss()
|
||||||
|
|
||||||
|
>>> labels = torch.tensor([1])
|
||||||
|
>>> loss = criterion(logits, labels)
|
||||||
|
|
||||||
|
>>> # EXAMPLE 2: using the Perceiver to classify images
|
||||||
|
>>> # - we define an ImagePreprocessor, which can be used to embed images
|
||||||
|
>>> preprocessor = PerceiverImagePreprocessor(
|
||||||
|
...     config,
|
||||||
|
...     prep_type="conv1x1",
|
||||||
|
...     spatial_downsample=1,
|
||||||
|
...     out_channels=256,
|
||||||
|
...     position_encoding_type="trainable",
|
||||||
|
...     concat_or_add_pos="concat",
|
||||||
|
...     project_pos_dim=256,
|
||||||
|
...     trainable_position_encoding_kwargs=dict(num_channels=256, index_dims=config.image_size ** 2),
|
||||||
|
... )
|
||||||
|
|
||||||
|
>>> model = PerceiverModel(
|
||||||
|
... config,
|
||||||
|
... input_preprocessor=preprocessor,
|
||||||
|
... decoder=PerceiverClassificationDecoder(
|
||||||
|
... config,
|
||||||
|
... num_channels=config.d_latents,
|
||||||
|
... trainable_position_encoding_kwargs=dict(num_channels=config.d_latents, index_dims=1),
|
||||||
|
... use_query_residual=True,
|
||||||
|
... ),
|
||||||
|
... )
|
||||||
|
|
||||||
|
>>> # you can then do a forward pass as follows:
|
||||||
|
>>> feature_extractor = PerceiverFeatureExtractor()
|
||||||
|
>>> url = 'http://images.cocodataset.org/val2017/000000039769.jpg'
|
||||||
|
>>> image = Image.open(requests.get(url, stream=True).raw)
|
||||||
|
>>> inputs = feature_extractor(image, return_tensors="pt").pixel_values
|
||||||
|
|
||||||
|
>>> with torch.no_grad():
|
||||||
|
...     outputs = model(inputs=inputs)
|
||||||
|
>>> logits = outputs.logits
|
||||||
|
|
||||||
|
>>> # to train the model, one can use the standard cross-entropy loss:
|
||||||
|
>>> criterion = torch.nn.CrossEntropyLoss()
|
||||||
|
|
||||||
|
>>> labels = torch.tensor([1])
|
||||||
|
>>> loss = criterion(logits, labels)
|
||||||
|
"""
|
||||||
output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
|
output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
|
||||||
output_hidden_states = (
|
output_hidden_states = (
|
||||||
output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
|
output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
|
||||||
@@ -901,12 +975,7 @@ class PerceiverForMaskedLM(PerceiverPreTrainedModel):
|
|||||||
self.post_init()
|
self.post_init()
|
||||||
|
|
||||||
@add_start_docstrings_to_model_forward(PERCEIVER_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
|
@add_start_docstrings_to_model_forward(PERCEIVER_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
|
||||||
@add_code_sample_docstrings(
|
@replace_return_docstrings(output_type=PerceiverMaskedLMOutput, config_class=_CONFIG_FOR_DOC)
|
||||||
processor_class=_TOKENIZER_FOR_DOC,
|
|
||||||
checkpoint=_CHECKPOINT_FOR_DOC,
|
|
||||||
output_type=PerceiverMaskedLMOutput,
|
|
||||||
config_class=_CONFIG_FOR_DOC,
|
|
||||||
)
|
|
||||||
def forward(
|
def forward(
|
||||||
self,
|
self,
|
||||||
inputs=None,
|
inputs=None,
|
||||||
@@ -923,6 +992,42 @@ class PerceiverForMaskedLM(PerceiverPreTrainedModel):
|
|||||||
Labels for computing the masked language modeling loss. Indices should be in ``[-100, 0, ...,
|
Labels for computing the masked language modeling loss. Indices should be in ``[-100, 0, ...,
|
||||||
config.vocab_size]`` (see ``input_ids`` docstring). Tokens with indices set to ``-100`` are ignored
|
config.vocab_size]`` (see ``input_ids`` docstring). Tokens with indices set to ``-100`` are ignored
|
||||||
(masked), the loss is only computed for the tokens with labels in ``[0, ..., config.vocab_size]``
|
(masked), the loss is only computed for the tokens with labels in ``[0, ..., config.vocab_size]``
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
|
||||||
|
Examples::
|
||||||
|
>>> from transformers import PerceiverTokenizer, PerceiverForMaskedLM
|
||||||
|
>>> import torch
|
||||||
|
|
||||||
|
>>> tokenizer = PerceiverTokenizer.from_pretrained('deepmind/language-perceiver')
|
||||||
|
>>> model = PerceiverForMaskedLM.from_pretrained('deepmind/language-perceiver')
|
||||||
|
|
||||||
|
>>> # training
|
||||||
|
>>> text = "This is an incomplete sentence where some words are missing."
|
||||||
|
>>> inputs = tokenizer(text, padding="max_length", return_tensors="pt")
|
||||||
|
>>> # mask " missing."
|
||||||
|
>>> inputs['input_ids'][0, 52:61] = tokenizer.mask_token_id
|
||||||
|
>>> labels = tokenizer(text, padding="max_length", return_tensors="pt").input_ids
|
||||||
|
|
||||||
|
>>> outputs = model(**inputs, labels=labels)
|
||||||
|
>>> loss = outputs.loss
|
||||||
|
>>> logits = outputs.logits
|
||||||
|
|
||||||
|
>>> # inference
|
||||||
|
>>> text = "This is an incomplete sentence where some words are missing."
|
||||||
|
>>> encoding = tokenizer(text, padding="max_length", return_tensors="pt")
|
||||||
|
|
||||||
|
>>> # mask bytes corresponding to " missing.". Note that the model performs much better if the masked span starts with a space.
|
||||||
|
>>> encoding['input_ids'][0, 52:61] = tokenizer.mask_token_id
|
||||||
|
|
||||||
|
>>> # forward pass
|
||||||
|
>>> with torch.no_grad():
|
||||||
|
...     outputs = model(**encoding)
|
||||||
|
>>> logits = outputs.logits
|
||||||
|
|
||||||
|
>>> masked_tokens_predictions = logits[0, 52:61].argmax(dim=-1).tolist()
|
||||||
|
>>> tokenizer.decode(masked_tokens_predictions)
|
||||||
|
' missing.'
|
||||||
"""
|
"""
|
||||||
if inputs is not None and input_ids is not None:
|
if inputs is not None and input_ids is not None:
|
||||||
raise ValueError("You cannot use both `inputs` and `input_ids`")
|
raise ValueError("You cannot use both `inputs` and `input_ids`")
|
||||||
|
|||||||
Reference in New Issue
Block a user