Add prefix to examples in model_doc rst (#11226)
* Add prefix to examples in model_doc rst
* Apply suggestions from code review

Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com>
Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com>
This commit is contained in:
@@ -38,22 +38,22 @@ Usage:
|
||||
|
||||
.. code-block::
|
||||
|
||||
# leverage checkpoints for Bert2Bert model...
|
||||
# use BERT's cls token as BOS token and sep token as EOS token
|
||||
encoder = BertGenerationEncoder.from_pretrained("bert-large-uncased", bos_token_id=101, eos_token_id=102)
|
||||
# add cross attention layers and use BERT's cls token as BOS token and sep token as EOS token
|
||||
decoder = BertGenerationDecoder.from_pretrained("bert-large-uncased", add_cross_attention=True, is_decoder=True, bos_token_id=101, eos_token_id=102)
|
||||
bert2bert = EncoderDecoderModel(encoder=encoder, decoder=decoder)
|
||||
>>> # leverage checkpoints for Bert2Bert model...
|
||||
>>> # use BERT's cls token as BOS token and sep token as EOS token
|
||||
>>> encoder = BertGenerationEncoder.from_pretrained("bert-large-uncased", bos_token_id=101, eos_token_id=102)
|
||||
>>> # add cross attention layers and use BERT's cls token as BOS token and sep token as EOS token
|
||||
>>> decoder = BertGenerationDecoder.from_pretrained("bert-large-uncased", add_cross_attention=True, is_decoder=True, bos_token_id=101, eos_token_id=102)
|
||||
>>> bert2bert = EncoderDecoderModel(encoder=encoder, decoder=decoder)
|
||||
|
||||
# create tokenizer...
|
||||
tokenizer = BertTokenizer.from_pretrained("bert-large-uncased")
|
||||
>>> # create tokenizer...
|
||||
>>> tokenizer = BertTokenizer.from_pretrained("bert-large-uncased")
|
||||
|
||||
input_ids = tokenizer('This is a long article to summarize', add_special_tokens=False, return_tensors="pt").input_ids
|
||||
labels = tokenizer('This is a short summary', return_tensors="pt").input_ids
|
||||
>>> input_ids = tokenizer('This is a long article to summarize', add_special_tokens=False, return_tensors="pt").input_ids
|
||||
>>> labels = tokenizer('This is a short summary', return_tensors="pt").input_ids
|
||||
|
||||
# train...
|
||||
loss = bert2bert(input_ids=input_ids, decoder_input_ids=labels, labels=labels).loss
|
||||
loss.backward()
|
||||
>>> # train...
|
||||
>>> loss = bert2bert(input_ids=input_ids, decoder_input_ids=labels, labels=labels).loss
|
||||
>>> loss.backward()
|
||||
|
||||
|
||||
- Pretrained :class:`~transformers.EncoderDecoderModel` checkpoints are also directly available in the model hub, e.g.,
|
||||
@@ -61,15 +61,15 @@ Usage:
|
||||
|
||||
.. code-block::
|
||||
|
||||
# instantiate sentence fusion model
|
||||
sentence_fuser = EncoderDecoderModel.from_pretrained("google/roberta2roberta_L-24_discofuse")
|
||||
tokenizer = AutoTokenizer.from_pretrained("google/roberta2roberta_L-24_discofuse")
|
||||
>>> # instantiate sentence fusion model
|
||||
>>> sentence_fuser = EncoderDecoderModel.from_pretrained("google/roberta2roberta_L-24_discofuse")
|
||||
>>> tokenizer = AutoTokenizer.from_pretrained("google/roberta2roberta_L-24_discofuse")
|
||||
|
||||
input_ids = tokenizer('This is the first sentence. This is the second sentence.', add_special_tokens=False, return_tensors="pt").input_ids
|
||||
>>> input_ids = tokenizer('This is the first sentence. This is the second sentence.', add_special_tokens=False, return_tensors="pt").input_ids
|
||||
|
||||
outputs = sentence_fuser.generate(input_ids)
|
||||
>>> outputs = sentence_fuser.generate(input_ids)
|
||||
|
||||
print(tokenizer.decode(outputs[0]))
|
||||
>>> print(tokenizer.decode(outputs[0]))
|
||||
|
||||
|
||||
Tips:
|
||||
|
||||
@@ -31,28 +31,28 @@ Example of use:
|
||||
|
||||
.. code-block::
|
||||
|
||||
import torch
|
||||
from transformers import AutoModel, AutoTokenizer
|
||||
>>> import torch
|
||||
>>> from transformers import AutoModel, AutoTokenizer
|
||||
|
||||
bertweet = AutoModel.from_pretrained("vinai/bertweet-base")
|
||||
>>> bertweet = AutoModel.from_pretrained("vinai/bertweet-base")
|
||||
|
||||
# For transformers v4.x+:
|
||||
tokenizer = AutoTokenizer.from_pretrained("vinai/bertweet-base", use_fast=False)
|
||||
>>> # For transformers v4.x+:
|
||||
>>> tokenizer = AutoTokenizer.from_pretrained("vinai/bertweet-base", use_fast=False)
|
||||
|
||||
# For transformers v3.x:
|
||||
# tokenizer = AutoTokenizer.from_pretrained("vinai/bertweet-base")
|
||||
>>> # For transformers v3.x:
|
||||
>>> # tokenizer = AutoTokenizer.from_pretrained("vinai/bertweet-base")
|
||||
|
||||
# INPUT TWEET IS ALREADY NORMALIZED!
|
||||
line = "SC has first two presumptive cases of coronavirus , DHEC confirms HTTPURL via @USER :cry:"
|
||||
>>> # INPUT TWEET IS ALREADY NORMALIZED!
|
||||
>>> line = "SC has first two presumptive cases of coronavirus , DHEC confirms HTTPURL via @USER :cry:"
|
||||
|
||||
input_ids = torch.tensor([tokenizer.encode(line)])
|
||||
>>> input_ids = torch.tensor([tokenizer.encode(line)])
|
||||
|
||||
with torch.no_grad():
|
||||
features = bertweet(input_ids) # Models outputs are now tuples
|
||||
>>> with torch.no_grad():
|
||||
...     features = bertweet(input_ids)  # Model outputs are now tuples
|
||||
|
||||
## With TensorFlow 2.0+:
|
||||
# from transformers import TFAutoModel
|
||||
# bertweet = TFAutoModel.from_pretrained("vinai/bertweet-base")
|
||||
>>> # With TensorFlow 2.0+:
|
||||
>>> # from transformers import TFAutoModel
|
||||
>>> # bertweet = TFAutoModel.from_pretrained("vinai/bertweet-base")
|
||||
|
||||
|
||||
The original code can be found `here <https://github.com/VinAIResearch/BERTweet>`__.
|
||||
|
||||
@@ -40,20 +40,20 @@ Examples of use:
|
||||
|
||||
.. code-block::
|
||||
|
||||
from transformers import HerbertTokenizer, RobertaModel
|
||||
>>> from transformers import HerbertTokenizer, RobertaModel
|
||||
|
||||
tokenizer = HerbertTokenizer.from_pretrained("allegro/herbert-klej-cased-tokenizer-v1")
|
||||
model = RobertaModel.from_pretrained("allegro/herbert-klej-cased-v1")
|
||||
>>> tokenizer = HerbertTokenizer.from_pretrained("allegro/herbert-klej-cased-tokenizer-v1")
|
||||
>>> model = RobertaModel.from_pretrained("allegro/herbert-klej-cased-v1")
|
||||
|
||||
encoded_input = tokenizer.encode("Kto ma lepszą sztukę, ma lepszy rząd – to jasne.", return_tensors='pt')
|
||||
outputs = model(encoded_input)
|
||||
>>> encoded_input = tokenizer.encode("Kto ma lepszą sztukę, ma lepszy rząd – to jasne.", return_tensors='pt')
|
||||
>>> outputs = model(encoded_input)
|
||||
|
||||
# HerBERT can also be loaded using AutoTokenizer and AutoModel:
|
||||
import torch
|
||||
from transformers import AutoModel, AutoTokenizer
|
||||
>>> # HerBERT can also be loaded using AutoTokenizer and AutoModel:
|
||||
>>> import torch
|
||||
>>> from transformers import AutoModel, AutoTokenizer
|
||||
|
||||
tokenizer = AutoTokenizer.from_pretrained("allegro/herbert-klej-cased-tokenizer-v1")
|
||||
model = AutoModel.from_pretrained("allegro/herbert-klej-cased-v1")
|
||||
>>> tokenizer = AutoTokenizer.from_pretrained("allegro/herbert-klej-cased-tokenizer-v1")
|
||||
>>> model = AutoModel.from_pretrained("allegro/herbert-klej-cased-v1")
|
||||
|
||||
|
||||
The original code can be found `here <https://github.com/allegro/HerBERT>`__.
|
||||
|
||||
@@ -31,23 +31,23 @@ Example of use:
|
||||
|
||||
.. code-block::
|
||||
|
||||
import torch
|
||||
from transformers import AutoModel, AutoTokenizer
|
||||
>>> import torch
|
||||
>>> from transformers import AutoModel, AutoTokenizer
|
||||
|
||||
phobert = AutoModel.from_pretrained("vinai/phobert-base")
|
||||
tokenizer = AutoTokenizer.from_pretrained("vinai/phobert-base")
|
||||
>>> phobert = AutoModel.from_pretrained("vinai/phobert-base")
|
||||
>>> tokenizer = AutoTokenizer.from_pretrained("vinai/phobert-base")
|
||||
|
||||
# INPUT TEXT MUST BE ALREADY WORD-SEGMENTED!
|
||||
line = "Tôi là sinh_viên trường đại_học Công_nghệ ."
|
||||
>>> # INPUT TEXT MUST BE ALREADY WORD-SEGMENTED!
|
||||
>>> line = "Tôi là sinh_viên trường đại_học Công_nghệ ."
|
||||
|
||||
input_ids = torch.tensor([tokenizer.encode(line)])
|
||||
>>> input_ids = torch.tensor([tokenizer.encode(line)])
|
||||
|
||||
with torch.no_grad():
|
||||
features = phobert(input_ids) # Models outputs are now tuples
|
||||
>>> with torch.no_grad():
|
||||
...     features = phobert(input_ids)  # Model outputs are now tuples
|
||||
|
||||
## With TensorFlow 2.0+:
|
||||
# from transformers import TFAutoModel
|
||||
# phobert = TFAutoModel.from_pretrained("vinai/phobert-base")
|
||||
>>> # With TensorFlow 2.0+:
|
||||
>>> # from transformers import TFAutoModel
|
||||
>>> # phobert = TFAutoModel.from_pretrained("vinai/phobert-base")
|
||||
|
||||
|
||||
The original code can be found `here <https://github.com/VinAIResearch/PhoBERT>`__.
|
||||
|
||||
Reference in New Issue
Block a user