Add prefix to examples in model_doc rst (#11226)
* Add prefix to examples in model_doc rst
* Apply suggestions from code review

Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com>
Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com>
This commit is contained in:
@@ -38,22 +38,22 @@ Usage:
|
|||||||
|
|
||||||
.. code-block::
|
.. code-block::
|
||||||
|
|
||||||
# leverage checkpoints for Bert2Bert model...
|
>>> # leverage checkpoints for Bert2Bert model...
|
||||||
# use BERT's cls token as BOS token and sep token as EOS token
|
>>> # use BERT's cls token as BOS token and sep token as EOS token
|
||||||
encoder = BertGenerationEncoder.from_pretrained("bert-large-uncased", bos_token_id=101, eos_token_id=102)
|
>>> encoder = BertGenerationEncoder.from_pretrained("bert-large-uncased", bos_token_id=101, eos_token_id=102)
|
||||||
# add cross attention layers and use BERT's cls token as BOS token and sep token as EOS token
|
>>> # add cross attention layers and use BERT's cls token as BOS token and sep token as EOS token
|
||||||
decoder = BertGenerationDecoder.from_pretrained("bert-large-uncased", add_cross_attention=True, is_decoder=True, bos_token_id=101, eos_token_id=102)
|
>>> decoder = BertGenerationDecoder.from_pretrained("bert-large-uncased", add_cross_attention=True, is_decoder=True, bos_token_id=101, eos_token_id=102)
|
||||||
bert2bert = EncoderDecoderModel(encoder=encoder, decoder=decoder)
|
>>> bert2bert = EncoderDecoderModel(encoder=encoder, decoder=decoder)
|
||||||
|
|
||||||
# create tokenizer...
|
>>> # create tokenizer...
|
||||||
tokenizer = BertTokenizer.from_pretrained("bert-large-uncased")
|
>>> tokenizer = BertTokenizer.from_pretrained("bert-large-uncased")
|
||||||
|
|
||||||
input_ids = tokenizer('This is a long article to summarize', add_special_tokens=False, return_tensors="pt").input_ids
|
>>> input_ids = tokenizer('This is a long article to summarize', add_special_tokens=False, return_tensors="pt").input_ids
|
||||||
labels = tokenizer('This is a short summary', return_tensors="pt").input_ids
|
>>> labels = tokenizer('This is a short summary', return_tensors="pt").input_ids
|
||||||
|
|
||||||
# train...
|
>>> # train...
|
||||||
loss = bert2bert(input_ids=input_ids, decoder_input_ids=labels, labels=labels).loss
|
>>> loss = bert2bert(input_ids=input_ids, decoder_input_ids=labels, labels=labels).loss
|
||||||
loss.backward()
|
>>> loss.backward()
|
||||||
|
|
||||||
|
|
||||||
- Pretrained :class:`~transformers.EncoderDecoderModel` are also directly available in the model hub, e.g.,
|
- Pretrained :class:`~transformers.EncoderDecoderModel` are also directly available in the model hub, e.g.,
|
||||||
@@ -61,15 +61,15 @@ Usage:
|
|||||||
|
|
||||||
.. code-block::
|
.. code-block::
|
||||||
|
|
||||||
# instantiate sentence fusion model
|
>>> # instantiate sentence fusion model
|
||||||
sentence_fuser = EncoderDecoderModel.from_pretrained("google/roberta2roberta_L-24_discofuse")
|
>>> sentence_fuser = EncoderDecoderModel.from_pretrained("google/roberta2roberta_L-24_discofuse")
|
||||||
tokenizer = AutoTokenizer.from_pretrained("google/roberta2roberta_L-24_discofuse")
|
>>> tokenizer = AutoTokenizer.from_pretrained("google/roberta2roberta_L-24_discofuse")
|
||||||
|
|
||||||
input_ids = tokenizer('This is the first sentence. This is the second sentence.', add_special_tokens=False, return_tensors="pt").input_ids
|
>>> input_ids = tokenizer('This is the first sentence. This is the second sentence.', add_special_tokens=False, return_tensors="pt").input_ids
|
||||||
|
|
||||||
outputs = sentence_fuser.generate(input_ids)
|
>>> outputs = sentence_fuser.generate(input_ids)
|
||||||
|
|
||||||
print(tokenizer.decode(outputs[0]))
|
>>> print(tokenizer.decode(outputs[0]))
|
||||||
|
|
||||||
|
|
||||||
Tips:
|
Tips:
|
||||||
|
|||||||
@@ -31,28 +31,28 @@ Example of use:
|
|||||||
|
|
||||||
.. code-block::
|
.. code-block::
|
||||||
|
|
||||||
import torch
|
>>> import torch
|
||||||
from transformers import AutoModel, AutoTokenizer
|
>>> from transformers import AutoModel, AutoTokenizer
|
||||||
|
|
||||||
bertweet = AutoModel.from_pretrained("vinai/bertweet-base")
|
>>> bertweet = AutoModel.from_pretrained("vinai/bertweet-base")
|
||||||
|
|
||||||
# For transformers v4.x+:
|
>>> # For transformers v4.x+:
|
||||||
tokenizer = AutoTokenizer.from_pretrained("vinai/bertweet-base", use_fast=False)
|
>>> tokenizer = AutoTokenizer.from_pretrained("vinai/bertweet-base", use_fast=False)
|
||||||
|
|
||||||
# For transformers v3.x:
|
>>> # For transformers v3.x:
|
||||||
# tokenizer = AutoTokenizer.from_pretrained("vinai/bertweet-base")
|
>>> # tokenizer = AutoTokenizer.from_pretrained("vinai/bertweet-base")
|
||||||
|
|
||||||
# INPUT TWEET IS ALREADY NORMALIZED!
|
>>> # INPUT TWEET IS ALREADY NORMALIZED!
|
||||||
line = "SC has first two presumptive cases of coronavirus , DHEC confirms HTTPURL via @USER :cry:"
|
>>> line = "SC has first two presumptive cases of coronavirus , DHEC confirms HTTPURL via @USER :cry:"
|
||||||
|
|
||||||
input_ids = torch.tensor([tokenizer.encode(line)])
|
>>> input_ids = torch.tensor([tokenizer.encode(line)])
|
||||||
|
|
||||||
with torch.no_grad():
|
>>> with torch.no_grad():
|
||||||
    features = bertweet(input_ids) # Model outputs are now tuples
|
...     features = bertweet(input_ids) # Model outputs are now tuples
|
||||||
|
|
||||||
## With TensorFlow 2.0+:
|
>>> # With TensorFlow 2.0+:
|
||||||
# from transformers import TFAutoModel
|
>>> # from transformers import TFAutoModel
|
||||||
# bertweet = TFAutoModel.from_pretrained("vinai/bertweet-base")
|
>>> # bertweet = TFAutoModel.from_pretrained("vinai/bertweet-base")
|
||||||
|
|
||||||
|
|
||||||
The original code can be found `here <https://github.com/VinAIResearch/BERTweet>`__.
|
The original code can be found `here <https://github.com/VinAIResearch/BERTweet>`__.
|
||||||
|
|||||||
@@ -40,20 +40,20 @@ Examples of use:
|
|||||||
|
|
||||||
.. code-block::
|
.. code-block::
|
||||||
|
|
||||||
from transformers import HerbertTokenizer, RobertaModel
|
>>> from transformers import HerbertTokenizer, RobertaModel
|
||||||
|
|
||||||
tokenizer = HerbertTokenizer.from_pretrained("allegro/herbert-klej-cased-tokenizer-v1")
|
>>> tokenizer = HerbertTokenizer.from_pretrained("allegro/herbert-klej-cased-tokenizer-v1")
|
||||||
model = RobertaModel.from_pretrained("allegro/herbert-klej-cased-v1")
|
>>> model = RobertaModel.from_pretrained("allegro/herbert-klej-cased-v1")
|
||||||
|
|
||||||
encoded_input = tokenizer.encode("Kto ma lepszą sztukę, ma lepszy rząd – to jasne.", return_tensors='pt')
|
>>> encoded_input = tokenizer.encode("Kto ma lepszą sztukę, ma lepszy rząd – to jasne.", return_tensors='pt')
|
||||||
outputs = model(encoded_input)
|
>>> outputs = model(encoded_input)
|
||||||
|
|
||||||
# HerBERT can also be loaded using AutoTokenizer and AutoModel:
|
>>> # HerBERT can also be loaded using AutoTokenizer and AutoModel:
|
||||||
import torch
|
>>> import torch
|
||||||
from transformers import AutoModel, AutoTokenizer
|
>>> from transformers import AutoModel, AutoTokenizer
|
||||||
|
|
||||||
tokenizer = AutoTokenizer.from_pretrained("allegro/herbert-klej-cased-tokenizer-v1")
|
>>> tokenizer = AutoTokenizer.from_pretrained("allegro/herbert-klej-cased-tokenizer-v1")
|
||||||
model = AutoModel.from_pretrained("allegro/herbert-klej-cased-v1")
|
>>> model = AutoModel.from_pretrained("allegro/herbert-klej-cased-v1")
|
||||||
|
|
||||||
|
|
||||||
The original code can be found `here <https://github.com/allegro/HerBERT>`__.
|
The original code can be found `here <https://github.com/allegro/HerBERT>`__.
|
||||||
|
|||||||
@@ -31,23 +31,23 @@ Example of use:
|
|||||||
|
|
||||||
.. code-block::
|
.. code-block::
|
||||||
|
|
||||||
import torch
|
>>> import torch
|
||||||
from transformers import AutoModel, AutoTokenizer
|
>>> from transformers import AutoModel, AutoTokenizer
|
||||||
|
|
||||||
phobert = AutoModel.from_pretrained("vinai/phobert-base")
|
>>> phobert = AutoModel.from_pretrained("vinai/phobert-base")
|
||||||
tokenizer = AutoTokenizer.from_pretrained("vinai/phobert-base")
|
>>> tokenizer = AutoTokenizer.from_pretrained("vinai/phobert-base")
|
||||||
|
|
||||||
# INPUT TEXT MUST BE ALREADY WORD-SEGMENTED!
|
>>> # INPUT TEXT MUST BE ALREADY WORD-SEGMENTED!
|
||||||
line = "Tôi là sinh_viên trường đại_học Công_nghệ ."
|
>>> line = "Tôi là sinh_viên trường đại_học Công_nghệ ."
|
||||||
|
|
||||||
input_ids = torch.tensor([tokenizer.encode(line)])
|
>>> input_ids = torch.tensor([tokenizer.encode(line)])
|
||||||
|
|
||||||
with torch.no_grad():
|
>>> with torch.no_grad():
|
||||||
    features = phobert(input_ids) # Model outputs are now tuples
|
...     features = phobert(input_ids) # Model outputs are now tuples
|
||||||
|
|
||||||
## With TensorFlow 2.0+:
|
>>> # With TensorFlow 2.0+:
|
||||||
# from transformers import TFAutoModel
|
>>> # from transformers import TFAutoModel
|
||||||
# phobert = TFAutoModel.from_pretrained("vinai/phobert-base")
|
>>> # phobert = TFAutoModel.from_pretrained("vinai/phobert-base")
|
||||||
|
|
||||||
|
|
||||||
The original code can be found `here <https://github.com/VinAIResearch/PhoBERT>`__.
|
The original code can be found `here <https://github.com/VinAIResearch/PhoBERT>`__.
|
||||||
|
|||||||
Reference in New Issue
Block a user