Update all references to canonical models (#29001)

* Script & Manual edition

* Update
This commit is contained in:
Lysandre Debut
2024-02-16 08:16:58 +01:00
committed by GitHub
parent 1e402b957d
commit f497f564bb
561 changed files with 2682 additions and 2687 deletions

View File

@@ -64,15 +64,15 @@ for summarization: *summarize: ...*.
T5 comes in different sizes:
- [t5-small](https://huggingface.co/t5-small)
- [google-t5/t5-small](https://huggingface.co/google-t5/t5-small)
- [t5-base](https://huggingface.co/t5-base)
- [google-t5/t5-base](https://huggingface.co/google-t5/t5-base)
- [t5-large](https://huggingface.co/t5-large)
- [google-t5/t5-large](https://huggingface.co/google-t5/t5-large)
- [t5-3b](https://huggingface.co/t5-3b)
- [google-t5/t5-3b](https://huggingface.co/google-t5/t5-3b)
- [t5-11b](https://huggingface.co/t5-11b).
- [google-t5/t5-11b](https://huggingface.co/google-t5/t5-11b).
Based on the original T5 model, Google has released some follow-up works:
@@ -121,8 +121,8 @@ processed as follows:
```python
>>> from transformers import T5Tokenizer, T5ForConditionalGeneration
>>> tokenizer = T5Tokenizer.from_pretrained("t5-small")
>>> model = T5ForConditionalGeneration.from_pretrained("t5-small")
>>> tokenizer = T5Tokenizer.from_pretrained("google-t5/t5-small")
>>> model = T5ForConditionalGeneration.from_pretrained("google-t5/t5-small")
>>> input_ids = tokenizer("The <extra_id_0> walks in <extra_id_1> park", return_tensors="pt").input_ids
>>> labels = tokenizer("<extra_id_0> cute dog <extra_id_1> the <extra_id_2>", return_tensors="pt").input_ids
@@ -146,8 +146,8 @@ the model as follows:
```python
>>> from transformers import T5Tokenizer, T5ForConditionalGeneration
>>> tokenizer = T5Tokenizer.from_pretrained("t5-small")
>>> model = T5ForConditionalGeneration.from_pretrained("t5-small")
>>> tokenizer = T5Tokenizer.from_pretrained("google-t5/t5-small")
>>> model = T5ForConditionalGeneration.from_pretrained("google-t5/t5-small")
>>> input_ids = tokenizer("translate English to German: The house is wonderful.", return_tensors="pt").input_ids
>>> labels = tokenizer("Das Haus ist wunderbar.", return_tensors="pt").input_ids
@@ -183,8 +183,8 @@ ignored. The code example below illustrates all of this.
>>> from transformers import T5Tokenizer, T5ForConditionalGeneration
>>> import torch
>>> tokenizer = T5Tokenizer.from_pretrained("t5-small")
>>> model = T5ForConditionalGeneration.from_pretrained("t5-small")
>>> tokenizer = T5Tokenizer.from_pretrained("google-t5/t5-small")
>>> model = T5ForConditionalGeneration.from_pretrained("google-t5/t5-small")
>>> # the following 2 hyperparameters are task-specific
>>> max_source_length = 512
@@ -258,8 +258,8 @@ generation works in general in encoder-decoder models.
```python
>>> from transformers import T5Tokenizer, T5ForConditionalGeneration
>>> tokenizer = T5Tokenizer.from_pretrained("t5-small")
>>> model = T5ForConditionalGeneration.from_pretrained("t5-small")
>>> tokenizer = T5Tokenizer.from_pretrained("google-t5/t5-small")
>>> model = T5ForConditionalGeneration.from_pretrained("google-t5/t5-small")
>>> input_ids = tokenizer("translate English to German: The house is wonderful.", return_tensors="pt").input_ids
>>> outputs = model.generate(input_ids)
@@ -275,8 +275,8 @@ The example above only shows a single example. You can also do batched inference
```python
>>> from transformers import T5Tokenizer, T5ForConditionalGeneration
>>> tokenizer = T5Tokenizer.from_pretrained("t5-small")
>>> model = T5ForConditionalGeneration.from_pretrained("t5-small")
>>> tokenizer = T5Tokenizer.from_pretrained("google-t5/t5-small")
>>> model = T5ForConditionalGeneration.from_pretrained("google-t5/t5-small")
>>> task_prefix = "translate English to German: "
>>> # use different length sentences to test batching
@@ -301,8 +301,8 @@ The predicted tokens will then be placed between the sentinel tokens.
```python
>>> from transformers import T5Tokenizer, T5ForConditionalGeneration
>>> tokenizer = T5Tokenizer.from_pretrained("t5-small")
>>> model = T5ForConditionalGeneration.from_pretrained("t5-small")
>>> tokenizer = T5Tokenizer.from_pretrained("google-t5/t5-small")
>>> model = T5ForConditionalGeneration.from_pretrained("google-t5/t5-small")
>>> input_ids = tokenizer("The <extra_id_0> walks in <extra_id_1> park", return_tensors="pt").input_ids