Update T5gemma (#39210)
* bug fix: add vocab_size to t5gemmaconfig for pipeline. * Update checkpoint placeholder * minor change * minor change * minor change: update example. * fix: add vocab_size as an explicit arg. * bug fix: remove vocab_size verification; instead, re-set encoder/decoder vocab size. Note, in t5gemma, vocab size of encoder/decoder should always be the same. * add `add_generation_prompt` for message preprocessing.
This commit is contained in:
@@ -14,7 +14,13 @@ specific language governing permissions and limitations under the License.
|
|||||||
rendered properly in your Markdown viewer.
|
rendered properly in your Markdown viewer.
|
||||||
|
|
||||||
-->
|
-->
|
||||||
|
<div style="float: right;">
|
||||||
|
<div class="flex flex-wrap space-x-1">
|
||||||
|
<img alt="PyTorch" src="https://img.shields.io/badge/PyTorch-DE3412?style=flat&logo=pytorch&logoColor=white">
|
||||||
|
<img alt="FlashAttention" src="https://img.shields.io/badge/%E2%9A%A1%EF%B8%8E%20FlashAttention-eae0c8?style=flat">
|
||||||
|
<img alt="SDPA" src="https://img.shields.io/badge/SDPA-DE3412?style=flat&logo=pytorch&logoColor=white">
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
# T5Gemma
|
# T5Gemma
|
||||||
|
|
||||||
@@ -24,6 +30,9 @@ T5Gemma has two groups of model sizes: 1) [Gemma 2](https://ai.google.dev/gemma/
|
|||||||
|
|
||||||
The pretrained variants are trained with two objectives: prefix language modeling with knowledge distillation (PrefixLM) and UL2, separately. We release both variants for each model size. The instruction-tuned variants were post-trained with supervised fine-tuning and reinforcement learning.
|
The pretrained variants are trained with two objectives: prefix language modeling with knowledge distillation (PrefixLM) and UL2, separately. We release both variants for each model size. The instruction-tuned variants were post-trained with supervised fine-tuning and reinforcement learning.
|
||||||
|
|
||||||
|
> [!TIP]
|
||||||
|
> Click on the T5Gemma models in the right sidebar for more examples of how to apply T5Gemma to different language tasks.
|
||||||
|
|
||||||
The example below demonstrates how to chat with the model with [`Pipeline`] or the [`AutoModel`] class, and from the command line.
|
The example below demonstrates how to chat with the model with [`Pipeline`] or the [`AutoModel`] class, and from the command line.
|
||||||
|
|
||||||
<hfoptions id="usage">
|
<hfoptions id="usage">
|
||||||
@@ -35,43 +44,52 @@ import torch
|
|||||||
from transformers import pipeline
|
from transformers import pipeline
|
||||||
|
|
||||||
pipe = pipeline(
|
pipe = pipeline(
|
||||||
task="text2text-generation",
|
"text2text-generation",
|
||||||
model="google/t5gemma-placeholder",
|
model="google/t5gemma-2b-2b-prefixlm-it",
|
||||||
torch_dtype=torch.bfloat16,
|
torch_dtype=torch.bfloat16,
|
||||||
device="cuda",
|
device="cuda", # replace with "mps" to run on a Mac device
|
||||||
)
|
)
|
||||||
|
|
||||||
pipe("Question: Why is the sky blue?\nAnswer:", max_new_tokens=50)
|
messages = [
|
||||||
|
{"role": "user", "content": "Tell me an unknown interesting biology fact about the brain."},
|
||||||
|
]
|
||||||
|
prompt = pipe.tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
|
||||||
|
|
||||||
|
pipe(prompt, max_new_tokens=32)
|
||||||
```
|
```
|
||||||
|
|
||||||
</hfoption>
|
</hfoption>
|
||||||
<hfoption id="AutoModel">
|
<hfoption id="AutoModel">
|
||||||
|
|
||||||
```python
|
```python
|
||||||
import torch
|
# pip install accelerate
|
||||||
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
|
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
|
||||||
|
import torch
|
||||||
|
|
||||||
tokenizer = AutoTokenizer.from_pretrained("google/t5gemma-placeholder")
|
tokenizer = AutoTokenizer.from_pretrained("google/t5gemma-2b-2b-prefixlm-it")
|
||||||
model = AutoModelForSeq2SeqLM.from_pretrained(
|
model = AutoModelForSeq2SeqLM.from_pretrained(
|
||||||
"google/t5gemma-placeholder",
|
"google/t5gemma-2b-2b-prefixlm-it",
|
||||||
|
device_map="auto",
|
||||||
torch_dtype=torch.bfloat16,
|
torch_dtype=torch.bfloat16,
|
||||||
device_map="auto"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
input_text = "Question: Why is the sky blue?\nAnswer:"
|
messages = [
|
||||||
input_ids = tokenizer(input_text, return_tensors="pt").to("cuda")
|
{"role": "user", "content": "Tell me an unknown interesting biology fact about the brain."},
|
||||||
|
]
|
||||||
|
input_ids = tokenizer.apply_chat_template(messages, return_tensors="pt", return_dict=True, add_generation_prompt=True).to("cuda")
|
||||||
|
|
||||||
outputs = model.generate(**input_ids, max_new_tokens=32)
|
outputs = model.generate(**input_ids, max_new_tokens=32)
|
||||||
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
|
print(tokenizer.decode(outputs[0]))
|
||||||
|
|
||||||
```
|
```
|
||||||
|
|
||||||
</hfoption>
|
</hfoption>
|
||||||
<hfoption id="transformers CLI">
|
<hfoption id="transformers CLI">
|
||||||
|
|
||||||
```
|
```
|
||||||
echo -e "Question: Why is the sky blue? Answer:" | transformers run --task text2text-generation --model google/t5gemma-placeholder --device 0
|
echo -e "Write me a poem about Machine Learning. Answer:" | transformers run --task text2text-generation --model google/t5gemma-2b-2b-prefixlm --device 0
|
||||||
```
|
```
|
||||||
|
</hfoption>
|
||||||
|
</hfoptions>
|
||||||
|
|
||||||
## T5GemmaConfig
|
## T5GemmaConfig
|
||||||
|
|
||||||
|
|||||||
@@ -186,10 +186,10 @@ class T5GemmaConfig(PretrainedConfig):
|
|||||||
This is the configuration class to store the configuration of a [`T5GemmaModel`]. It is used to instantiate a T5Gemma
|
This is the configuration class to store the configuration of a [`T5GemmaModel`]. It is used to instantiate a T5Gemma
|
||||||
model according to the specified arguments, defining the model architecture. Instantiating a configuration with the
|
model according to the specified arguments, defining the model architecture. Instantiating a configuration with the
|
||||||
defaults will yield a similar configuration to a hypothetical balanced Gemma2 encoder-decoder model.
|
defaults will yield a similar configuration to a hypothetical balanced Gemma2 encoder-decoder model.
|
||||||
e.g. [google/t5gemma-placeholder](https://huggingface.co/google/t5gemma-placeholder)
|
e.g. [google/t5gemma-2b-2b-prefixlm-it](https://huggingface.co/google/t5gemma-2b-2b-prefixlm-it)
|
||||||
```python
|
```python
|
||||||
>>> from transformers import T5GemmaConfig, T5GemmaModel
|
>>> from transformers import T5GemmaConfig, T5GemmaModel
|
||||||
>>> t5gemma_config = T5GemmaConfig.from_pretrained("google/t5gemma-placeholder")
|
>>> t5gemma_config = T5GemmaConfig.from_pretrained("google/t5gemma-2b-2b-prefixlm-it")
|
||||||
>>> model = T5GemmaModel(t5gemma_config)
|
>>> model = T5GemmaModel(t5gemma_config)
|
||||||
```
|
```
|
||||||
Configuration objects inherit from [PretrainedConfig] and can be used to control the model outputs. Read the
|
Configuration objects inherit from [PretrainedConfig] and can be used to control the model outputs. Read the
|
||||||
@@ -209,6 +209,8 @@ class T5GemmaConfig(PretrainedConfig):
|
|||||||
The dropout ratio for attention.
|
The dropout ratio for attention.
|
||||||
tie_word_embeddings (`bool`, *optional*, defaults to `True`):
|
tie_word_embeddings (`bool`, *optional*, defaults to `True`):
|
||||||
Whether to tie input and output embeddings.
|
Whether to tie input and output embeddings.
|
||||||
|
vocab_size (`int`, *optional*, defaults to 256000):
|
||||||
|
Vocabulary size of the T5Gemma model (the same as Gemma 2).
|
||||||
kwargs (additional keyword arguments, optional, *optional*):
|
kwargs (additional keyword arguments, optional, *optional*):
|
||||||
Will be passed to the PretrainedConfig base class.
|
Will be passed to the PretrainedConfig base class.
|
||||||
"""
|
"""
|
||||||
@@ -257,6 +259,7 @@ class T5GemmaConfig(PretrainedConfig):
|
|||||||
classifier_dropout_rate: float = 0.0,
|
classifier_dropout_rate: float = 0.0,
|
||||||
attention_dropout: float = 0.0,
|
attention_dropout: float = 0.0,
|
||||||
tie_word_embeddings: bool = True,
|
tie_word_embeddings: bool = True,
|
||||||
|
vocab_size: int = 256000,
|
||||||
**kwargs,
|
**kwargs,
|
||||||
):
|
):
|
||||||
if isinstance(encoder, dict):
|
if isinstance(encoder, dict):
|
||||||
@@ -302,6 +305,9 @@ class T5GemmaConfig(PretrainedConfig):
|
|||||||
self.classifier_dropout_rate = classifier_dropout_rate
|
self.classifier_dropout_rate = classifier_dropout_rate
|
||||||
self.tie_word_embeddings = tie_word_embeddings
|
self.tie_word_embeddings = tie_word_embeddings
|
||||||
|
|
||||||
|
# Used in pipeline generation.
|
||||||
|
self.vocab_size = vocab_size
|
||||||
|
|
||||||
def __setattr__(self, key, value):
|
def __setattr__(self, key, value):
|
||||||
shared_attr_with_submodules = [
|
shared_attr_with_submodules = [
|
||||||
"output_hidden_states",
|
"output_hidden_states",
|
||||||
@@ -309,6 +315,7 @@ class T5GemmaConfig(PretrainedConfig):
|
|||||||
"_attn_implementation",
|
"_attn_implementation",
|
||||||
"dropout_rate",
|
"dropout_rate",
|
||||||
"attention_dropout",
|
"attention_dropout",
|
||||||
|
"vocab_size",
|
||||||
]
|
]
|
||||||
|
|
||||||
if key in shared_attr_with_submodules:
|
if key in shared_attr_with_submodules:
|
||||||
|
|||||||
@@ -56,8 +56,7 @@ from ..gemma2.modeling_gemma2 import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
# TODO(bzhanggo): figure out these documentations
|
_CHECKPOINT_FOR_DOC = "google/t5gemma-2b-2b-prefixlm-it"
|
||||||
_CHECKPOINT_FOR_DOC = "google/t5gemma-placeholder"
|
|
||||||
|
|
||||||
|
|
||||||
if is_torch_flex_attn_available():
|
if is_torch_flex_attn_available():
|
||||||
@@ -76,10 +75,10 @@ class T5GemmaConfig(PretrainedConfig):
|
|||||||
This is the configuration class to store the configuration of a [`T5GemmaModel`]. It is used to instantiate a T5Gemma
|
This is the configuration class to store the configuration of a [`T5GemmaModel`]. It is used to instantiate a T5Gemma
|
||||||
model according to the specified arguments, defining the model architecture. Instantiating a configuration with the
|
model according to the specified arguments, defining the model architecture. Instantiating a configuration with the
|
||||||
defaults will yield a similar configuration to a hypothetical balanced Gemma2 encoder-decoder model.
|
defaults will yield a similar configuration to a hypothetical balanced Gemma2 encoder-decoder model.
|
||||||
e.g. [google/t5gemma-placeholder](https://huggingface.co/google/t5gemma-placeholder)
|
e.g. [google/t5gemma-2b-2b-prefixlm-it](https://huggingface.co/google/t5gemma-2b-2b-prefixlm-it)
|
||||||
```python
|
```python
|
||||||
>>> from transformers import T5GemmaConfig, T5GemmaModel
|
>>> from transformers import T5GemmaConfig, T5GemmaModel
|
||||||
>>> t5gemma_config = T5GemmaConfig.from_pretrained("google/t5gemma-placeholder")
|
>>> t5gemma_config = T5GemmaConfig.from_pretrained("google/t5gemma-2b-2b-prefixlm-it")
|
||||||
>>> model = T5GemmaModel(t5gemma_config)
|
>>> model = T5GemmaModel(t5gemma_config)
|
||||||
```
|
```
|
||||||
Configuration objects inherit from [PretrainedConfig] and can be used to control the model outputs. Read the
|
Configuration objects inherit from [PretrainedConfig] and can be used to control the model outputs. Read the
|
||||||
@@ -99,6 +98,8 @@ class T5GemmaConfig(PretrainedConfig):
|
|||||||
The dropout ratio for attention.
|
The dropout ratio for attention.
|
||||||
tie_word_embeddings (`bool`, *optional*, defaults to `True`):
|
tie_word_embeddings (`bool`, *optional*, defaults to `True`):
|
||||||
Whether to tie input and output embeddings.
|
Whether to tie input and output embeddings.
|
||||||
|
vocab_size (`int`, *optional*, defaults to 256000):
|
||||||
|
Vocabulary size of the T5Gemma model (the same as Gemma 2).
|
||||||
kwargs (additional keyword arguments, optional, *optional*):
|
kwargs (additional keyword arguments, optional, *optional*):
|
||||||
Will be passed to the PretrainedConfig base class.
|
Will be passed to the PretrainedConfig base class.
|
||||||
"""
|
"""
|
||||||
@@ -147,6 +148,7 @@ class T5GemmaConfig(PretrainedConfig):
|
|||||||
classifier_dropout_rate: float = 0.0,
|
classifier_dropout_rate: float = 0.0,
|
||||||
attention_dropout: float = 0.0,
|
attention_dropout: float = 0.0,
|
||||||
tie_word_embeddings: bool = True,
|
tie_word_embeddings: bool = True,
|
||||||
|
vocab_size: int = 256000,
|
||||||
**kwargs,
|
**kwargs,
|
||||||
):
|
):
|
||||||
if isinstance(encoder, dict):
|
if isinstance(encoder, dict):
|
||||||
@@ -192,6 +194,9 @@ class T5GemmaConfig(PretrainedConfig):
|
|||||||
self.classifier_dropout_rate = classifier_dropout_rate
|
self.classifier_dropout_rate = classifier_dropout_rate
|
||||||
self.tie_word_embeddings = tie_word_embeddings
|
self.tie_word_embeddings = tie_word_embeddings
|
||||||
|
|
||||||
|
# Used in pipeline generation.
|
||||||
|
self.vocab_size = vocab_size
|
||||||
|
|
||||||
def __setattr__(self, key, value):
|
def __setattr__(self, key, value):
|
||||||
shared_attr_with_submodules = [
|
shared_attr_with_submodules = [
|
||||||
"output_hidden_states",
|
"output_hidden_states",
|
||||||
@@ -199,6 +204,7 @@ class T5GemmaConfig(PretrainedConfig):
|
|||||||
"_attn_implementation",
|
"_attn_implementation",
|
||||||
"dropout_rate",
|
"dropout_rate",
|
||||||
"attention_dropout",
|
"attention_dropout",
|
||||||
|
"vocab_size",
|
||||||
]
|
]
|
||||||
|
|
||||||
if key in shared_attr_with_submodules:
|
if key in shared_attr_with_submodules:
|
||||||
|
|||||||
Reference in New Issue
Block a user