Update T5gemma (#39210)

* bug fix: add vocab_size to t5gemmaconfig for pipeline. * Update checkpoint placeholder * minor change * minor change * minor change: update example. * fix: add vocab_size as an explicit arg. * bug fix: remove vocab_size verification; instead, re-set encoder/decoder vocab size. Note, in t5gemma, vocab size of encoder/decoder should always be the same. * add `add_generation_prompt` for message preprocessing.
2025-07-08 13:08:48 -04:00
parent 1ecd52e50a
commit 7ef592c96c
3 changed files with 51 additions and 20 deletions
--- a/docs/source/en/model_doc/t5gemma.md
+++ b/docs/source/en/model_doc/t5gemma.md
@@ -14,7 +14,13 @@ specific language governing permissions and limitations under the License.
 rendered properly in your Markdown viewer.
 -->
-
+<div style="float: right;">
    <div class="flex flex-wrap space-x-1">
        <img alt="PyTorch" src="https://img.shields.io/badge/PyTorch-DE3412?style=flat&logo=pytorch&logoColor=white">
        <img alt="FlashAttention" src="https://img.shields.io/badge/%E2%9A%A1%EF%B8%8E%20FlashAttention-eae0c8?style=flat">
        <img alt="SDPA" src="https://img.shields.io/badge/SDPA-DE3412?style=flat&logo=pytorch&logoColor=white">
    </div>
 </div>
 # T5Gemma
@@ -24,6 +30,9 @@ T5Gemma has two groups of model sizes: 1) [Gemma 2](https://ai.google.dev/gemma/
 The pretrained variants are trained with two objectives: prefix language modeling with knowledge distillation (PrefixLM) and UL2, separately. We release both variants for each model size. The instruction-tuned variants were post-trained with supervised fine-tuning and reinforcement learning.
 > [!TIP]
 > Click on the T5Gemma models in the right sidebar for more examples of how to apply T5Gemma to different language tasks.
 The example below demonstrates how to chat with the model with [`Pipeline`] or the [`AutoModel`] class, and from the command line.
 <hfoptions id="usage">
@@ -35,43 +44,52 @@ import torch
 from transformers import pipeline
 pipe = pipeline(
-    task="text2text-generation",
+    "text2text-generation",
-    model="google/t5gemma-placeholder",
+    model="google/t5gemma-2b-2b-prefixlm-it",
    torch_dtype=torch.bfloat16,
-    device="cuda",
+    device="cuda",  # replace with "mps" to run on a Mac device
 )
-pipe("Question: Why is the sky blue?\nAnswer:", max_new_tokens=50)
+messages = [
    {"role": "user", "content": "Tell me an unknown interesting biology fact about the brain."},
 ]
 prompt = pipe.tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
 pipe(prompt, max_new_tokens=32)
 ```
 </hfoption>
 <hfoption id="AutoModel">
 ```python
-import torch
+# pip install accelerate
 from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
 import torch
-tokenizer = AutoTokenizer.from_pretrained("google/t5gemma-placeholder")
+tokenizer = AutoTokenizer.from_pretrained("google/t5gemma-2b-2b-prefixlm-it")
 model = AutoModelForSeq2SeqLM.from_pretrained(
-    "google/t5gemma-placeholder",
+    "google/t5gemma-2b-2b-prefixlm-it",
    device_map="auto",
    torch_dtype=torch.bfloat16,
    device_map="auto"
 )
-input_text = "Question: Why is the sky blue?\nAnswer:"
+messages = [
-input_ids = tokenizer(input_text, return_tensors="pt").to("cuda")
+    {"role": "user", "content": "Tell me an unknown interesting biology fact about the brain."},
 ]
 input_ids = tokenizer.apply_chat_template(messages, return_tensors="pt", return_dict=True, add_generation_prompt=True).to("cuda")
 outputs = model.generate(**input_ids, max_new_tokens=32)
-print(tokenizer.decode(outputs[0], skip_special_tokens=True))
+print(tokenizer.decode(outputs[0]))
 ```
 </hfoption>
 <hfoption id="transformers CLI">
 ```
-echo -e "Question: Why is the sky blue? Answer:" | transformers run --task text2text-generation --model google/t5gemma-placeholder --device 0
+echo -e "Write me a poem about Machine Learning. Answer:" | transformers run --task text2text-generation --model google/t5gemma-2b-2b-prefixlm --device 0
 ```
 </hfoption>
 </hfoptions>
 ## T5GemmaConfig
--- a/src/transformers/models/t5gemma/configuration_t5gemma.py
+++ b/src/transformers/models/t5gemma/configuration_t5gemma.py
@@ -186,10 +186,10 @@ class T5GemmaConfig(PretrainedConfig):
    This is the configuration class to store the configuration of a [`T5GemmaModel`]. It is used to instantiate a T5Gemma
    model according to the specified arguments, defining the model architecture. Instantiating a configuration with the
    defaults will yield a similar configuration to a hypothetical balanced Gemma2 encoder-decoder model.
-    e.g. [google/t5gemma-placeholder](https://huggingface.co/google/t5gemma-placeholder)
+    e.g. [google/t5gemma-2b-2b-prefixlm-it](https://huggingface.co/google/t5gemma-2b-2b-prefixlm-it)
    ```python
    >>> from transformers import T5GemmaConfig, T5GemmaModel
-    >>> t5gemma_config = T5GemmaConfig.from_pretrained("google/t5gemma-placeholder")
+    >>> t5gemma_config = T5GemmaConfig.from_pretrained("google/t5gemma-2b-2b-prefixlm-it")
    >>> model = T5GemmaModel(t5gemma_config)
    ```
    Configuration objects inherit from [PretrainedConfig] and can be used to control the model outputs. Read the
@@ -209,6 +209,8 @@ class T5GemmaConfig(PretrainedConfig):
            The dropout ratio for attention.
        tie_word_embeddings (`bool`, *optional*, defaults to `True`):
            Whether to tie input and output embeddings.
        vocab_size (`int`, *optional*, defaults to 256000):
            Vocabulary size of the T5Gemma model (the same as Gemma 2).
        kwargs (additional keyword arguments, optional, *optional*):
            Will be passed to the PretrainedConfig base class.
    """
@@ -257,6 +259,7 @@ class T5GemmaConfig(PretrainedConfig):
        classifier_dropout_rate: float = 0.0,
        attention_dropout: float = 0.0,
        tie_word_embeddings: bool = True,
        vocab_size: int = 256000,
        **kwargs,
    ):
        if isinstance(encoder, dict):
@@ -302,6 +305,9 @@ class T5GemmaConfig(PretrainedConfig):
        self.classifier_dropout_rate = classifier_dropout_rate
        self.tie_word_embeddings = tie_word_embeddings
        # Used in pipeline generation.
        self.vocab_size = vocab_size
    def __setattr__(self, key, value):
        shared_attr_with_submodules = [
            "output_hidden_states",
@@ -309,6 +315,7 @@ class T5GemmaConfig(PretrainedConfig):
            "_attn_implementation",
            "dropout_rate",
            "attention_dropout",
            "vocab_size",
        ]
        if key in shared_attr_with_submodules:
--- a/src/transformers/models/t5gemma/modular_t5gemma.py
+++ b/src/transformers/models/t5gemma/modular_t5gemma.py
@@ -56,8 +56,7 @@ from ..gemma2.modeling_gemma2 import (
 )
-# TODO(bzhanggo): figure out these documentations
+_CHECKPOINT_FOR_DOC = "google/t5gemma-2b-2b-prefixlm-it"
-_CHECKPOINT_FOR_DOC = "google/t5gemma-placeholder"
 if is_torch_flex_attn_available():
@@ -76,10 +75,10 @@ class T5GemmaConfig(PretrainedConfig):
    This is the configuration class to store the configuration of a [`T5GemmaModel`]. It is used to instantiate a T5Gemma
    model according to the specified arguments, defining the model architecture. Instantiating a configuration with the
    defaults will yield a similar configuration to a hypothetical balanced Gemma2 encoder-decoder model.
-    e.g. [google/t5gemma-placeholder](https://huggingface.co/google/t5gemma-placeholder)
+    e.g. [google/t5gemma-2b-2b-prefixlm-it](https://huggingface.co/google/t5gemma-2b-2b-prefixlm-it)
    ```python
    >>> from transformers import T5GemmaConfig, T5GemmaModel
-    >>> t5gemma_config = T5GemmaConfig.from_pretrained("google/t5gemma-placeholder")
+    >>> t5gemma_config = T5GemmaConfig.from_pretrained("google/t5gemma-2b-2b-prefixlm-it")
    >>> model = T5GemmaModel(t5gemma_config)
    ```
    Configuration objects inherit from [PretrainedConfig] and can be used to control the model outputs. Read the
@@ -99,6 +98,8 @@ class T5GemmaConfig(PretrainedConfig):
            The dropout ratio for attention.
        tie_word_embeddings (`bool`, *optional*, defaults to `True`):
            Whether to tie input and output embeddings.
        vocab_size (`int`, *optional*, defaults to 256000):
            Vocabulary size of the T5Gemma model (the same as Gemma 2).
        kwargs (additional keyword arguments, optional, *optional*):
            Will be passed to the PretrainedConfig base class.
    """
@@ -147,6 +148,7 @@ class T5GemmaConfig(PretrainedConfig):
        classifier_dropout_rate: float = 0.0,
        attention_dropout: float = 0.0,
        tie_word_embeddings: bool = True,
        vocab_size: int = 256000,
        **kwargs,
    ):
        if isinstance(encoder, dict):
@@ -192,6 +194,9 @@ class T5GemmaConfig(PretrainedConfig):
        self.classifier_dropout_rate = classifier_dropout_rate
        self.tie_word_embeddings = tie_word_embeddings
        # Used in pipeline generation.
        self.vocab_size = vocab_size
    def __setattr__(self, key, value):
        shared_attr_with_submodules = [
            "output_hidden_states",
@@ -199,6 +204,7 @@ class T5GemmaConfig(PretrainedConfig):
            "_attn_implementation",
            "dropout_rate",
            "attention_dropout",
            "vocab_size",
        ]
        if key in shared_attr_with_submodules: