From 7ef592c96cbf616492ac4181a390d465189abec6 Mon Sep 17 00:00:00 2001 From: Biao Zhang <17406686+bzhangGo@users.noreply.github.com> Date: Tue, 8 Jul 2025 13:08:48 -0400 Subject: [PATCH] Update T5gemma (#39210) * bug fix: add vocab_size to t5gemmaconfig for pipeline. * Update checkpoint placeholder * minor change * minor change * minor change: update example. * fix: add vocab_size as an explicit arg. * bug fix: remove vocab_size verification; instead, re-set encoder/decoder vocab size. Note, in t5gemma, vocab size of encoder/decoder should always be the same. * add `add_generation_prompt` for message preprocessing. --- docs/source/en/model_doc/t5gemma.md | 46 +++++++++++++------ .../models/t5gemma/configuration_t5gemma.py | 11 ++++- .../models/t5gemma/modular_t5gemma.py | 14 ++++-- 3 files changed, 51 insertions(+), 20 deletions(-) diff --git a/docs/source/en/model_doc/t5gemma.md b/docs/source/en/model_doc/t5gemma.md index d8615a9add..72140b21d3 100644 --- a/docs/source/en/model_doc/t5gemma.md +++ b/docs/source/en/model_doc/t5gemma.md @@ -14,7 +14,13 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> - +
+
+ PyTorch + FlashAttention + SDPA +
+
# T5Gemma @@ -24,6 +30,9 @@ T5Gemma has two groups of model sizes: 1) [Gemma 2](https://ai.google.dev/gemma/ The pretrained varaints are trained with two objectives: prefix language modeling with knowledge distillation (PrefixLM) and UL2, separately. We release both variants for each model size. The instruction-turned varaints was post-trained with supervised fine-tuning and reinforcement learning. +> [!TIP] +> Click on the T5Gemma models in the right sidebar for more examples of how to apply T5Gemma to different language tasks. + The example below demonstrates how to chat with the model with [`Pipeline`] or the [`AutoModel`] class, and from the command line. @@ -35,43 +44,52 @@ import torch from transformers import pipeline pipe = pipeline( - task="text2text-generation", - model="google/t5gemma-placeholder", + "text2text-generation", + model="google/t5gemma-2b-2b-prefixlm-it", torch_dtype=torch.bfloat16, - device="cuda", + device="cuda", # replace with "mps" to run on a Mac device ) -pipe("Question: Why is the sky blue?\nAnswer:", max_new_tokens=50) +messages = [ + {"role": "user", "content": "Tell me an unknown interesting biology fact about the brain."}, +] +prompt = pipe.tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True) + +pipe(prompt, max_new_tokens=32) ``` ```python -import torch +# pip install accelerate from transformers import AutoTokenizer, AutoModelForSeq2SeqLM +import torch -tokenizer = AutoTokenizer.from_pretrained("google/t5gemma-placeholder") +tokenizer = AutoTokenizer.from_pretrained("google/t5gemma-2b-2b-prefixlm-it") model = AutoModelForSeq2SeqLM.from_pretrained( - "google/t5gemma-placeholder", + "google/t5gemma-2b-2b-prefixlm-it", + device_map="auto", torch_dtype=torch.bfloat16, - device_map="auto" ) -input_text = "Question: Why is the sky blue?\nAnswer:" -input_ids = tokenizer(input_text, return_tensors="pt").to("cuda") +messages = [ + {"role": "user", "content": "Tell me an unknown interesting biology fact 
about the brain."}, +] +input_ids = tokenizer.apply_chat_template(messages, return_tensors="pt", return_dict=True, add_generation_prompt=True).to("cuda") outputs = model.generate(**input_ids, max_new_tokens=32) -print(tokenizer.decode(outputs[0], skip_special_tokens=True)) - +print(tokenizer.decode(outputs[0])) ``` ``` -echo -e "Question: Why is the sky blue? Answer:" | transformers run --task text2text-generation --model google/t5gemma-placeholder --device 0 +echo -e "Write me a poem about Machine Learning. Answer:" | transformers run --task text2text-generation --model google/t5gemma-2b-2b-prefixlm --device 0 ``` + + ## T5GemmaConfig diff --git a/src/transformers/models/t5gemma/configuration_t5gemma.py b/src/transformers/models/t5gemma/configuration_t5gemma.py index ff51537d81..bc195d562f 100644 --- a/src/transformers/models/t5gemma/configuration_t5gemma.py +++ b/src/transformers/models/t5gemma/configuration_t5gemma.py @@ -186,10 +186,10 @@ class T5GemmaConfig(PretrainedConfig): This is the configuration class to store the configuration of a [`T5GemmaModel`]. It is used to instantiate an T5Gemma model according to the specified arguments, defining the model architecture. Instantiating a configuration with the defaults will yield a similar configuration to a hypothetical balanced Gemma2 encoder-decoder model. - e.g. [google/t5gemma-placeholder](https://huggingface.co/google/t5gemma-placeholder) + e.g. [google/t5gemma-2b-2b-prefixlm-it](https://huggingface.co/google/t5gemma-2b-2b-prefixlm-it) ```python >>> from transformers import T5GemmaConfig, T5GemmaModel - >>> t5gemma_config = T5GemmaConfig.from_pretrained("google/t5gemma-placeholder") + >>> t5gemma_config = T5GemmaConfig.from_pretrained("google/t5gemma-2b-2b-prefixlm-it") >>> model = T5GemmaModel(t5gemma_config) ``` Configuration objects inherit from [PretrainedConfig] and can be used to control the model outputs. 
Read the @@ -209,6 +209,8 @@ class T5GemmaConfig(PretrainedConfig): The dropout ratio for attention. tie_word_embeddings (`bool`, *optional*, defaults to `True`): Whether tie input and output embeddings. + vocab_size (`int`, *optional*, defaults to 256000): + Vocabulary size of the T5Gemma model (the same as Gemma 2). kwargs (additional keyword arguments, optional, *optional*): Will be passed to the PretrainedConfig base class. """ @@ -257,6 +259,7 @@ class T5GemmaConfig(PretrainedConfig): classifier_dropout_rate: float = 0.0, attention_dropout: float = 0.0, tie_word_embeddings: bool = True, + vocab_size: int = 256000, **kwargs, ): if isinstance(encoder, dict): @@ -302,6 +305,9 @@ class T5GemmaConfig(PretrainedConfig): self.classifier_dropout_rate = classifier_dropout_rate self.tie_word_embeddings = tie_word_embeddings + # Used in pipeline generation. + self.vocab_size = vocab_size + def __setattr__(self, key, value): shared_attr_with_submodules = [ "output_hidden_states", @@ -309,6 +315,7 @@ class T5GemmaConfig(PretrainedConfig): "_attn_implementation", "dropout_rate", "attention_dropout", + "vocab_size", ] if key in shared_attr_with_submodules: diff --git a/src/transformers/models/t5gemma/modular_t5gemma.py b/src/transformers/models/t5gemma/modular_t5gemma.py index 56533855ee..9360008e30 100644 --- a/src/transformers/models/t5gemma/modular_t5gemma.py +++ b/src/transformers/models/t5gemma/modular_t5gemma.py @@ -56,8 +56,7 @@ from ..gemma2.modeling_gemma2 import ( ) -# TODO(bzhanggo): figure out these documentations -_CHECKPOINT_FOR_DOC = "google/t5gemma-placeholder" +_CHECKPOINT_FOR_DOC = "google/t5gemma-2b-2b-prefixlm-it" if is_torch_flex_attn_available(): @@ -76,10 +75,10 @@ class T5GemmaConfig(PretrainedConfig): This is the configuration class to store the configuration of a [`T5GemmaModel`]. It is used to instantiate an T5Gemma model according to the specified arguments, defining the model architecture. 
Instantiating a configuration with the defaults will yield a similar configuration to a hypothetical balanced Gemma2 encoder-decoder model. - e.g. [google/t5gemma-placeholder](https://huggingface.co/google/t5gemma-placeholder) + e.g. [google/t5gemma-2b-2b-prefixlm-it](https://huggingface.co/google/t5gemma-2b-2b-prefixlm-it) ```python >>> from transformers import T5GemmaConfig, T5GemmaModel - >>> t5gemma_config = T5GemmaConfig.from_pretrained("google/t5gemma-placeholder") + >>> t5gemma_config = T5GemmaConfig.from_pretrained("google/t5gemma-2b-2b-prefixlm-it") >>> model = T5GemmaModel(t5gemma_config) ``` Configuration objects inherit from [PretrainedConfig] and can be used to control the model outputs. Read the @@ -99,6 +98,8 @@ class T5GemmaConfig(PretrainedConfig): The dropout ratio for attention. tie_word_embeddings (`bool`, *optional*, defaults to `True`): Whether tie input and output embeddings. + vocab_size (`int`, *optional*, defaults to 256000): + Vocabulary size of the T5Gemma model (the same as Gemma 2). kwargs (additional keyword arguments, optional, *optional*): Will be passed to the PretrainedConfig base class. """ @@ -147,6 +148,7 @@ class T5GemmaConfig(PretrainedConfig): classifier_dropout_rate: float = 0.0, attention_dropout: float = 0.0, tie_word_embeddings: bool = True, + vocab_size: int = 256000, **kwargs, ): if isinstance(encoder, dict): @@ -192,6 +194,9 @@ class T5GemmaConfig(PretrainedConfig): self.classifier_dropout_rate = classifier_dropout_rate self.tie_word_embeddings = tie_word_embeddings + # Used in pipeline generation. + self.vocab_size = vocab_size + def __setattr__(self, key, value): shared_attr_with_submodules = [ "output_hidden_states", @@ -199,6 +204,7 @@ class T5GemmaConfig(PretrainedConfig): "_attn_implementation", "dropout_rate", "attention_dropout", + "vocab_size", ] if key in shared_attr_with_submodules: