From a844297088d046b3d16d3726ca96822b479e884c Mon Sep 17 00:00:00 2001 From: Steven Liu <59462357+stevhliu@users.noreply.github.com> Date: Tue, 25 Mar 2025 11:34:21 -0700 Subject: [PATCH] [docs] Fix image link (#36869) * fix image link * fix * update * fix --- docs/source/en/processors.md | 4 +- .../models/gemma3/modeling_gemma3.py | 37 ++++++++++++++----- .../models/gemma3/modular_gemma3.py | 37 ++++++++++++++----- .../models/paligemma/modeling_paligemma.py | 12 +++--- 4 files changed, 62 insertions(+), 28 deletions(-) diff --git a/docs/source/en/processors.md b/docs/source/en/processors.md index 9e0083eac8..7bd6be74f5 100644 --- a/docs/source/en/processors.md +++ b/docs/source/en/processors.md @@ -29,8 +29,8 @@ import requests processor = AutoProcessor.from_pretrained("google/paligemma-3b-pt-224") -prompt = "answer en Where is the cow standing?" -url = "https://huggingface.co/gv-hf/PaliGemma-test-224px-hf/resolve/main/cow_beach_1.png" +prompt = "answer en Where is the cat standing?" +url = "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/pipeline-cat-chonk.jpeg" image = Image.open(requests.get(url, stream=True).raw) inputs = processor(text=prompt, images=image, return_tensors="pt") diff --git a/src/transformers/models/gemma3/modeling_gemma3.py b/src/transformers/models/gemma3/modeling_gemma3.py index 70b96c6f0b..ac06bdeb82 100644 --- a/src/transformers/models/gemma3/modeling_gemma3.py +++ b/src/transformers/models/gemma3/modeling_gemma3.py @@ -1272,20 +1272,37 @@ class Gemma3ForConditionalGeneration(Gemma3PreTrainedModel, GenerationMixin): >>> import requests >>> from transformers import AutoProcessor, Gemma3ForConditionalGeneration - >>> model = Gemma3ForConditionalGeneration.from_pretrained("google/Gemma3-test-224px-hf") - >>> processor = AutoProcessor.from_pretrained("google/Gemma3-test-224px-hf") + >>> model = Gemma3ForConditionalGeneration.from_pretrained("google/gemma-3-4b-it") + >>> processor = AutoProcessor.from_pretrained("google/gemma-3-4b-it") - >>> prompt = "answer en Where is the cow standing?" - >>> url = "https://huggingface.co/gv-hf/Gemma3-test-224px-hf/resolve/main/cow_beach_1.png" - >>> image = Image.open(requests.get(url, stream=True).raw) - - >>> inputs = processor(images=image, text=prompt, return_tensors="pt") + >>> messages = [ + ... { + ... "role": "system", + ... "content": [ + ... {"type": "text", "text": "You are a helpful assistant."} + ... ] + ... }, + ... { + ... "role": "user", "content": [ + ... {"type": "image", "url": "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/pipeline-cat-chonk.jpeg"}, + ... {"type": "text", "text": "Where is the cat standing?"}, + ... ] + ... }, + ... ] + >>> inputs = processor.apply_chat_template( + ... messages, + ... tokenizer=True, + ... return_dict=True, + ... return_tensors="pt", + ... add_generation_prompt=True + ... ) >>> # Generate - >>> generate_ids = model.generate(**inputs, max_length=30) + >>> generate_ids = model.generate(**inputs) >>> processor.batch_decode(generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0] - "answer en Where is the cow standing?\nbeach" - ```""" + "user\nYou are a helpful assistant.\n\n\n\n\n\nWhere is the cat standing?\nmodel\nBased on the image, the cat is standing in a snowy area, likely outdoors. It appears to" + ``` + """ if (input_ids is None) ^ (inputs_embeds is not None): raise ValueError("You must specify exactly one of input_ids or inputs_embeds") diff --git a/src/transformers/models/gemma3/modular_gemma3.py b/src/transformers/models/gemma3/modular_gemma3.py index eab9db4df2..e9baaf1c52 100644 --- a/src/transformers/models/gemma3/modular_gemma3.py +++ b/src/transformers/models/gemma3/modular_gemma3.py @@ -883,20 +883,37 @@ class Gemma3ForConditionalGeneration(PaliGemmaForConditionalGeneration): >>> import requests >>> from transformers import AutoProcessor, Gemma3ForConditionalGeneration - >>> model = Gemma3ForConditionalGeneration.from_pretrained("google/Gemma3-test-224px-hf") - >>> processor = AutoProcessor.from_pretrained("google/Gemma3-test-224px-hf") + >>> model = Gemma3ForConditionalGeneration.from_pretrained("google/gemma-3-4b-it") + >>> processor = AutoProcessor.from_pretrained("google/gemma-3-4b-it") - >>> prompt = "answer en Where is the cow standing?" - >>> url = "https://huggingface.co/gv-hf/Gemma3-test-224px-hf/resolve/main/cow_beach_1.png" - >>> image = Image.open(requests.get(url, stream=True).raw) - - >>> inputs = processor(images=image, text=prompt, return_tensors="pt") + >>> messages = [ + ... { + ... "role": "system", + ... "content": [ + ... {"type": "text", "text": "You are a helpful assistant."} + ... ] + ... }, + ... { + ... "role": "user", "content": [ + ... {"type": "image", "url": "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/pipeline-cat-chonk.jpeg"}, + ... {"type": "text", "text": "Where is the cat standing?"}, + ... ] + ... }, + ... ] + >>> inputs = processor.apply_chat_template( + ... messages, + ... tokenizer=True, + ... return_dict=True, + ... return_tensors="pt", + ... add_generation_prompt=True + ... ) >>> # Generate - >>> generate_ids = model.generate(**inputs, max_length=30) + >>> generate_ids = model.generate(**inputs) >>> processor.batch_decode(generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0] - "answer en Where is the cow standing?\nbeach" - ```""" + "user\nYou are a helpful assistant.\n\n\n\n\n\nWhere is the cat standing?\nmodel\nBased on the image, the cat is standing in a snowy area, likely outdoors. It appears to" + ``` + """ if (input_ids is None) ^ (inputs_embeds is not None): raise ValueError("You must specify exactly one of input_ids or inputs_embeds") diff --git a/src/transformers/models/paligemma/modeling_paligemma.py b/src/transformers/models/paligemma/modeling_paligemma.py index 64ebee00a0..238658add5 100644 --- a/src/transformers/models/paligemma/modeling_paligemma.py +++ b/src/transformers/models/paligemma/modeling_paligemma.py @@ -464,19 +464,19 @@ class PaliGemmaForConditionalGeneration(PaliGemmaPreTrainedModel, GenerationMixi >>> import requests >>> from transformers import AutoProcessor, PaliGemmaForConditionalGeneration - >>> model = PaliGemmaForConditionalGeneration.from_pretrained("google/PaliGemma-test-224px-hf") - >>> processor = AutoProcessor.from_pretrained("google/PaliGemma-test-224px-hf") + >>> model = PaliGemmaForConditionalGeneration.from_pretrained("google/paligemma2-3b-mix-224") + >>> processor = AutoProcessor.from_pretrained("google/paligemma2-3b-mix-224") - >>> prompt = "answer en Where is the cow standing?" - >>> url = "https://huggingface.co/gv-hf/PaliGemma-test-224px-hf/resolve/main/cow_beach_1.png" + >>> prompt = "Where is the cat standing?" + >>> url = "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/pipeline-cat-chonk.jpeg" >>> image = Image.open(requests.get(url, stream=True).raw) >>> inputs = processor(images=image, text=prompt, return_tensors="pt") >>> # Generate - >>> generate_ids = model.generate(**inputs, max_length=30) + >>> generate_ids = model.generate(**inputs,) >>> processor.batch_decode(generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0] - "answer en Where is the cow standing?\nbeach" + "Where is the cat standing?\nsnow" ```""" if (input_ids is None) ^ (inputs_embeds is not None):