[Chameleon, Hiera] Improve docs (#32038)

* Improve docs
* Fix docs
* Fix code snippet
2024-07-19 10:20:03 +02:00
parent b873234cb6
commit 56a7745704
5 changed files with 25 additions and 11 deletions
--- a/docs/source/en/model_doc/chameleon.md
+++ b/docs/source/en/model_doc/chameleon.md
@@ -69,13 +69,13 @@ import torch
 from PIL import Image
 import requests

-processor = ChameleonProcessor.from_pretrained("meta-chameleon")
-model = ChameleonForConditionalGeneration.from_pretrained("meta-chameleon", torch_dtype=torch.float16, device_map="auto") 
+processor = ChameleonProcessor.from_pretrained("facebook/chameleon-7b")
+model = ChameleonForConditionalGeneration.from_pretrained("facebook/chameleon-7b", torch_dtype=torch.float16, device_map="auto")

 # prepare image and text prompt
-url = "https://bjiujitsu.com/wp-content/uploads/2021/01/jiu_jitsu_belt_white_1.jpg"
+url = 'http://images.cocodataset.org/val2017/000000039769.jpg'
 image = Image.open(requests.get(url, stream=True).raw)
-prompt = "What color is the belt in this image?<image>"
+prompt = "What do you see in this image?<image>"

 inputs = processor(prompt, image, return_tensors="pt").to(model.device)

@@ -94,8 +94,8 @@ import torch
 from PIL import Image
 import requests

-processor = ChameleonProcessor.from_pretrained("meta-chameleon")
-model = ChameleonForConditionalGeneration.from_pretrained("meta-chameleon", torch_dtype=torch.float16, device_map="auto") 
+processor = ChameleonProcessor.from_pretrained("facebook/chameleon-7b")
+model = ChameleonForConditionalGeneration.from_pretrained("facebook/chameleon-7b", torch_dtype=torch.float16, device_map="auto")

 # Get three different images
 url = "https://www.ilankelman.org/stopsigns/australia.jpg"
@@ -115,7 +115,7 @@ prompts = [

 # We can simply feed images in the order they have to be used in the text prompt
 # Each "<image>" token uses one image leaving the next for the subsequent "<image>" tokens
-inputs = processor(text=prompts, images=[image_stop, image_cats, image_snowman], padding=True, return_tensors="pt").to(model.device)
+inputs = processor(text=prompts, images=[image_stop, image_cats, image_snowman], padding=True, return_tensors="pt").to(device="cuda", dtype=torch.float16)

 # Generate
 generate_ids = model.generate(**inputs, max_new_tokens=50)