Fix CI for VLMs (#35690)

* fix some easy tests
* more tests
* remove logit check here also
* add require_torch_large_gpu in Emu3
2025-01-20 11:15:39 +01:00
parent 5fa3534475
commit 8571bb145a
17 changed files with 102 additions and 485 deletions
--- a/docs/source/en/model_doc/emu3.md
+++ b/docs/source/en/model_doc/emu3.md
@@ -55,8 +55,8 @@ import torch
 from PIL import Image
 import requests

-processor = Emu3Processor.from_pretrained("Emu3-community/Emu3-Chat-hf")
-model = Emu3ForConditionalGeneration.from_pretrained("Emu3-community/Emu3-Chat-hf", torch_dtype=torch.bfloat16, device_map="cuda")
+processor = Emu3Processor.from_pretrained("BAAI/Emu3-Chat-hf")
+model = Emu3ForConditionalGeneration.from_pretrained("BAAI/Emu3-Chat-hf", torch_dtype=torch.bfloat16, device_map="cuda")

 # prepare image and text prompt
 url = 'http://images.cocodataset.org/val2017/000000039769.jpg'
@@ -75,8 +75,8 @@ print(processor.decode(output[0], skip_special_tokens=True))
 Emu3 can also generate images from textual input. Here is how you can do it:

 ```python
-processor = Emu3Processor.from_pretrained("Emu3-community/Emu3-Gen-hf")
-model = Emu3ForConditionalGeneration.from_pretrained("Emu3-community/Emu3-Gen-hf", torch_dtype="bfloat16", device_map="auto", attn_implementation="flash_attention_2")
+processor = Emu3Processor.from_pretrained("BAAI/Emu3-Gen-hf")
+model = Emu3ForConditionalGeneration.from_pretrained("BAAI/Emu3-Gen-hf", torch_dtype="bfloat16", device_map="auto", attn_implementation="flash_attention_2")


 inputs = processor(