Fix AutoConfig and AutoModel support for Llava-Next-Video (#32844)
* Fix: change every model_type of Llava-Next-Video to llava_next_video
* Fix doc for llava_next_video
* Fix formatting issues
* Rename llava-next-video.md to llava_next_video.md to make it compatible with the implementation
* Fix docs TOC for llava-next-video
This commit is contained in:
@@ -822,7 +822,7 @@
|
||||
title: Llava
|
||||
- local: model_doc/llava_next
|
||||
title: LLaVA-NeXT
|
||||
- local: model_doc/llava-next-video
|
||||
- local: model_doc/llava_next_video
|
||||
title: LLaVa-NeXT-Video
|
||||
- local: model_doc/lxmert
|
||||
title: LXMERT
|
||||
|
||||
@@ -186,7 +186,7 @@ Flax), PyTorch, and/or TensorFlow.
|
||||
| [Llama3](model_doc/llama3) | ✅ | ❌ | ✅ |
|
||||
| [LLaVa](model_doc/llava) | ✅ | ❌ | ❌ |
|
||||
| [LLaVA-NeXT](model_doc/llava_next) | ✅ | ❌ | ❌ |
|
||||
| [LLaVa-NeXT-Video](model_doc/llava-next-video) | ✅ | ❌ | ❌ |
|
||||
| [LLaVa-NeXT-Video](model_doc/llava_next_video) | ✅ | ❌ | ❌ |
|
||||
| [Longformer](model_doc/longformer) | ✅ | ✅ | ❌ |
|
||||
| [LongT5](model_doc/longt5) | ✅ | ❌ | ✅ |
|
||||
| [LUKE](model_doc/luke) | ✅ | ❌ | ❌ |
|
||||
|
||||
@@ -145,8 +145,8 @@ CONFIG_MAPPING_NAMES = OrderedDict(
|
||||
("lilt", "LiltConfig"),
|
||||
("llama", "LlamaConfig"),
|
||||
("llava", "LlavaConfig"),
|
||||
("llava-next-video", "LlavaNextVideoConfig"),
|
||||
("llava_next", "LlavaNextConfig"),
|
||||
("llava_next_video", "LlavaNextVideoConfig"),
|
||||
("longformer", "LongformerConfig"),
|
||||
("longt5", "LongT5Config"),
|
||||
("luke", "LukeConfig"),
|
||||
@@ -436,8 +436,8 @@ MODEL_NAMES_MAPPING = OrderedDict(
|
||||
("llama2", "Llama2"),
|
||||
("llama3", "Llama3"),
|
||||
("llava", "LLaVa"),
|
||||
("llava-next-video", "LLaVa-NeXT-Video"),
|
||||
("llava_next", "LLaVA-NeXT"),
|
||||
("llava_next_video", "LLaVa-NeXT-Video"),
|
||||
("longformer", "Longformer"),
|
||||
("longt5", "LongT5"),
|
||||
("luke", "LUKE"),
|
||||
|
||||
@@ -97,8 +97,8 @@ else:
|
||||
("layoutlmv3", ("LayoutLMv3ImageProcessor",)),
|
||||
("levit", ("LevitImageProcessor",)),
|
||||
("llava", ("CLIPImageProcessor",)),
|
||||
("llava-next-video", ("LlavaNextVideoImageProcessor",)),
|
||||
("llava_next", ("LlavaNextImageProcessor",)),
|
||||
("llava_next_video", ("LlavaNextVideoImageProcessor",)),
|
||||
("mask2former", ("Mask2FormerImageProcessor",)),
|
||||
("maskformer", ("MaskFormerImageProcessor",)),
|
||||
("mgp-str", ("ViTImageProcessor", "ViTImageProcessorFast")),
|
||||
|
||||
@@ -308,8 +308,8 @@ MODEL_FOR_PRETRAINING_MAPPING_NAMES = OrderedDict(
|
||||
("idefics2", "Idefics2ForConditionalGeneration"),
|
||||
("layoutlm", "LayoutLMForMaskedLM"),
|
||||
("llava", "LlavaForConditionalGeneration"),
|
||||
("llava-next-video", "LlavaNextVideoForConditionalGeneration"),
|
||||
("llava_next", "LlavaNextForConditionalGeneration"),
|
||||
("llava_next_video", "LlavaNextVideoForConditionalGeneration"),
|
||||
("longformer", "LongformerForMaskedLM"),
|
||||
("luke", "LukeForMaskedLM"),
|
||||
("lxmert", "LxmertForPreTraining"),
|
||||
@@ -721,8 +721,8 @@ MODEL_FOR_VISION_2_SEQ_MAPPING_NAMES = OrderedDict(
|
||||
("instructblipvideo", "InstructBlipVideoForConditionalGeneration"),
|
||||
("kosmos-2", "Kosmos2ForConditionalGeneration"),
|
||||
("llava", "LlavaForConditionalGeneration"),
|
||||
("llava-next-video", "LlavaNextVideoForConditionalGeneration"),
|
||||
("llava_next", "LlavaNextForConditionalGeneration"),
|
||||
("llava_next_video", "LlavaNextVideoForConditionalGeneration"),
|
||||
("paligemma", "PaliGemmaForConditionalGeneration"),
|
||||
("pix2struct", "Pix2StructForConditionalGeneration"),
|
||||
("video_llava", "VideoLlavaForConditionalGeneration"),
|
||||
|
||||
@@ -71,8 +71,8 @@ PROCESSOR_MAPPING_NAMES = OrderedDict(
|
||||
("layoutlmv2", "LayoutLMv2Processor"),
|
||||
("layoutlmv3", "LayoutLMv3Processor"),
|
||||
("llava", "LlavaProcessor"),
|
||||
("llava-next-video", "LlavaNextVideoProcessor"),
|
||||
("llava_next", "LlavaNextProcessor"),
|
||||
("llava_next_video", "LlavaNextVideoProcessor"),
|
||||
("markuplm", "MarkupLMProcessor"),
|
||||
("mctct", "MCTCTProcessor"),
|
||||
("mgp-str", "MgpstrProcessor"),
|
||||
|
||||
@@ -257,8 +257,8 @@ else:
|
||||
),
|
||||
),
|
||||
("llava", ("LlamaTokenizer", "LlamaTokenizerFast" if is_tokenizers_available() else None)),
|
||||
("llava-next-video", ("LlamaTokenizer", "LlamaTokenizerFast" if is_tokenizers_available() else None)),
|
||||
("llava_next", ("LlamaTokenizer", "LlamaTokenizerFast" if is_tokenizers_available() else None)),
|
||||
("llava_next_video", ("LlamaTokenizer", "LlamaTokenizerFast" if is_tokenizers_available() else None)),
|
||||
("longformer", ("LongformerTokenizer", "LongformerTokenizerFast" if is_tokenizers_available() else None)),
|
||||
(
|
||||
"longt5",
|
||||
|
||||
Reference in New Issue
Block a user