Fix AutoConfig and AutoModel support for Llava-Next-Video (#32844)

* Fix: set the model_type of every Llava-Next-Video mapping entry to llava_next_video

* Fix doc for llava_next_video

* Fix formatting issues
* Rename llava-next-video.md to llava_next_video.md so the doc file name matches the implementation

* Fix docs TOC for llava-next-video
This commit is contained in:
Yangshen⚡Deng
2024-08-16 19:41:05 +08:00
committed by GitHub
parent cf32ee1753
commit a27182b7fc
8 changed files with 9 additions and 9 deletions

View File

@@ -822,7 +822,7 @@
title: Llava
- local: model_doc/llava_next
title: LLaVA-NeXT
-- local: model_doc/llava-next-video
+- local: model_doc/llava_next_video
title: LLaVa-NeXT-Video
- local: model_doc/lxmert
title: LXMERT

View File

@@ -186,7 +186,7 @@ Flax), PyTorch, and/or TensorFlow.
| [Llama3](model_doc/llama3) | ✅ | ❌ | ✅ |
| [LLaVa](model_doc/llava) | ✅ | ❌ | ❌ |
| [LLaVA-NeXT](model_doc/llava_next) | ✅ | ❌ | ❌ |
-| [LLaVa-NeXT-Video](model_doc/llava-next-video) | ✅ | ❌ | ❌ |
+| [LLaVa-NeXT-Video](model_doc/llava_next_video) | ✅ | ❌ | ❌ |
| [Longformer](model_doc/longformer) | ✅ | ✅ | ❌ |
| [LongT5](model_doc/longt5) | ✅ | ❌ | ✅ |
| [LUKE](model_doc/luke) | ✅ | ❌ | ❌ |

View File

@@ -145,8 +145,8 @@ CONFIG_MAPPING_NAMES = OrderedDict(
("lilt", "LiltConfig"),
("llama", "LlamaConfig"),
("llava", "LlavaConfig"),
-("llava-next-video", "LlavaNextVideoConfig"),
("llava_next", "LlavaNextConfig"),
+("llava_next_video", "LlavaNextVideoConfig"),
("longformer", "LongformerConfig"),
("longt5", "LongT5Config"),
("luke", "LukeConfig"),
@@ -436,8 +436,8 @@ MODEL_NAMES_MAPPING = OrderedDict(
("llama2", "Llama2"),
("llama3", "Llama3"),
("llava", "LLaVa"),
-("llava-next-video", "LLaVa-NeXT-Video"),
("llava_next", "LLaVA-NeXT"),
+("llava_next_video", "LLaVa-NeXT-Video"),
("longformer", "Longformer"),
("longt5", "LongT5"),
("luke", "LUKE"),

View File

@@ -97,8 +97,8 @@ else:
("layoutlmv3", ("LayoutLMv3ImageProcessor",)),
("levit", ("LevitImageProcessor",)),
("llava", ("CLIPImageProcessor",)),
-("llava-next-video", ("LlavaNextVideoImageProcessor",)),
("llava_next", ("LlavaNextImageProcessor",)),
+("llava_next_video", ("LlavaNextVideoImageProcessor",)),
("mask2former", ("Mask2FormerImageProcessor",)),
("maskformer", ("MaskFormerImageProcessor",)),
("mgp-str", ("ViTImageProcessor", "ViTImageProcessorFast")),

View File

@@ -308,8 +308,8 @@ MODEL_FOR_PRETRAINING_MAPPING_NAMES = OrderedDict(
("idefics2", "Idefics2ForConditionalGeneration"),
("layoutlm", "LayoutLMForMaskedLM"),
("llava", "LlavaForConditionalGeneration"),
-("llava-next-video", "LlavaNextVideoForConditionalGeneration"),
("llava_next", "LlavaNextForConditionalGeneration"),
+("llava_next_video", "LlavaNextVideoForConditionalGeneration"),
("longformer", "LongformerForMaskedLM"),
("luke", "LukeForMaskedLM"),
("lxmert", "LxmertForPreTraining"),
@@ -721,8 +721,8 @@ MODEL_FOR_VISION_2_SEQ_MAPPING_NAMES = OrderedDict(
("instructblipvideo", "InstructBlipVideoForConditionalGeneration"),
("kosmos-2", "Kosmos2ForConditionalGeneration"),
("llava", "LlavaForConditionalGeneration"),
-("llava-next-video", "LlavaNextVideoForConditionalGeneration"),
("llava_next", "LlavaNextForConditionalGeneration"),
+("llava_next_video", "LlavaNextVideoForConditionalGeneration"),
("paligemma", "PaliGemmaForConditionalGeneration"),
("pix2struct", "Pix2StructForConditionalGeneration"),
("video_llava", "VideoLlavaForConditionalGeneration"),

View File

@@ -71,8 +71,8 @@ PROCESSOR_MAPPING_NAMES = OrderedDict(
("layoutlmv2", "LayoutLMv2Processor"),
("layoutlmv3", "LayoutLMv3Processor"),
("llava", "LlavaProcessor"),
-("llava-next-video", "LlavaNextVideoProcessor"),
("llava_next", "LlavaNextProcessor"),
+("llava_next_video", "LlavaNextVideoProcessor"),
("markuplm", "MarkupLMProcessor"),
("mctct", "MCTCTProcessor"),
("mgp-str", "MgpstrProcessor"),

View File

@@ -257,8 +257,8 @@ else:
),
),
("llava", ("LlamaTokenizer", "LlamaTokenizerFast" if is_tokenizers_available() else None)),
-("llava-next-video", ("LlamaTokenizer", "LlamaTokenizerFast" if is_tokenizers_available() else None)),
("llava_next", ("LlamaTokenizer", "LlamaTokenizerFast" if is_tokenizers_available() else None)),
+("llava_next_video", ("LlamaTokenizer", "LlamaTokenizerFast" if is_tokenizers_available() else None)),
("longformer", ("LongformerTokenizer", "LongformerTokenizerFast" if is_tokenizers_available() else None)),
(
"longt5",