diff --git a/docs/source/en/_toctree.yml b/docs/source/en/_toctree.yml index 9f89c8669d..2160e5c733 100644 --- a/docs/source/en/_toctree.yml +++ b/docs/source/en/_toctree.yml @@ -822,7 +822,7 @@ title: Llava - local: model_doc/llava_next title: LLaVA-NeXT - - local: model_doc/llava-next-video + - local: model_doc/llava_next_video title: LLaVa-NeXT-Video - local: model_doc/lxmert title: LXMERT diff --git a/docs/source/en/index.md b/docs/source/en/index.md index 2fe725de7c..a652755123 100644 --- a/docs/source/en/index.md +++ b/docs/source/en/index.md @@ -186,7 +186,7 @@ Flax), PyTorch, and/or TensorFlow. | [Llama3](model_doc/llama3) | ✅ | ❌ | ✅ | | [LLaVa](model_doc/llava) | ✅ | ❌ | ❌ | | [LLaVA-NeXT](model_doc/llava_next) | ✅ | ❌ | ❌ | -| [LLaVa-NeXT-Video](model_doc/llava-next-video) | ✅ | ❌ | ❌ | +| [LLaVa-NeXT-Video](model_doc/llava_next_video) | ✅ | ❌ | ❌ | | [Longformer](model_doc/longformer) | ✅ | ✅ | ❌ | | [LongT5](model_doc/longt5) | ✅ | ❌ | ✅ | | [LUKE](model_doc/luke) | ✅ | ❌ | ❌ | diff --git a/docs/source/en/model_doc/llava-next-video.md b/docs/source/en/model_doc/llava_next_video.md similarity index 100% rename from docs/source/en/model_doc/llava-next-video.md rename to docs/source/en/model_doc/llava_next_video.md diff --git a/src/transformers/models/auto/configuration_auto.py b/src/transformers/models/auto/configuration_auto.py index 2c4f815d7d..34d2ab0d65 100644 --- a/src/transformers/models/auto/configuration_auto.py +++ b/src/transformers/models/auto/configuration_auto.py @@ -145,8 +145,8 @@ CONFIG_MAPPING_NAMES = OrderedDict( ("lilt", "LiltConfig"), ("llama", "LlamaConfig"), ("llava", "LlavaConfig"), - ("llava-next-video", "LlavaNextVideoConfig"), ("llava_next", "LlavaNextConfig"), + ("llava_next_video", "LlavaNextVideoConfig"), ("longformer", "LongformerConfig"), ("longt5", "LongT5Config"), ("luke", "LukeConfig"), @@ -436,8 +436,8 @@ MODEL_NAMES_MAPPING = OrderedDict( ("llama2", "Llama2"), ("llama3", "Llama3"), ("llava", "LLaVa"), - ("llava-next-video", "LLaVa-NeXT-Video"), ("llava_next", "LLaVA-NeXT"), + ("llava_next_video", "LLaVa-NeXT-Video"), ("longformer", "Longformer"), ("longt5", "LongT5"), ("luke", "LUKE"), diff --git a/src/transformers/models/auto/image_processing_auto.py b/src/transformers/models/auto/image_processing_auto.py index 8bfc61b9be..d072a1b3de 100644 --- a/src/transformers/models/auto/image_processing_auto.py +++ b/src/transformers/models/auto/image_processing_auto.py @@ -97,8 +97,8 @@ else: ("layoutlmv3", ("LayoutLMv3ImageProcessor",)), ("levit", ("LevitImageProcessor",)), ("llava", ("CLIPImageProcessor",)), - ("llava-next-video", ("LlavaNextVideoImageProcessor",)), ("llava_next", ("LlavaNextImageProcessor",)), + ("llava_next_video", ("LlavaNextVideoImageProcessor",)), ("mask2former", ("Mask2FormerImageProcessor",)), ("maskformer", ("MaskFormerImageProcessor",)), ("mgp-str", ("ViTImageProcessor", "ViTImageProcessorFast")), diff --git a/src/transformers/models/auto/modeling_auto.py b/src/transformers/models/auto/modeling_auto.py index 2b49c29597..5643246ec4 100644 --- a/src/transformers/models/auto/modeling_auto.py +++ b/src/transformers/models/auto/modeling_auto.py @@ -308,8 +308,8 @@ MODEL_FOR_PRETRAINING_MAPPING_NAMES = OrderedDict( ("idefics2", "Idefics2ForConditionalGeneration"), ("layoutlm", "LayoutLMForMaskedLM"), ("llava", "LlavaForConditionalGeneration"), - ("llava-next-video", "LlavaNextVideoForConditionalGeneration"), ("llava_next", "LlavaNextForConditionalGeneration"), + ("llava_next_video", "LlavaNextVideoForConditionalGeneration"), ("longformer", "LongformerForMaskedLM"), ("luke", "LukeForMaskedLM"), ("lxmert", "LxmertForPreTraining"), @@ -721,8 +721,8 @@ MODEL_FOR_VISION_2_SEQ_MAPPING_NAMES = OrderedDict( ("instructblipvideo", "InstructBlipVideoForConditionalGeneration"), ("kosmos-2", "Kosmos2ForConditionalGeneration"), ("llava", "LlavaForConditionalGeneration"), - ("llava-next-video", "LlavaNextVideoForConditionalGeneration"), ("llava_next", "LlavaNextForConditionalGeneration"), + ("llava_next_video", "LlavaNextVideoForConditionalGeneration"), ("paligemma", "PaliGemmaForConditionalGeneration"), ("pix2struct", "Pix2StructForConditionalGeneration"), ("video_llava", "VideoLlavaForConditionalGeneration"), diff --git a/src/transformers/models/auto/processing_auto.py b/src/transformers/models/auto/processing_auto.py index 7877343d53..1c41b80abe 100644 --- a/src/transformers/models/auto/processing_auto.py +++ b/src/transformers/models/auto/processing_auto.py @@ -71,8 +71,8 @@ PROCESSOR_MAPPING_NAMES = OrderedDict( ("layoutlmv2", "LayoutLMv2Processor"), ("layoutlmv3", "LayoutLMv3Processor"), ("llava", "LlavaProcessor"), - ("llava-next-video", "LlavaNextVideoProcessor"), ("llava_next", "LlavaNextProcessor"), + ("llava_next_video", "LlavaNextVideoProcessor"), ("markuplm", "MarkupLMProcessor"), ("mctct", "MCTCTProcessor"), ("mgp-str", "MgpstrProcessor"), diff --git a/src/transformers/models/auto/tokenization_auto.py b/src/transformers/models/auto/tokenization_auto.py index a0f4e4f449..b094f50b5e 100644 --- a/src/transformers/models/auto/tokenization_auto.py +++ b/src/transformers/models/auto/tokenization_auto.py @@ -257,8 +257,8 @@ else: ), ), ("llava", ("LlamaTokenizer", "LlamaTokenizerFast" if is_tokenizers_available() else None)), - ("llava-next-video", ("LlamaTokenizer", "LlamaTokenizerFast" if is_tokenizers_available() else None)), ("llava_next", ("LlamaTokenizer", "LlamaTokenizerFast" if is_tokenizers_available() else None)), + ("llava_next_video", ("LlamaTokenizer", "LlamaTokenizerFast" if is_tokenizers_available() else None)), ("longformer", ("LongformerTokenizer", "LongformerTokenizerFast" if is_tokenizers_available() else None)), ( "longt5",