Add SeamlessM4T v2 (#27779)
* add working conversion script * first non-working version of modeling code * update modeling code (working) * make style * make fix-copies * add config docstrings * add config to ignore docstrings formatting due to unconventional markdown * fix copies * fix generation num_return_sequences * enrich docs * add and fix tests beside integration tests * update integration tests * update repo id * add tie weights and make style * correct naming in .md * fix imports and so on * correct docstrings * fix fp16 speech forward * fix speechencoder attention * make style * fix copied from * rename SeamlessM4Tv2-v2 to SeamlessM4Tv2 * Apply suggestions on configuration Co-authored-by: Arthur <48595927+ArthurZucker@users.noreply.github.com> * remove useless public models * fix private models + better naming for T2U models * clean speech encoder relative position embeddings * refactor chunk attention * add docstrings to chunk attention method * improve naming and docstrings * rename some attention variables + add temperature sampling in T2U model * rename DOCSTRINGS variable names * make style + remove 2 useless config parameters * enrich model card * remove any attention_head reference + fix temperature in T2U * new fmt and make style * Apply suggestions from code review Co-authored-by: Arthur <48595927+ArthurZucker@users.noreply.github.com> * rename spkr_id->speaker_id and change docstrings of get_char_input_ids * simplify v2attention * make style * Update seamless_m4t_v2.md * update code and tests with last update * update repo ids * fill article name, abstract and authors * update not_doctested and slow_doc tests --------- Co-authored-by: Arthur <48595927+ArthurZucker@users.noreply.github.com>
This commit is contained in:
@@ -96,6 +96,21 @@ SPECIAL_CASES_TO_ALLOW = {
|
||||
"t2u_encoder_layers",
|
||||
"t2u_max_position_embeddings",
|
||||
],
|
||||
# Actually used in the config or generation config, in that case necessary for the sub-components generation
|
||||
"SeamlessM4Tv2Config": [
|
||||
"max_new_tokens",
|
||||
"t2u_decoder_attention_heads",
|
||||
"t2u_decoder_ffn_dim",
|
||||
"t2u_decoder_layers",
|
||||
"t2u_encoder_attention_heads",
|
||||
"t2u_encoder_ffn_dim",
|
||||
"t2u_encoder_layers",
|
||||
"t2u_max_position_embeddings",
|
||||
"t2u_variance_pred_dropout",
|
||||
"t2u_variance_predictor_embed_dim",
|
||||
"t2u_variance_predictor_hidden_dim",
|
||||
"t2u_variance_predictor_kernel_size",
|
||||
],
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -463,6 +463,7 @@ OBJECTS_TO_IGNORE = [
|
||||
"SamConfig",
|
||||
"SamPromptEncoderConfig",
|
||||
"SeamlessM4TConfig", # use of unconventional markdown
|
||||
"SeamlessM4Tv2Config", # use of unconventional markdown
|
||||
"Seq2SeqTrainingArguments",
|
||||
"SpecialTokensMixin",
|
||||
"Speech2Text2Config",
|
||||
|
||||
@@ -76,6 +76,9 @@ PRIVATE_MODELS = [
|
||||
"Kosmos2TextModel",
|
||||
"Kosmos2TextForCausalLM",
|
||||
"Kosmos2VisionModel",
|
||||
"SeamlessM4Tv2TextToUnitModel",
|
||||
"SeamlessM4Tv2CodeHifiGan",
|
||||
"SeamlessM4Tv2TextToUnitForConditionalGeneration",
|
||||
]
|
||||
|
||||
# Update this list for models that are not tested with a comment explaining the reason it should not be.
|
||||
@@ -296,6 +299,10 @@ IGNORE_NON_AUTO_CONFIGURED = PRIVATE_MODELS.copy() + [
|
||||
"SeamlessM4TCodeHifiGan",
|
||||
"SeamlessM4TForSpeechToSpeech", # no auto class for speech-to-speech
|
||||
"TvpForVideoGrounding",
|
||||
"SeamlessM4Tv2NARTextToUnitModel",
|
||||
"SeamlessM4Tv2NARTextToUnitForConditionalGeneration",
|
||||
"SeamlessM4Tv2CodeHifiGan",
|
||||
"SeamlessM4Tv2ForSpeechToSpeech", # no auto class for speech-to-speech
|
||||
]
|
||||
|
||||
# DO NOT edit this list!
|
||||
|
||||
@@ -776,6 +776,7 @@ src/transformers/models/sam/modeling_sam.py
|
||||
src/transformers/models/sam/modeling_tf_sam.py
|
||||
src/transformers/models/sam/processing_sam.py
|
||||
src/transformers/models/seamless_m4t/convert_fairseq2_to_hf.py
|
||||
src/transformers/models/seamless_m4t_v2/convert_fairseq2_to_hf.py
|
||||
src/transformers/models/segformer/configuration_segformer.py
|
||||
src/transformers/models/segformer/convert_segformer_original_to_pytorch.py
|
||||
src/transformers/models/sew/convert_sew_original_pytorch_checkpoint_to_pytorch.py
|
||||
|
||||
@@ -2,6 +2,7 @@ docs/source/en/generation_strategies.md
|
||||
docs/source/en/model_doc/ctrl.md
|
||||
docs/source/en/model_doc/kosmos-2.md
|
||||
docs/source/en/model_doc/seamless_m4t.md
|
||||
docs/source/en/model_doc/seamless_m4t_v2.md
|
||||
docs/source/en/task_summary.md
|
||||
docs/source/en/tasks/prompting.md
|
||||
src/transformers/models/blip_2/modeling_blip_2.py
|
||||
|
||||
Reference in New Issue
Block a user