Add support for fine-tuning CLIP-like models using the contrastive-image-text example (#29070)

* Add support for SigLIP and Chinese-CLIP model training with the contrastive-image-text example

* codebase fixups
This commit is contained in:
Taylor Jackle Spriggs
2024-02-20 05:08:31 -07:00
committed by GitHub
parent 0996a10077
commit ee3af60be0
6 changed files with 20 additions and 7 deletions

View File

@@ -171,7 +171,7 @@ MODEL_NAMES_WITH_SAME_CONFIG = {
"XLS-R": "Wav2Vec2",
"XLSR-Wav2Vec2": "Wav2Vec2",
}
MODEL_NAMES_TO_IGNORE = ["CLIPVisionModel", "SiglipVisionModel"]
MODEL_NAMES_TO_IGNORE = ["CLIPVisionModel", "SiglipVisionModel", "ChineseCLIPVisionModel"]
def get_model_table_from_auto_modules() -> str: