Add support for fine-tuning CLIP-like models using the contrastive-image-text example (#29070)

* Add support for SigLIP and Chinese-CLIP model training with the contrastive-image-text example

* codebase fixups
This commit is contained in:
Taylor Jackle Spriggs
2024-02-20 05:08:31 -07:00
committed by GitHub
parent 0996a10077
commit ee3af60be0
6 changed files with 20 additions and 7 deletions

View File

@@ -1070,6 +1070,7 @@ MODELS_NOT_IN_README = [
"VisionTextDualEncoder",
"CLIPVisionModel",
"SiglipVisionModel",
"ChineseCLIPVisionModel",
]
# Template for new entries to add in the main README when we have missing models.