diff --git a/Makefile b/Makefile index 17315c8a9e..8a9470b9af 100644 --- a/Makefile +++ b/Makefile @@ -52,6 +52,7 @@ repo-consistency: python utils/check_doctest_list.py python utils/update_metadata.py --check-only python utils/check_docstrings.py + python utils/add_dates.py # this target runs checks on all files diff --git a/docs/source/en/model_doc/aimv2.md b/docs/source/en/model_doc/aimv2.md index 1c05c5068d..d5cac85e0a 100644 --- a/docs/source/en/model_doc/aimv2.md +++ b/docs/source/en/model_doc/aimv2.md @@ -13,12 +13,13 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2024-11-21 and added to Hugging Face Transformers on 2025-07-08.* # AIMv2 ## Overview -The AIMv2 model was proposed in [Multimodal Autoregressive Pre-training of Large Vision Encoders](https://arxiv.org/abs/2411.14402) by Enrico Fini, Mustafa Shukor, Xiujun Li, Philipp Dufter, Michal Klein, David Haldimann, Sai Aitharaju, Victor Guilherme Turrisi da Costa, Louis Béthune, Zhe Gan, Alexander T Toshev, Marcin Eichner, Moin Nabi, Yinfei Yang, Joshua M. Susskind, Alaaeldin El-Nouby. +The AIMv2 model was proposed in [Multimodal Autoregressive Pre-training of Large Vision Encoders](https://huggingface.co/papers/2411.14402) by Enrico Fini, Mustafa Shukor, Xiujun Li, Philipp Dufter, Michal Klein, David Haldimann, Sai Aitharaju, Victor Guilherme Turrisi da Costa, Louis Béthune, Zhe Gan, Alexander T Toshev, Marcin Eichner, Moin Nabi, Yinfei Yang, Joshua M. Susskind, Alaaeldin El-Nouby. The abstract from the paper is the following: diff --git a/docs/source/en/model_doc/albert.md b/docs/source/en/model_doc/albert.md index 49d207fe57..2c8b7722c5 100644 --- a/docs/source/en/model_doc/albert.md +++ b/docs/source/en/model_doc/albert.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. 
--> +*This model was released on 2019-09-26 and added to Hugging Face Transformers on 2020-11-16.*
diff --git a/docs/source/en/model_doc/align.md b/docs/source/en/model_doc/align.md index 597f7e114e..3b531bcdd4 100644 --- a/docs/source/en/model_doc/align.md +++ b/docs/source/en/model_doc/align.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2021-02-11 and added to Hugging Face Transformers on 2023-03-01.*
PyTorch diff --git a/docs/source/en/model_doc/altclip.md b/docs/source/en/model_doc/altclip.md index 4d04173df7..2da9f5a781 100644 --- a/docs/source/en/model_doc/altclip.md +++ b/docs/source/en/model_doc/altclip.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2022-11-12 and added to Hugging Face Transformers on 2023-01-04.*
diff --git a/docs/source/en/model_doc/arcee.md b/docs/source/en/model_doc/arcee.md index 520e9a05bf..3df4c382fc 100644 --- a/docs/source/en/model_doc/arcee.md +++ b/docs/source/en/model_doc/arcee.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2025-06-18 and added to Hugging Face Transformers on 2025-06-24.*
@@ -24,7 +25,7 @@ rendered properly in your Markdown viewer. # Arcee -Arcee is a decoder-only transformer model based on the Llama architecture with a key modification: it uses ReLU² (ReLU-squared) activation in the MLP blocks instead of SiLU, following recent research showing improved training efficiency with squared activations. This architecture is designed for efficient training and inference while maintaining the proven stability of the Llama design. +[Arcee](https://www.arcee.ai/blog/deep-dive-afm-4-5b-the-first-arcee-foundational-model) is a decoder-only transformer model based on the Llama architecture with a key modification: it uses ReLU² (ReLU-squared) activation in the MLP blocks instead of SiLU, following recent research showing improved training efficiency with squared activations. This architecture is designed for efficient training and inference while maintaining the proven stability of the Llama design. The Arcee model is architecturally similar to Llama but uses `x * relu(x)` in MLP layers for improved gradient flow and is optimized for efficiency in both training and inference scenarios. diff --git a/docs/source/en/model_doc/aria.md b/docs/source/en/model_doc/aria.md index 1c974bf5e2..bb86257f22 100644 --- a/docs/source/en/model_doc/aria.md +++ b/docs/source/en/model_doc/aria.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2024-10-08 and added to Hugging Face Transformers on 2024-12-06.*
diff --git a/docs/source/en/model_doc/audio-spectrogram-transformer.md b/docs/source/en/model_doc/audio-spectrogram-transformer.md index 46544de1f6..93d7416a41 100644 --- a/docs/source/en/model_doc/audio-spectrogram-transformer.md +++ b/docs/source/en/model_doc/audio-spectrogram-transformer.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2021-04-05 and added to Hugging Face Transformers on 2022-11-21.* # Audio Spectrogram Transformer diff --git a/docs/source/en/model_doc/autoformer.md b/docs/source/en/model_doc/autoformer.md index 0fd3890132..af7087d375 100644 --- a/docs/source/en/model_doc/autoformer.md +++ b/docs/source/en/model_doc/autoformer.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2021-06-24 and added to Hugging Face Transformers on 2023-05-30.* # Autoformer diff --git a/docs/source/en/model_doc/aya_vision.md b/docs/source/en/model_doc/aya_vision.md index 4b10f99fa9..a71bba14d5 100644 --- a/docs/source/en/model_doc/aya_vision.md +++ b/docs/source/en/model_doc/aya_vision.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2025-05-13 and added to Hugging Face Transformers on 2025-03-04.*
diff --git a/docs/source/en/model_doc/bamba.md b/docs/source/en/model_doc/bamba.md index 81f8f79a58..43ad9ddfcf 100644 --- a/docs/source/en/model_doc/bamba.md +++ b/docs/source/en/model_doc/bamba.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2024-12-18 and added to Hugging Face Transformers on 2024-12-19.*
diff --git a/docs/source/en/model_doc/bark.md b/docs/source/en/model_doc/bark.md index 912f552fa7..6f39f58762 100644 --- a/docs/source/en/model_doc/bark.md +++ b/docs/source/en/model_doc/bark.md @@ -9,6 +9,7 @@ Unless required by applicable law or agreed to in writing, software distributed an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. --> +*This model was released on 2023-04-09 and added to Hugging Face Transformers on 2023-07-17.* # Bark @@ -19,7 +20,7 @@ specific language governing permissions and limitations under the License. ## Overview -Bark is a transformer-based text-to-speech model proposed by Suno AI in [suno-ai/bark](https://github.com/suno-ai/bark). +[Bark](https://huggingface.co/suno/bark) is a transformer-based text-to-speech model proposed by Suno AI in [suno-ai/bark](https://github.com/suno-ai/bark). Bark is made of 4 main models: diff --git a/docs/source/en/model_doc/bart.md b/docs/source/en/model_doc/bart.md index d269b391cc..b0fc75828d 100644 --- a/docs/source/en/model_doc/bart.md +++ b/docs/source/en/model_doc/bart.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2019-10-29 and added to Hugging Face Transformers on 2020-11-16.*
diff --git a/docs/source/en/model_doc/barthez.md b/docs/source/en/model_doc/barthez.md index fdaf28c8d7..4d8f23c5f5 100644 --- a/docs/source/en/model_doc/barthez.md +++ b/docs/source/en/model_doc/barthez.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2020-10-23 and added to Hugging Face Transformers on 2020-11-27.*
diff --git a/docs/source/en/model_doc/bartpho.md b/docs/source/en/model_doc/bartpho.md index 2c39f31135..df6101a905 100644 --- a/docs/source/en/model_doc/bartpho.md +++ b/docs/source/en/model_doc/bartpho.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2021-09-20 and added to Hugging Face Transformers on 2021-10-18.*
diff --git a/docs/source/en/model_doc/beit.md b/docs/source/en/model_doc/beit.md index 32a0c160a1..6bcd70c0fd 100644 --- a/docs/source/en/model_doc/beit.md +++ b/docs/source/en/model_doc/beit.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2021-06-15 and added to Hugging Face Transformers on 2021-08-04.* # BEiT diff --git a/docs/source/en/model_doc/bert-generation.md b/docs/source/en/model_doc/bert-generation.md index a14966ce3a..28a6919a98 100644 --- a/docs/source/en/model_doc/bert-generation.md +++ b/docs/source/en/model_doc/bert-generation.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2019-07-29 and added to Hugging Face Transformers on 2020-11-16.* # BertGeneration diff --git a/docs/source/en/model_doc/bert-japanese.md b/docs/source/en/model_doc/bert-japanese.md index 33a720318b..380d9e3e2b 100644 --- a/docs/source/en/model_doc/bert-japanese.md +++ b/docs/source/en/model_doc/bert-japanese.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2019-03-24 and added to Hugging Face Transformers on 2020-11-16.* # BertJapanese diff --git a/docs/source/en/model_doc/bert.md b/docs/source/en/model_doc/bert.md index 8ec99b14ab..2185c6bdcd 100644 --- a/docs/source/en/model_doc/bert.md +++ b/docs/source/en/model_doc/bert.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2018-10-11 and added to Hugging Face Transformers on 2020-11-16.*
diff --git a/docs/source/en/model_doc/bertweet.md b/docs/source/en/model_doc/bertweet.md index f1f6ff877b..6a031dfe1a 100644 --- a/docs/source/en/model_doc/bertweet.md +++ b/docs/source/en/model_doc/bertweet.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2020-05-20 and added to Hugging Face Transformers on 2020-11-16.* # BERTweet diff --git a/docs/source/en/model_doc/big_bird.md b/docs/source/en/model_doc/big_bird.md index 16f99043c6..c311dba4d2 100644 --- a/docs/source/en/model_doc/big_bird.md +++ b/docs/source/en/model_doc/big_bird.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2020-07-28 and added to Hugging Face Transformers on 2021-03-30.*
diff --git a/docs/source/en/model_doc/bigbird_pegasus.md b/docs/source/en/model_doc/bigbird_pegasus.md index bf9b417543..56275dd0ff 100644 --- a/docs/source/en/model_doc/bigbird_pegasus.md +++ b/docs/source/en/model_doc/bigbird_pegasus.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2020-07-28 and added to Hugging Face Transformers on 2021-05-07.*
diff --git a/docs/source/en/model_doc/biogpt.md b/docs/source/en/model_doc/biogpt.md index 0b6eb87764..8da62d60b3 100644 --- a/docs/source/en/model_doc/biogpt.md +++ b/docs/source/en/model_doc/biogpt.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2022-10-19 and added to Hugging Face Transformers on 2022-12-05.*
diff --git a/docs/source/en/model_doc/bit.md b/docs/source/en/model_doc/bit.md index ea0c09b862..5a6630566f 100644 --- a/docs/source/en/model_doc/bit.md +++ b/docs/source/en/model_doc/bit.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2019-12-24 and added to Hugging Face Transformers on 2022-12-07.* # Big Transfer (BiT) diff --git a/docs/source/en/model_doc/bitnet.md b/docs/source/en/model_doc/bitnet.md index 2bf2b8e7b2..992bb0b7d8 100644 --- a/docs/source/en/model_doc/bitnet.md +++ b/docs/source/en/model_doc/bitnet.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2025-04-16 and added to Hugging Face Transformers on 2025-04-28.* # BitNet diff --git a/docs/source/en/model_doc/blenderbot-small.md b/docs/source/en/model_doc/blenderbot-small.md index 181fd01342..8cfb47f643 100644 --- a/docs/source/en/model_doc/blenderbot-small.md +++ b/docs/source/en/model_doc/blenderbot-small.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2020-04-28 and added to Hugging Face Transformers on 2021-01-05.* # Blenderbot Small diff --git a/docs/source/en/model_doc/blenderbot.md b/docs/source/en/model_doc/blenderbot.md index cea6c49c36..d4aad114c8 100644 --- a/docs/source/en/model_doc/blenderbot.md +++ b/docs/source/en/model_doc/blenderbot.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. 
--> +*This model was released on 2020-04-28 and added to Hugging Face Transformers on 2020-11-16.* # Blenderbot diff --git a/docs/source/en/model_doc/blip-2.md b/docs/source/en/model_doc/blip-2.md index fbfcda4613..60e15b130c 100644 --- a/docs/source/en/model_doc/blip-2.md +++ b/docs/source/en/model_doc/blip-2.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2023-01-30 and added to Hugging Face Transformers on 2023-02-09.* # BLIP-2 diff --git a/docs/source/en/model_doc/blip.md b/docs/source/en/model_doc/blip.md index a8d4c5a14b..b308fd0fa3 100644 --- a/docs/source/en/model_doc/blip.md +++ b/docs/source/en/model_doc/blip.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2022-01-28 and added to Hugging Face Transformers on 2022-12-21.*
diff --git a/docs/source/en/model_doc/bloom.md b/docs/source/en/model_doc/bloom.md index 9de9870595..a5cce1a7e8 100644 --- a/docs/source/en/model_doc/bloom.md +++ b/docs/source/en/model_doc/bloom.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2022-11-09 and added to Hugging Face Transformers on 2022-06-09.* # BLOOM @@ -24,7 +25,7 @@ rendered properly in your Markdown viewer. ## Overview -The BLOOM model has been proposed with its various versions through the [BigScience Workshop](https://bigscience.huggingface.co/). BigScience is inspired by other open science initiatives where researchers have pooled their time and resources to collectively achieve a higher impact. +The [BLOOM](https://huggingface.co/papers/2211.05100) model has been proposed with its various versions through the [BigScience Workshop](https://bigscience.huggingface.co/). BigScience is inspired by other open science initiatives where researchers have pooled their time and resources to collectively achieve a higher impact. The architecture of BLOOM is essentially similar to GPT3 (auto-regressive model for next token prediction), but has been trained on 46 different languages and 13 programming languages. Several smaller versions of the models have been trained on the same dataset. BLOOM is available in the following versions: diff --git a/docs/source/en/model_doc/bort.md b/docs/source/en/model_doc/bort.md index 5d5b923906..286559227e 100644 --- a/docs/source/en/model_doc/bort.md +++ b/docs/source/en/model_doc/bort.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. 
--> +*This model was released on 2020-10-20 and added to Hugging Face Transformers on 2023-06-20.* # BORT diff --git a/docs/source/en/model_doc/bridgetower.md b/docs/source/en/model_doc/bridgetower.md index fe63453523..6a2b09e263 100644 --- a/docs/source/en/model_doc/bridgetower.md +++ b/docs/source/en/model_doc/bridgetower.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2022-06-17 and added to Hugging Face Transformers on 2023-01-25.* # BridgeTower diff --git a/docs/source/en/model_doc/bros.md b/docs/source/en/model_doc/bros.md index 9ef37e8ea7..aeb3dd76e5 100644 --- a/docs/source/en/model_doc/bros.md +++ b/docs/source/en/model_doc/bros.md @@ -9,6 +9,7 @@ Unless required by applicable law or agreed to in writing, software distributed an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. --> +*This model was released on 2021-08-10 and added to Hugging Face Transformers on 2023-09-15.* # BROS diff --git a/docs/source/en/model_doc/byt5.md b/docs/source/en/model_doc/byt5.md index 25340f15c5..e92a0f5d4b 100644 --- a/docs/source/en/model_doc/byt5.md +++ b/docs/source/en/model_doc/byt5.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2021-05-28 and added to Hugging Face Transformers on 2021-06-01.*
PyTorch diff --git a/docs/source/en/model_doc/camembert.md b/docs/source/en/model_doc/camembert.md index efa57e1704..1b3eeed66f 100644 --- a/docs/source/en/model_doc/camembert.md +++ b/docs/source/en/model_doc/camembert.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2019-11-10 and added to Hugging Face Transformers on 2020-11-16.*
diff --git a/docs/source/en/model_doc/canine.md b/docs/source/en/model_doc/canine.md index 2d37a45b48..e1d8bb7f7f 100644 --- a/docs/source/en/model_doc/canine.md +++ b/docs/source/en/model_doc/canine.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2021-03-11 and added to Hugging Face Transformers on 2021-06-30.*
diff --git a/docs/source/en/model_doc/chameleon.md b/docs/source/en/model_doc/chameleon.md index b0265b1b72..973330cf21 100644 --- a/docs/source/en/model_doc/chameleon.md +++ b/docs/source/en/model_doc/chameleon.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2024-05-16 and added to Hugging Face Transformers on 2024-07-17.* # Chameleon diff --git a/docs/source/en/model_doc/chinese_clip.md b/docs/source/en/model_doc/chinese_clip.md index 2607c56e5e..7ed4d503c0 100644 --- a/docs/source/en/model_doc/chinese_clip.md +++ b/docs/source/en/model_doc/chinese_clip.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2022-11-02 and added to Hugging Face Transformers on 2022-12-01.* # Chinese-CLIP diff --git a/docs/source/en/model_doc/clap.md b/docs/source/en/model_doc/clap.md index 4c0fca22bb..c35a7fa9ed 100644 --- a/docs/source/en/model_doc/clap.md +++ b/docs/source/en/model_doc/clap.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2022-11-12 and added to Hugging Face Transformers on 2023-02-16.*
diff --git a/docs/source/en/model_doc/clip.md b/docs/source/en/model_doc/clip.md index 4ab9fe3f21..53a0261a17 100644 --- a/docs/source/en/model_doc/clip.md +++ b/docs/source/en/model_doc/clip.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2021-02-26 and added to Hugging Face Transformers on 2021-05-12.*
diff --git a/docs/source/en/model_doc/clipseg.md b/docs/source/en/model_doc/clipseg.md index afc357b2ca..e27d49ffe4 100644 --- a/docs/source/en/model_doc/clipseg.md +++ b/docs/source/en/model_doc/clipseg.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2021-12-18 and added to Hugging Face Transformers on 2022-11-08.* # CLIPSeg diff --git a/docs/source/en/model_doc/clvp.md b/docs/source/en/model_doc/clvp.md index 7d3f18b34d..926438a3c1 100644 --- a/docs/source/en/model_doc/clvp.md +++ b/docs/source/en/model_doc/clvp.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2023-05-12 and added to Hugging Face Transformers on 2023-11-10.* # CLVP diff --git a/docs/source/en/model_doc/code_llama.md b/docs/source/en/model_doc/code_llama.md index 3be25fb203..c578973f2e 100644 --- a/docs/source/en/model_doc/code_llama.md +++ b/docs/source/en/model_doc/code_llama.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2023-08-24 and added to Hugging Face Transformers on 2023-08-25.*
diff --git a/docs/source/en/model_doc/codegen.md b/docs/source/en/model_doc/codegen.md index 73890f13d6..e5ad3863b6 100644 --- a/docs/source/en/model_doc/codegen.md +++ b/docs/source/en/model_doc/codegen.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2022-03-25 and added to Hugging Face Transformers on 2022-06-24.* # CodeGen diff --git a/docs/source/en/model_doc/cohere.md b/docs/source/en/model_doc/cohere.md index 08087b14c4..b57eb5e42b 100644 --- a/docs/source/en/model_doc/cohere.md +++ b/docs/source/en/model_doc/cohere.md @@ -1,3 +1,18 @@ + +*This model was released on 2024-03-12 and added to Hugging Face Transformers on 2024-03-15.* +
PyTorch @@ -10,7 +25,7 @@ # Cohere -Cohere Command-R is a 35B parameter multilingual large language model designed for long context tasks like retrieval-augmented generation (RAG) and calling external APIs and tools. The model is specifically trained for grounded generation and supports both single-step and multi-step tool use. It supports a context length of 128K tokens. +Cohere [Command-R](https://cohere.com/blog/command-r) is a 35B parameter multilingual large language model designed for long context tasks like retrieval-augmented generation (RAG) and calling external APIs and tools. The model is specifically trained for grounded generation and supports both single-step and multi-step tool use. It supports a context length of 128K tokens. You can find all the original Command-R checkpoints under the [Command Models](https://huggingface.co/collections/CohereForAI/command-models-67652b401665205e17b192ad) collection. diff --git a/docs/source/en/model_doc/cohere2.md b/docs/source/en/model_doc/cohere2.md index a4836e7790..5163951024 100644 --- a/docs/source/en/model_doc/cohere2.md +++ b/docs/source/en/model_doc/cohere2.md @@ -1,3 +1,18 @@ + +*This model was released on 2024-12-13 and added to Hugging Face Transformers on 2024-12-13.* +
PyTorch @@ -8,7 +23,7 @@
-# Cohere2 +# Cohere 2 [Cohere Command R7B](https://cohere.com/blog/command-r7b) is an open weights research release of a 7B billion parameter model. It is a multilingual model trained on 23 languages and has a context window of 128k. The model features three layers with sliding window attention and ROPE for efficient local context modeling and relative positional encoding. A fourth layer uses global attention without positional embeddings, enabling unrestricted token interactions across the entire sequence. diff --git a/docs/source/en/model_doc/cohere2_vision.md b/docs/source/en/model_doc/cohere2_vision.md index b0fcddc6d3..bd01992e85 100644 --- a/docs/source/en/model_doc/cohere2_vision.md +++ b/docs/source/en/model_doc/cohere2_vision.md @@ -1,3 +1,20 @@ + +*This model was released on 2025-07-31 and added to Hugging Face Transformers on 2025-07-31.* + # Command A Vision
@@ -9,7 +26,7 @@ ## Overview -Command A Vision is a state-of-the-art multimodal model designed to seamlessly integrate visual and textual information for a wide range of applications. By combining advanced computer vision techniques with natural language processing capabilities, Command A Vision enables users to analyze, understand, and generate insights from both visual and textual data. +Command A Vision ([blog post](https://cohere.com/blog/command-a-vision)) is a state-of-the-art multimodal model designed to seamlessly integrate visual and textual information for a wide range of applications. By combining advanced computer vision techniques with natural language processing capabilities, Command A Vision enables users to analyze, understand, and generate insights from both visual and textual data. The model excels at tasks including image captioning, visual question answering, document understanding, and chart understanding. This makes it a versatile tool for AI practitioners. Its ability to process complex visual and textual inputs makes it useful in settings where text-only representations are imprecise or unavailable, like real-world image understanding and graphics-heavy document processing. diff --git a/docs/source/en/model_doc/colpali.md b/docs/source/en/model_doc/colpali.md index e9c4db5ef0..12dd9189ea 100644 --- a/docs/source/en/model_doc/colpali.md +++ b/docs/source/en/model_doc/colpali.md @@ -11,6 +11,7 @@ specific language governing permissions and limitations under the License. ⚠️ Note that this file is in Markdown but contains specific syntax for our doc-builder (similar to MDX) that may not be rendered properly in your Markdown viewer. --> +*This model was released on 2024-06-27 and added to Hugging Face Transformers on 2024-12-17.*
diff --git a/docs/source/en/model_doc/colqwen2.md b/docs/source/en/model_doc/colqwen2.md index 654a6129a3..a8249bc5c5 100644 --- a/docs/source/en/model_doc/colqwen2.md +++ b/docs/source/en/model_doc/colqwen2.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2024-06-27 and added to Hugging Face Transformers on 2025-06-02.*
diff --git a/docs/source/en/model_doc/conditional_detr.md b/docs/source/en/model_doc/conditional_detr.md index 68eda90e70..f5297e49ee 100644 --- a/docs/source/en/model_doc/conditional_detr.md +++ b/docs/source/en/model_doc/conditional_detr.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2021-08-13 and added to Hugging Face Transformers on 2022-09-22.* # Conditional DETR diff --git a/docs/source/en/model_doc/convbert.md b/docs/source/en/model_doc/convbert.md index 62d9d11688..1fbfb80963 100644 --- a/docs/source/en/model_doc/convbert.md +++ b/docs/source/en/model_doc/convbert.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2020-08-06 and added to Hugging Face Transformers on 2021-01-27.* # ConvBERT diff --git a/docs/source/en/model_doc/convnext.md b/docs/source/en/model_doc/convnext.md index 5a65c9f6cc..dc05d3ed32 100644 --- a/docs/source/en/model_doc/convnext.md +++ b/docs/source/en/model_doc/convnext.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2022-01-10 and added to Hugging Face Transformers on 2022-02-07.* # ConvNeXT diff --git a/docs/source/en/model_doc/convnextv2.md b/docs/source/en/model_doc/convnextv2.md index 4779c511fe..531b5a9dcf 100644 --- a/docs/source/en/model_doc/convnextv2.md +++ b/docs/source/en/model_doc/convnextv2.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. 
--> +*This model was released on 2023-01-02 and added to Hugging Face Transformers on 2023-03-14.* # ConvNeXt V2 diff --git a/docs/source/en/model_doc/cpm.md b/docs/source/en/model_doc/cpm.md index e639622087..0fe2caa6a4 100644 --- a/docs/source/en/model_doc/cpm.md +++ b/docs/source/en/model_doc/cpm.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2020-12-01 and added to Hugging Face Transformers on 2021-04-10.* # CPM diff --git a/docs/source/en/model_doc/cpmant.md b/docs/source/en/model_doc/cpmant.md index f8e2b3b515..6f13f785ac 100644 --- a/docs/source/en/model_doc/cpmant.md +++ b/docs/source/en/model_doc/cpmant.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2022-09-16 and added to Hugging Face Transformers on 2023-04-12.* # CPMAnt diff --git a/docs/source/en/model_doc/csm.md b/docs/source/en/model_doc/csm.md index 0abd7a73a5..21931eec82 100644 --- a/docs/source/en/model_doc/csm.md +++ b/docs/source/en/model_doc/csm.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2025-02-27 and added to Hugging Face Transformers on 2025-05-07.* # Csm diff --git a/docs/source/en/model_doc/ctrl.md b/docs/source/en/model_doc/ctrl.md index 4b5fee2b0a..a0ce5434d6 100644 --- a/docs/source/en/model_doc/ctrl.md +++ b/docs/source/en/model_doc/ctrl.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. 
--> +*This model was released on 2019-09-11 and added to Hugging Face Transformers on 2020-11-16.* # CTRL diff --git a/docs/source/en/model_doc/cvt.md b/docs/source/en/model_doc/cvt.md index 3332e832c2..9ccdb58664 100644 --- a/docs/source/en/model_doc/cvt.md +++ b/docs/source/en/model_doc/cvt.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2021-03-29 and added to Hugging Face Transformers on 2022-05-18.*
@@ -23,7 +24,7 @@ rendered properly in your Markdown viewer. # Convolutional Vision Transformer (CvT) -Convolutional Vision Transformer (CvT) is a model that combines the strengths of convolutional neural networks (CNNs) and Vision transformers for the computer vision tasks. It introduces convolutional layers into the vision transformer architecture, allowing it to capture local patterns in images while maintaining the global context provided by self-attention mechanisms. +[Convolutional Vision Transformer (CvT)](https://huggingface.co/papers/2103.15808) is a model that combines the strengths of convolutional neural networks (CNNs) and Vision transformers for the computer vision tasks. It introduces convolutional layers into the vision transformer architecture, allowing it to capture local patterns in images while maintaining the global context provided by self-attention mechanisms. You can find all the CvT checkpoints under the [Microsoft](https://huggingface.co/microsoft?search_models=cvt) organization. diff --git a/docs/source/en/model_doc/d_fine.md b/docs/source/en/model_doc/d_fine.md index b0ed576508..9dffde75eb 100644 --- a/docs/source/en/model_doc/d_fine.md +++ b/docs/source/en/model_doc/d_fine.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2024-10-17 and added to Hugging Face Transformers on 2025-04-29.* # D-FINE diff --git a/docs/source/en/model_doc/dab-detr.md b/docs/source/en/model_doc/dab-detr.md index 0f9e8dc3f3..32b27d4b24 100644 --- a/docs/source/en/model_doc/dab-detr.md +++ b/docs/source/en/model_doc/dab-detr.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. 
--> +*This model was released on 2022-01-28 and added to Hugging Face Transformers on 2025-02-04.* # DAB-DETR diff --git a/docs/source/en/model_doc/dac.md b/docs/source/en/model_doc/dac.md index e8408db502..e17cc69fc3 100644 --- a/docs/source/en/model_doc/dac.md +++ b/docs/source/en/model_doc/dac.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2023-06-11 and added to Hugging Face Transformers on 2024-08-19.* # DAC diff --git a/docs/source/en/model_doc/data2vec.md b/docs/source/en/model_doc/data2vec.md index f2df85e244..ed2b01622f 100644 --- a/docs/source/en/model_doc/data2vec.md +++ b/docs/source/en/model_doc/data2vec.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2022-02-07 and added to Hugging Face Transformers on 2022-03-01.* # Data2Vec diff --git a/docs/source/en/model_doc/dbrx.md b/docs/source/en/model_doc/dbrx.md index 11463e93d1..710c5e3898 100644 --- a/docs/source/en/model_doc/dbrx.md +++ b/docs/source/en/model_doc/dbrx.md @@ -9,6 +9,7 @@ Unless required by applicable law or agreed to in writing, software distributed an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. --> +*This model was released on 2024-03-27 and added to Hugging Face Transformers on 2024-04-18.* # DBRX diff --git a/docs/source/en/model_doc/deberta-v2.md b/docs/source/en/model_doc/deberta-v2.md index 004a4afda6..db7e9c0561 100644 --- a/docs/source/en/model_doc/deberta-v2.md +++ b/docs/source/en/model_doc/deberta-v2.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. 
--> +*This model was released on 2020-06-05 and added to Hugging Face Transformers on 2021-02-19.*
diff --git a/docs/source/en/model_doc/deberta.md b/docs/source/en/model_doc/deberta.md index 5991a0c085..b5f18b6430 100644 --- a/docs/source/en/model_doc/deberta.md +++ b/docs/source/en/model_doc/deberta.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2020-06-05 and added to Hugging Face Transformers on 2020-11-16.*
diff --git a/docs/source/en/model_doc/decision_transformer.md b/docs/source/en/model_doc/decision_transformer.md index 6f820336b5..cdfcd42f9a 100644 --- a/docs/source/en/model_doc/decision_transformer.md +++ b/docs/source/en/model_doc/decision_transformer.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2021-06-02 and added to Hugging Face Transformers on 2022-03-23.* # Decision Transformer diff --git a/docs/source/en/model_doc/deepseek_v2.md b/docs/source/en/model_doc/deepseek_v2.md index ed4876bd67..bcdf65fbe8 100644 --- a/docs/source/en/model_doc/deepseek_v2.md +++ b/docs/source/en/model_doc/deepseek_v2.md @@ -13,12 +13,13 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2024-05-07 and added to Hugging Face Transformers on 2025-07-09.* # DeepSeek-V2 ## Overview -The DeepSeek-V2 model was proposed in [DeepSeek-V2: A Strong, Economical, and Efficient Mixture-of-Experts Language Model](https://arxiv.org/abs/2405.04434) by DeepSeek-AI Team. +The DeepSeek-V2 model was proposed in [DeepSeek-V2: A Strong, Economical, and Efficient Mixture-of-Experts Language Model](https://huggingface.co/papers/2405.04434) by DeepSeek-AI Team. The abstract from the paper is the following: We present DeepSeek-V2, a strong Mixture-of-Experts (MoE) language model characterized by economical training and efficient inference. It comprises 236B total parameters, of which 21B are activated for each token, and supports a context length of 128K tokens. DeepSeek-V2 adopts innovative architectures including Multi-head Latent Attention (MLA) and DeepSeekMoE. MLA guarantees efficient inference through significantly compressing the Key-Value (KV) cache into a latent vector, while DeepSeekMoE enables training strong models at an economical cost through sparse computation. 
Compared with DeepSeek 67B, DeepSeek-V2 achieves significantly stronger performance, and meanwhile saves 42.5% of training costs, reduces the KV cache by 93.3%, and boosts the maximum generation throughput to 5.76 times. We pretrain DeepSeek-V2 on a high-quality and multi-source corpus consisting of 8.1T tokens, and further perform Supervised Fine-Tuning (SFT) and Reinforcement Learning (RL) to fully unlock its potential. Evaluation results show that, even with only 21B activated parameters, DeepSeek-V2 and its chat versions still achieve top-tier performance among open-source models. diff --git a/docs/source/en/model_doc/deepseek_v3.md b/docs/source/en/model_doc/deepseek_v3.md index 9da98b5785..85008b1596 100644 --- a/docs/source/en/model_doc/deepseek_v3.md +++ b/docs/source/en/model_doc/deepseek_v3.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2024-12-27 and added to Hugging Face Transformers on 2025-03-28.* # DeepSeek-V3 diff --git a/docs/source/en/model_doc/deepseek_vl.md b/docs/source/en/model_doc/deepseek_vl.md index b01ef7064a..863472118d 100644 --- a/docs/source/en/model_doc/deepseek_vl.md +++ b/docs/source/en/model_doc/deepseek_vl.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2024-03-08 and added to Hugging Face Transformers on 2025-07-25.*
@@ -24,7 +25,7 @@ rendered properly in your Markdown viewer. # DeepseekVL -[Deepseek-VL](https://arxiv.org/abs/2403.05525) was introduced by the DeepSeek AI team. It is a vision-language model (VLM) designed to process both text and images for generating contextually relevant responses. The model leverages [LLaMA](./llama) as its text encoder, while [SigLip](./siglip) is used for encoding images. +[Deepseek-VL](https://huggingface.co/papers/2403.05525) was introduced by the DeepSeek AI team. It is a vision-language model (VLM) designed to process both text and images for generating contextually relevant responses. The model leverages [LLaMA](./llama) as its text encoder, while [SigLip](./siglip) is used for encoding images. You can find all the original Deepseek-VL checkpoints under the [DeepSeek-community](https://huggingface.co/deepseek-community) organization. diff --git a/docs/source/en/model_doc/deepseek_vl_hybrid.md b/docs/source/en/model_doc/deepseek_vl_hybrid.md index e713782748..b9aec339ae 100644 --- a/docs/source/en/model_doc/deepseek_vl_hybrid.md +++ b/docs/source/en/model_doc/deepseek_vl_hybrid.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2024-03-08 and added to Hugging Face Transformers on 2025-07-25.*
@@ -23,7 +24,7 @@ rendered properly in your Markdown viewer. # DeepseekVLHybrid -[Deepseek-VL-Hybrid](https://arxiv.org/abs/2403.05525) was introduced by the DeepSeek AI team. It is a vision-language model (VLM) designed to process both text and images for generating contextually relevant responses. The model leverages [LLaMA](./llama) as its text encoder, while [SigLip](./siglip) is used for encoding low-resolution images and [SAM (Segment Anything Model)](./sam) is incorporated to handle high-resolution image encoding, enhancing the model’s ability to process fine-grained visual details. Deepseek-VL-Hybrid is a variant of Deepseek-VL that uses [SAM (Segment Anything Model)](./sam) to handle high-resolution image encoding. +[Deepseek-VL-Hybrid](https://huggingface.co/papers/2403.05525) was introduced by the DeepSeek AI team. It is a vision-language model (VLM) designed to process both text and images for generating contextually relevant responses. The model leverages [LLaMA](./llama) as its text encoder, while [SigLip](./siglip) is used for encoding low-resolution images and [SAM (Segment Anything Model)](./sam) is incorporated to handle high-resolution image encoding, enhancing the model’s ability to process fine-grained visual details. Deepseek-VL-Hybrid is a variant of Deepseek-VL that uses [SAM (Segment Anything Model)](./sam) to handle high-resolution image encoding. You can find all the original Deepseek-VL-Hybrid checkpoints under the [DeepSeek-community](https://huggingface.co/deepseek-community) organization. diff --git a/docs/source/en/model_doc/deformable_detr.md b/docs/source/en/model_doc/deformable_detr.md index 84c8de5496..a8c57f490d 100644 --- a/docs/source/en/model_doc/deformable_detr.md +++ b/docs/source/en/model_doc/deformable_detr.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. 
--> +*This model was released on 2020-10-08 and added to Hugging Face Transformers on 2022-09-14.*
diff --git a/docs/source/en/model_doc/deit.md b/docs/source/en/model_doc/deit.md index c2f0f17c06..7af7514d67 100644 --- a/docs/source/en/model_doc/deit.md +++ b/docs/source/en/model_doc/deit.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2020-12-23 and added to Hugging Face Transformers on 2021-04-13.* # DeiT diff --git a/docs/source/en/model_doc/deplot.md b/docs/source/en/model_doc/deplot.md index 28a5c70940..651ddcef7f 100644 --- a/docs/source/en/model_doc/deplot.md +++ b/docs/source/en/model_doc/deplot.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2022-12-20 and added to Hugging Face Transformers on 2023-06-20.* # DePlot diff --git a/docs/source/en/model_doc/depth_anything.md b/docs/source/en/model_doc/depth_anything.md index ea52dea915..cf3c96d4ee 100644 --- a/docs/source/en/model_doc/depth_anything.md +++ b/docs/source/en/model_doc/depth_anything.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2024-01-19 and added to Hugging Face Transformers on 2024-01-25.*
diff --git a/docs/source/en/model_doc/depth_anything_v2.md b/docs/source/en/model_doc/depth_anything_v2.md index 413273b05d..e8637ba619 100644 --- a/docs/source/en/model_doc/depth_anything_v2.md +++ b/docs/source/en/model_doc/depth_anything_v2.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2024-06-13 and added to Hugging Face Transformers on 2024-07-05.* # Depth Anything V2 diff --git a/docs/source/en/model_doc/depth_pro.md b/docs/source/en/model_doc/depth_pro.md index 84f350a2a0..33149f2d46 100644 --- a/docs/source/en/model_doc/depth_pro.md +++ b/docs/source/en/model_doc/depth_pro.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2024-10-02 and added to Hugging Face Transformers on 2025-02-10.* # DepthPro diff --git a/docs/source/en/model_doc/deta.md b/docs/source/en/model_doc/deta.md index c151734f92..0dda1c8917 100644 --- a/docs/source/en/model_doc/deta.md +++ b/docs/source/en/model_doc/deta.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2022-12-12 and added to Hugging Face Transformers on 2023-06-20.* # DETA diff --git a/docs/source/en/model_doc/detr.md b/docs/source/en/model_doc/detr.md index 81d692382c..86c42c8adf 100644 --- a/docs/source/en/model_doc/detr.md +++ b/docs/source/en/model_doc/detr.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2020-05-26 and added to Hugging Face Transformers on 2021-06-09.*
diff --git a/docs/source/en/model_doc/dia.md b/docs/source/en/model_doc/dia.md index 07a241efe9..8e17804b98 100644 --- a/docs/source/en/model_doc/dia.md +++ b/docs/source/en/model_doc/dia.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2025-04-21 and added to Hugging Face Transformers on 2025-06-26.* # Dia @@ -26,7 +27,7 @@ rendered properly in your Markdown viewer. ## Overview -Dia is an open-source text-to-speech (TTS) model (1.6B parameters) developed by [Nari Labs](https://huggingface.co/nari-labs). +[Dia](https://github.com/nari-labs/dia) is an open-source text-to-speech (TTS) model (1.6B parameters) developed by [Nari Labs](https://huggingface.co/nari-labs). It can generate highly realistic dialogue from transcript including non-verbal communications such as laughter and coughing. Furthermore, emotion and tone control is also possible via audio conditioning (voice cloning). diff --git a/docs/source/en/model_doc/dialogpt.md b/docs/source/en/model_doc/dialogpt.md index 946c61b305..cce264d342 100644 --- a/docs/source/en/model_doc/dialogpt.md +++ b/docs/source/en/model_doc/dialogpt.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2019-11-01 and added to Hugging Face Transformers on 2020-11-16.* # DialoGPT diff --git a/docs/source/en/model_doc/diffllama.md b/docs/source/en/model_doc/diffllama.md index 83ea51ac12..406bae43c5 100644 --- a/docs/source/en/model_doc/diffllama.md +++ b/docs/source/en/model_doc/diffllama.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. 
--> +*This model was released on 2024-10-07 and added to Hugging Face Transformers on 2025-01-07.* # DiffLlama diff --git a/docs/source/en/model_doc/dinat.md b/docs/source/en/model_doc/dinat.md index aab1c6388f..e6d3385003 100644 --- a/docs/source/en/model_doc/dinat.md +++ b/docs/source/en/model_doc/dinat.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2022-09-29 and added to Hugging Face Transformers on 2022-11-18.* # Dilated Neighborhood Attention Transformer diff --git a/docs/source/en/model_doc/dinov2.md b/docs/source/en/model_doc/dinov2.md index 749d20d006..11c0eceedb 100644 --- a/docs/source/en/model_doc/dinov2.md +++ b/docs/source/en/model_doc/dinov2.md @@ -9,6 +9,7 @@ Unless required by applicable law or agreed to in writing, software distributed an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. --> +*This model was released on 2023-04-14 and added to Hugging Face Transformers on 2023-07-18.*
diff --git a/docs/source/en/model_doc/dinov2_with_registers.md b/docs/source/en/model_doc/dinov2_with_registers.md index 8bca569bc9..f89de76d21 100644 --- a/docs/source/en/model_doc/dinov2_with_registers.md +++ b/docs/source/en/model_doc/dinov2_with_registers.md @@ -6,6 +6,7 @@ Unless required by applicable law or agreed to in writing, software distributed an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. --> +*This model was released on 2023-09-28 and added to Hugging Face Transformers on 2024-12-24.* # DINOv2 with Registers diff --git a/docs/source/en/model_doc/distilbert.md b/docs/source/en/model_doc/distilbert.md index 0b2162c5e0..a8ee2652e5 100644 --- a/docs/source/en/model_doc/distilbert.md +++ b/docs/source/en/model_doc/distilbert.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2019-10-02 and added to Hugging Face Transformers on 2020-11-16.*
diff --git a/docs/source/en/model_doc/dit.md b/docs/source/en/model_doc/dit.md index a48c8e9110..a5fe5ca495 100644 --- a/docs/source/en/model_doc/dit.md +++ b/docs/source/en/model_doc/dit.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2022-03-04 and added to Hugging Face Transformers on 2022-03-10.*
PyTorch diff --git a/docs/source/en/model_doc/doge.md b/docs/source/en/model_doc/doge.md index 76ffc390d5..6221940d5d 100644 --- a/docs/source/en/model_doc/doge.md +++ b/docs/source/en/model_doc/doge.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2024-12-27 and added to Hugging Face Transformers on 2025-07-08.* # Doge diff --git a/docs/source/en/model_doc/donut.md b/docs/source/en/model_doc/donut.md index fe2d2d4fe0..2a18e7b221 100644 --- a/docs/source/en/model_doc/donut.md +++ b/docs/source/en/model_doc/donut.md @@ -12,6 +12,7 @@ Unless required by applicable law or agreed to in writing, software distributed rendered properly in your Markdown viewer. specific language governing permissions and limitations under the License. --> +*This model was released on 2021-11-30 and added to Hugging Face Transformers on 2022-08-12.*
@@ -21,7 +22,7 @@ specific language governing permissions and limitations under the License. --> # Donut -[Donut (Document Understanding Transformer)](https://huggingface.co/papers2111.15664) is a visual document understanding model that doesn't require an Optical Character Recognition (OCR) engine. Unlike traditional approaches that extract text using OCR before processing, Donut employs an end-to-end Transformer-based architecture to directly analyze document images. This eliminates OCR-related inefficiencies making it more accurate and adaptable to diverse languages and formats. +[Donut (Document Understanding Transformer)](https://huggingface.co/papers/2111.15664) is a visual document understanding model that doesn't require an Optical Character Recognition (OCR) engine. Unlike traditional approaches that extract text using OCR before processing, Donut employs an end-to-end Transformer-based architecture to directly analyze document images. This eliminates OCR-related inefficiencies making it more accurate and adaptable to diverse languages and formats. Donut features vision encoder ([Swin](./swin)) and a text decoder ([BART](./bart)). Swin converts document images into embeddings and BART processes them into meaningful text sequences. diff --git a/docs/source/en/model_doc/dots1.md b/docs/source/en/model_doc/dots1.md index b6925cb29f..337cad8cb4 100644 --- a/docs/source/en/model_doc/dots1.md +++ b/docs/source/en/model_doc/dots1.md @@ -13,12 +13,13 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2025-06-06 and added to Hugging Face Transformers on 2025-06-25.* # dots.llm1 ## Overview -The `dots.llm1` model was proposed in [dots.llm1 technical report](https://www.arxiv.org/pdf/2506.05767) by rednote-hilab team. +The `dots.llm1` model was proposed in [dots.llm1 technical report](https://huggingface.co/papers/2506.05767) by rednote-hilab team. 
The abstract from the report is the following: diff --git a/docs/source/en/model_doc/dpr.md b/docs/source/en/model_doc/dpr.md index 4b3d3f4a26..3534009e17 100644 --- a/docs/source/en/model_doc/dpr.md +++ b/docs/source/en/model_doc/dpr.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2020-04-10 and added to Hugging Face Transformers on 2020-11-16.* # DPR diff --git a/docs/source/en/model_doc/dpt.md b/docs/source/en/model_doc/dpt.md index a763e2af62..d4415dac83 100644 --- a/docs/source/en/model_doc/dpt.md +++ b/docs/source/en/model_doc/dpt.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2021-03-24 and added to Hugging Face Transformers on 2022-03-28.* # DPT diff --git a/docs/source/en/model_doc/efficientformer.md b/docs/source/en/model_doc/efficientformer.md index 31b1d37f0f..320834a439 100644 --- a/docs/source/en/model_doc/efficientformer.md +++ b/docs/source/en/model_doc/efficientformer.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2022-06-02 and added to Hugging Face Transformers on 2023-06-20.* # EfficientFormer diff --git a/docs/source/en/model_doc/efficientloftr.md b/docs/source/en/model_doc/efficientloftr.md index 9c6964295f..465961864c 100644 --- a/docs/source/en/model_doc/efficientloftr.md +++ b/docs/source/en/model_doc/efficientloftr.md @@ -11,6 +11,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2024-03-07 and added to Hugging Face Transformers on 2025-07-22.*
diff --git a/docs/source/en/model_doc/efficientnet.md b/docs/source/en/model_doc/efficientnet.md index e11eab612c..859923126a 100644 --- a/docs/source/en/model_doc/efficientnet.md +++ b/docs/source/en/model_doc/efficientnet.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2019-05-28 and added to Hugging Face Transformers on 2023-02-20.* # EfficientNet diff --git a/docs/source/en/model_doc/electra.md b/docs/source/en/model_doc/electra.md index 1f1f5be076..074479019d 100644 --- a/docs/source/en/model_doc/electra.md +++ b/docs/source/en/model_doc/electra.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2020-03-23 and added to Hugging Face Transformers on 2020-11-16.*
diff --git a/docs/source/en/model_doc/emu3.md b/docs/source/en/model_doc/emu3.md index 5f51566084..3f62bffacd 100644 --- a/docs/source/en/model_doc/emu3.md +++ b/docs/source/en/model_doc/emu3.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2024-09-27 and added to Hugging Face Transformers on 2025-01-10.* # Emu3 diff --git a/docs/source/en/model_doc/encodec.md b/docs/source/en/model_doc/encodec.md index 4f6640bd38..8909917303 100644 --- a/docs/source/en/model_doc/encodec.md +++ b/docs/source/en/model_doc/encodec.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2022-10-24 and added to Hugging Face Transformers on 2023-06-14.* # EnCodec diff --git a/docs/source/en/model_doc/encoder-decoder.md b/docs/source/en/model_doc/encoder-decoder.md index f01d4c1a67..96a3874c3c 100644 --- a/docs/source/en/model_doc/encoder-decoder.md +++ b/docs/source/en/model_doc/encoder-decoder.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2017-06-12 and added to Hugging Face Transformers on 2020-11-16.*
diff --git a/docs/source/en/model_doc/eomt.md b/docs/source/en/model_doc/eomt.md index 86816a475f..754b88e2c3 100644 --- a/docs/source/en/model_doc/eomt.md +++ b/docs/source/en/model_doc/eomt.md @@ -8,6 +8,7 @@ specific language governing permissions and limitations under the License. ⚠️ Note that this file is in Markdown but contain specific syntax for our doc-builder (similar to MDX) that may not be rendered properly in your Markdown viewer. --> +*This model was released on 2025-03-24 and added to Hugging Face Transformers on 2025-06-27.* # EoMT @@ -17,7 +18,7 @@ rendered properly in your Markdown viewer. ## Overview -The Encoder-only Mask Transformer (EoMT) model was introduced in the CVPR 2025 Highlight Paper [Your ViT is Secretly an Image Segmentation Model](https://www.tue-mps.org/eomt) by Tommie Kerssies, Niccolò Cavagnero, Alexander Hermans, Narges Norouzi, Giuseppe Averta, Bastian Leibe, Gijs Dubbelman, and Daan de Geus. +[The Encoder-only Mask Transformer](https://www.tue-mps.org/eomt) (EoMT) model was introduced in the CVPR 2025 Highlight Paper *[Your ViT is Secretly an Image Segmentation Model](https://huggingface.co/papers/2503.19108)* by Tommie Kerssies, Niccolò Cavagnero, Alexander Hermans, Narges Norouzi, Giuseppe Averta, Bastian Leibe, Gijs Dubbelman, and Daan de Geus. EoMT reveals Vision Transformers can perform image segmentation efficiently without task-specific components. The abstract from the paper is the following: diff --git a/docs/source/en/model_doc/ernie.md b/docs/source/en/model_doc/ernie.md index 4d076c95d4..8a97ea226f 100644 --- a/docs/source/en/model_doc/ernie.md +++ b/docs/source/en/model_doc/ernie.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2019-04-19 and added to Hugging Face Transformers on 2022-09-09.*
@@ -22,8 +23,8 @@ rendered properly in your Markdown viewer. # ERNIE -[ERNIE1.0](https://arxiv.org/abs/1904.09223), [ERNIE2.0](https://ojs.aaai.org/index.php/AAAI/article/view/6428), -[ERNIE3.0](https://arxiv.org/abs/2107.02137), [ERNIE-Gram](https://arxiv.org/abs/2010.12148), [ERNIE-health](https://arxiv.org/abs/2110.07244) are a series of powerful models proposed by baidu, especially in Chinese tasks. +[ERNIE1.0](https://huggingface.co/papers/1904.09223), [ERNIE2.0](https://ojs.aaai.org/index.php/AAAI/article/view/6428), +[ERNIE3.0](https://huggingface.co/papers/2107.02137), [ERNIE-Gram](https://huggingface.co/papers/2010.12148), [ERNIE-health](https://huggingface.co/papers/2110.07244) are a series of powerful models proposed by baidu, especially in Chinese tasks. ERNIE (Enhanced Representation through kNowledge IntEgration) is designed to learn language representation enhanced by knowledge masking strategies, which includes entity-level masking and phrase-level masking. diff --git a/docs/source/en/model_doc/ernie4_5.md b/docs/source/en/model_doc/ernie4_5.md index c9f4c04356..e0a7e30015 100644 --- a/docs/source/en/model_doc/ernie4_5.md +++ b/docs/source/en/model_doc/ernie4_5.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2025-06-30 and added to Hugging Face Transformers on 2025-07-21.*
diff --git a/docs/source/en/model_doc/ernie4_5_moe.md b/docs/source/en/model_doc/ernie4_5_moe.md index f16cfb1f92..9df56edd15 100644 --- a/docs/source/en/model_doc/ernie4_5_moe.md +++ b/docs/source/en/model_doc/ernie4_5_moe.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2025-06-30 and added to Hugging Face Transformers on 2025-07-21.*
diff --git a/docs/source/en/model_doc/ernie_m.md b/docs/source/en/model_doc/ernie_m.md index 292fce2ac3..508fe2f596 100644 --- a/docs/source/en/model_doc/ernie_m.md +++ b/docs/source/en/model_doc/ernie_m.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2020-12-31 and added to Hugging Face Transformers on 2023-06-20.* # ErnieM diff --git a/docs/source/en/model_doc/esm.md b/docs/source/en/model_doc/esm.md index 6061d8eea9..5d0f48f3a5 100644 --- a/docs/source/en/model_doc/esm.md +++ b/docs/source/en/model_doc/esm.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2019-04-19 and added to Hugging Face Transformers on 2022-09-30.* # ESM diff --git a/docs/source/en/model_doc/evolla.md b/docs/source/en/model_doc/evolla.md index 79c3b120cb..a39103a06d 100644 --- a/docs/source/en/model_doc/evolla.md +++ b/docs/source/en/model_doc/evolla.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2025-01-05 and added to Hugging Face Transformers on 2025-07-26.* # Evolla diff --git a/docs/source/en/model_doc/exaone4.md b/docs/source/en/model_doc/exaone4.md index 45667cea34..166da68575 100644 --- a/docs/source/en/model_doc/exaone4.md +++ b/docs/source/en/model_doc/exaone4.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2025-07-15 and added to Hugging Face Transformers on 2025-07-26.* # EXAONE 4 @@ -28,7 +29,7 @@ In the EXAONE 4.0 architecture, we apply new architectural changes compared to p 1. 
**Hybrid Attention**: For the 32B model, we adopt hybrid attention scheme, which combines *Local attention (sliding window attention)* with *Global attention (full attention)* in a 3:1 ratio. We do not use RoPE (Rotary Positional Embedding) for global attention for better global context understanding. 2. **QK-Reorder-Norm**: We reorder the LayerNorm position from the traditional Pre-LN scheme by applying LayerNorm directly to the attention and MLP outputs, and we add RMS normalization right after the Q and K projection. It helps yield better performance on downstream tasks despite consuming more computation. -For more details, please refer to our [technical report](https://arxiv.org/abs/2507.11407), [HuggingFace paper](https://huggingface.co/papers/2507.11407), [blog](https://www.lgresearch.ai/blog/view?seq=576), and [GitHub](https://github.com/LG-AI-EXAONE/EXAONE-4.0). +For more details, please refer to our [technical report](https://huggingface.co/papers/2507.11407), [HuggingFace paper](https://huggingface.co/papers/2507.11407), [blog](https://www.lgresearch.ai/blog/view?seq=576), and [GitHub](https://github.com/LG-AI-EXAONE/EXAONE-4.0). All model weights including quantized versions are available at [Huggingface Collections](https://huggingface.co/collections/LGAI-EXAONE/exaone-40-686b2e0069800c835ed48375). diff --git a/docs/source/en/model_doc/falcon.md b/docs/source/en/model_doc/falcon.md index b7b87e2ab9..f336adfd6d 100644 --- a/docs/source/en/model_doc/falcon.md +++ b/docs/source/en/model_doc/falcon.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2023-11-28 and added to Hugging Face Transformers on 2023-07-11.*
diff --git a/docs/source/en/model_doc/falcon3.md b/docs/source/en/model_doc/falcon3.md index 276548be77..93dc356e21 100644 --- a/docs/source/en/model_doc/falcon3.md +++ b/docs/source/en/model_doc/falcon3.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2024-12-17 and added to Hugging Face Transformers on 2024-12-17.* # Falcon3 @@ -24,7 +25,7 @@ rendered properly in your Markdown viewer. ## Overview -Falcon3 represents a natural evolution from previous releases, emphasizing expanding the models' science, math, and code capabilities. This iteration includes five base models: Falcon3-1B-Base, Falcon3-3B-Base, Falcon3-Mamba-7B-Base, Falcon3-7B-Base, and Falcon3-10B-Base. In developing these models, we incorporated several key innovations aimed at improving the models' performances while reducing training costs: +[Falcon3](https://falconllm.tii.ae/falcon3/index.html) represents a natural evolution from previous releases, emphasizing expanding the models' science, math, and code capabilities. This iteration includes five base models: Falcon3-1B-Base, Falcon3-3B-Base, Falcon3-Mamba-7B-Base, Falcon3-7B-Base, and Falcon3-10B-Base. In developing these models, we incorporated several key innovations aimed at improving the models' performances while reducing training costs: One pre-training: We conducted a single large-scale pretraining run on the 7B model, using 2048 H100 GPU chips, leveraging 14 trillion tokens featuring web, code, STEM, and curated high-quality and multilingual data. Depth up-scaling for improved reasoning: Building on recent studies on the effects of model depth, we upscaled the 7B model to a 10B parameters model by duplicating the redundant layers and continuing pre-training with 2TT of high-quality data. This yielded Falcon3-10B-Base which achieves state-of-the-art zero-shot and few-shot performance for models under 13B parameters. 
diff --git a/docs/source/en/model_doc/falcon_h1.md b/docs/source/en/model_doc/falcon_h1.md index 96d2ea8dec..981c00bd62 100644 --- a/docs/source/en/model_doc/falcon_h1.md +++ b/docs/source/en/model_doc/falcon_h1.md @@ -8,12 +8,13 @@ specific language governing permissions and limitations under the License. ⚠️ Note that this file is in Markdown but contain specific syntax for our doc-builder (similar to MDX) that may not be rendered properly in your Markdown viewer. --> +*This model was released on 2025-05-21 and added to Hugging Face Transformers on 2025-05-21.* # FalconH1 ## Overview -The FalconH1 model was developed by the TII Pretraining team. A comprehensive research paper covering the architecture, pretraining dynamics, experimental results, and conclusions is forthcoming. You can read more about this series in [this website](https://github.com/tiiuae/Falcon-H1). +The [FalconH1](https://huggingface.co/blog/tiiuae/falcon-h1) model was developed by the TII Pretraining team. A comprehensive research paper covering the architecture, pretraining dynamics, experimental results, and conclusions is forthcoming. You can read more about this series in [this website](https://github.com/tiiuae/Falcon-H1). ## Contributors diff --git a/docs/source/en/model_doc/falcon_mamba.md b/docs/source/en/model_doc/falcon_mamba.md index 0b797c7c78..faf5ad79b4 100644 --- a/docs/source/en/model_doc/falcon_mamba.md +++ b/docs/source/en/model_doc/falcon_mamba.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2024-10-07 and added to Hugging Face Transformers on 2024-08-12.*
diff --git a/docs/source/en/model_doc/fastspeech2_conformer.md b/docs/source/en/model_doc/fastspeech2_conformer.md index f6abf6125f..a131f75f95 100644 --- a/docs/source/en/model_doc/fastspeech2_conformer.md +++ b/docs/source/en/model_doc/fastspeech2_conformer.md @@ -9,6 +9,7 @@ Unless required by applicable law or agreed to in writing, software distributed an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. --> +*This model was released on 2020-10-26 and added to Hugging Face Transformers on 2024-01-03.* # FastSpeech2Conformer diff --git a/docs/source/en/model_doc/flan-t5.md b/docs/source/en/model_doc/flan-t5.md index 8f6f413894..cc2b768172 100644 --- a/docs/source/en/model_doc/flan-t5.md +++ b/docs/source/en/model_doc/flan-t5.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2022-10-20 and added to Hugging Face Transformers on 2023-06-20.* # FLAN-T5 diff --git a/docs/source/en/model_doc/flan-ul2.md b/docs/source/en/model_doc/flan-ul2.md index 3b946b909b..f8cab7f79b 100644 --- a/docs/source/en/model_doc/flan-ul2.md +++ b/docs/source/en/model_doc/flan-ul2.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2023-03-03 and added to Hugging Face Transformers on 2023-06-20.* # FLAN-UL2 @@ -25,7 +26,7 @@ rendered properly in your Markdown viewer. ## Overview -Flan-UL2 is an encoder decoder model based on the T5 architecture. It uses the same configuration as the [UL2](ul2) model released earlier last year. +[Flan-UL2](https://www.yitay.net/blog/flan-ul2-20b) is an encoder decoder model based on the T5 architecture. It uses the same configuration as the [UL2](ul2) model released earlier last year. 
It was fine tuned using the "Flan" prompt tuning and dataset collection. Similar to `Flan-T5`, one can directly use FLAN-UL2 weights without finetuning the model: According to the original blog here are the notable improvements: @@ -35,7 +36,7 @@ According to the original blog here are the notable improvements: - The original UL2 model also had mode switch tokens that was rather mandatory to get good performance. However, they were a little cumbersome as this requires often some changes during inference or finetuning. In this update/change, we continue training UL2 20B for an additional 100k steps (with small batch) to forget “mode tokens” before applying Flan instruction tuning. This Flan-UL2 checkpoint does not require mode tokens anymore. Google has released the following variants: -The original checkpoints can be found [here](https://github.com/google-research/t5x/blob/main/docs/models.md#flan-ul2-checkpoints). +The original checkpoints can be found [here](https://github.com/google-research/google-research/tree/master/ul2). ## Running on low resource devices diff --git a/docs/source/en/model_doc/flaubert.md b/docs/source/en/model_doc/flaubert.md index f921cfdce1..810bde229b 100644 --- a/docs/source/en/model_doc/flaubert.md +++ b/docs/source/en/model_doc/flaubert.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2019-12-11 and added to Hugging Face Transformers on 2020-11-16.* # FlauBERT diff --git a/docs/source/en/model_doc/flava.md b/docs/source/en/model_doc/flava.md index 9360bb7a97..05b63399ea 100644 --- a/docs/source/en/model_doc/flava.md +++ b/docs/source/en/model_doc/flava.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. 
--> +*This model was released on 2021-12-08 and added to Hugging Face Transformers on 2022-05-11.* # FLAVA diff --git a/docs/source/en/model_doc/fnet.md b/docs/source/en/model_doc/fnet.md index 5d1a7d498c..79a4e9e443 100644 --- a/docs/source/en/model_doc/fnet.md +++ b/docs/source/en/model_doc/fnet.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2021-05-09 and added to Hugging Face Transformers on 2021-09-20.* # FNet diff --git a/docs/source/en/model_doc/focalnet.md b/docs/source/en/model_doc/focalnet.md index 02cd9e173d..b5ce29086a 100644 --- a/docs/source/en/model_doc/focalnet.md +++ b/docs/source/en/model_doc/focalnet.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2022-03-22 and added to Hugging Face Transformers on 2023-04-23.* # FocalNet diff --git a/docs/source/en/model_doc/fsmt.md b/docs/source/en/model_doc/fsmt.md index acce6979ba..27c7d3a899 100644 --- a/docs/source/en/model_doc/fsmt.md +++ b/docs/source/en/model_doc/fsmt.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2019-07-15 and added to Hugging Face Transformers on 2020-11-16.* # FSMT diff --git a/docs/source/en/model_doc/funnel.md b/docs/source/en/model_doc/funnel.md index 8eb35ea1d3..606541d388 100644 --- a/docs/source/en/model_doc/funnel.md +++ b/docs/source/en/model_doc/funnel.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. 
--> +*This model was released on 2020-06-05 and added to Hugging Face Transformers on 2020-11-16.* # Funnel Transformer diff --git a/docs/source/en/model_doc/fuyu.md b/docs/source/en/model_doc/fuyu.md index 60ae9efdf3..91506f8839 100644 --- a/docs/source/en/model_doc/fuyu.md +++ b/docs/source/en/model_doc/fuyu.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2023-10-17 and added to Hugging Face Transformers on 2023-10-19.* # Fuyu diff --git a/docs/source/en/model_doc/gemma.md b/docs/source/en/model_doc/gemma.md index 63e4d0409f..041e6ddb6e 100644 --- a/docs/source/en/model_doc/gemma.md +++ b/docs/source/en/model_doc/gemma.md @@ -14,6 +14,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2024-03-13 and added to Hugging Face Transformers on 2024-02-21.*
diff --git a/docs/source/en/model_doc/gemma2.md b/docs/source/en/model_doc/gemma2.md index 08ff2359f4..80964a477c 100644 --- a/docs/source/en/model_doc/gemma2.md +++ b/docs/source/en/model_doc/gemma2.md @@ -14,6 +14,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2024-07-31 and added to Hugging Face Transformers on 2024-06-27.*
PyTorch diff --git a/docs/source/en/model_doc/gemma3.md b/docs/source/en/model_doc/gemma3.md index 0fd1b7452b..e956d98c13 100644 --- a/docs/source/en/model_doc/gemma3.md +++ b/docs/source/en/model_doc/gemma3.md @@ -14,6 +14,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2025-03-25 and added to Hugging Face Transformers on 2025-03-12.*
@@ -24,7 +25,7 @@ rendered properly in your Markdown viewer. # Gemma 3 -[Gemma 3](https://goo.gle/Gemma3Report) is a multimodal model with pretrained and instruction-tuned variants, available in 1B, 13B, and 27B parameters. The architecture is mostly the same as the previous Gemma versions. The key differences are alternating 5 local sliding window self-attention layers for every global self-attention layer, support for a longer context length of 128K tokens, and a [SigLip](./siglip) encoder that can "pan & scan" high-resolution images to prevent information from disappearing in high resolution images or images with non-square aspect ratios. +[Gemma 3](https://huggingface.co/papers/2503.19786) is a multimodal model with pretrained and instruction-tuned variants, available in 1B, 13B, and 27B parameters. The architecture is mostly the same as the previous Gemma versions. The key differences are alternating 5 local sliding window self-attention layers for every global self-attention layer, support for a longer context length of 128K tokens, and a [SigLip](./siglip) encoder that can "pan & scan" high-resolution images to prevent information from disappearing in high resolution images or images with non-square aspect ratios. The instruction-tuned variant was post-trained with knowledge distillation and reinforcement learning. diff --git a/docs/source/en/model_doc/gemma3n.md b/docs/source/en/model_doc/gemma3n.md index 803940b6f2..eef1cb3713 100644 --- a/docs/source/en/model_doc/gemma3n.md +++ b/docs/source/en/model_doc/gemma3n.md @@ -14,6 +14,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2025-05-20 and added to Hugging Face Transformers on 2025-06-26.*
@@ -26,7 +27,7 @@ rendered properly in your Markdown viewer. ## Overview -Gemma3n is a multimodal model with pretrained and instruction-tuned variants, available in E4B and E2B sizes. While +[Gemma3n](https://developers.googleblog.com/en/introducing-gemma-3n/) is a multimodal model with pretrained and instruction-tuned variants, available in E4B and E2B sizes. While large portions of the language model architecture are shared with prior Gemma releases, there are many new additions in this model, including [Alternating Updates][altup] (AltUp), [Learned Augmented Residual Layer][laurel] (LAuReL), [MatFormer][matformer], Per-Layer Embeddings (PLE), [Activation Sparsity with Statistical Top-k][spark-transformer], and KV cache sharing. The language model uses @@ -199,7 +200,7 @@ echo -e "Plants create energy through a process known as" | transformers run --t [altup]: https://proceedings.neurips.cc/paper_files/paper/2023/hash/f2059277ac6ce66e7e5543001afa8bb5-Abstract-Conference.html [attention-mask-viz]: https://github.com/huggingface/transformers/blob/beb9b5b02246b9b7ee81ddf938f93f44cfeaad19/src/transformers/utils/attention_visualizer.py#L139 [gemma3n-collection]: https://huggingface.co/collections/google/gemma-3n -[laurel]: https://arxiv.org/abs/2411.07501 -[matformer]: https://arxiv.org/abs/2310.07707 +[laurel]: https://huggingface.co/papers/2411.07501 +[matformer]: https://huggingface.co/papers/2310.07707 [spark-transformer]: https://arxiv.org/abs/2506.06644 -[usm]: https://arxiv.org/abs/2303.01037 +[usm]: https://huggingface.co/papers/2303.01037 diff --git a/docs/source/en/model_doc/git.md b/docs/source/en/model_doc/git.md index c1b7dba820..a2aa0901b2 100644 --- a/docs/source/en/model_doc/git.md +++ b/docs/source/en/model_doc/git.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. 
--> +*This model was released on 2022-05-27 and added to Hugging Face Transformers on 2023-01-03.* # GIT diff --git a/docs/source/en/model_doc/glm.md b/docs/source/en/model_doc/glm.md index 4a1618459b..3b5e2ad858 100644 --- a/docs/source/en/model_doc/glm.md +++ b/docs/source/en/model_doc/glm.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2024-06-18 and added to Hugging Face Transformers on 2024-10-18.* # GLM diff --git a/docs/source/en/model_doc/glm4.md b/docs/source/en/model_doc/glm4.md index a7df833039..a10926bd5a 100644 --- a/docs/source/en/model_doc/glm4.md +++ b/docs/source/en/model_doc/glm4.md @@ -13,12 +13,13 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2024-06-18 and added to Hugging Face Transformers on 2025-04-09.* # Glm4 ## Overview -The GLM family welcomes new members [GLM-4-0414](https://arxiv.org/pdf/2406.12793) series models. +The GLM family welcomes new members [GLM-4-0414](https://huggingface.co/papers/2406.12793) series models. The **GLM-4-32B-0414** series models, featuring 32 billion parameters. Its performance is comparable to OpenAI’s GPT series and DeepSeek’s V3/R1 series. It also supports very user-friendly local deployment features. GLM-4-32B-Base-0414 diff --git a/docs/source/en/model_doc/glm4_moe.md b/docs/source/en/model_doc/glm4_moe.md index 9ec764ec34..3ea84ed339 100644 --- a/docs/source/en/model_doc/glm4_moe.md +++ b/docs/source/en/model_doc/glm4_moe.md @@ -13,12 +13,13 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. 
--> +*This model was released on 2025-07-28 and added to Hugging Face Transformers on 2025-07-21.* # Glm4Moe ## Overview -The [**GLM-4.5**](https://arxiv.org/abs/2508.06471) series models are foundation models designed for intelligent agents, MoE variants are documented here as Glm4Moe. +The [**GLM-4.5**](https://huggingface.co/papers/2508.06471) series models are foundation models designed for intelligent agents, MoE variants are documented here as Glm4Moe. GLM-4.5 has **355** billion total parameters with **32** billion active parameters, while GLM-4.5-Air adopts a more compact design with **106** billion total parameters and **12** billion active parameters. GLM-4.5 models unify reasoning, coding, and intelligent agent capabilities to meet the complex demands of intelligent agent applications. @@ -30,7 +31,7 @@ As demonstrated in our comprehensive evaluation across 12 industry-standard benc ![bench](https://raw.githubusercontent.com/zai-org/GLM-4.5/refs/heads/main/resources/bench.png) -For more eval results, show cases, and technical details, please visit our [technical report](https://arxiv.org/abs/2508.06471) or [technical blog](https://z.ai/blog/glm-4.5). +For more eval results, show cases, and technical details, please visit our [technical report](https://huggingface.co/papers/2508.06471) or [technical blog](https://z.ai/blog/glm-4.5). The model code, tool parser and reasoning parser can be found in the implementation of [transformers](https://github.com/huggingface/transformers/tree/main/src/transformers/models/glm4_moe), [vLLM](https://github.com/vllm-project/vllm/blob/main/vllm/model_executor/models/glm4_moe_mtp.py) and [SGLang](https://github.com/sgl-project/sglang/blob/main/python/sglang/srt/models/glm4_moe.py). 
diff --git a/docs/source/en/model_doc/glm4v.md b/docs/source/en/model_doc/glm4v.md index 0884242150..6a4e783476 100644 --- a/docs/source/en/model_doc/glm4v.md +++ b/docs/source/en/model_doc/glm4v.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2025-07-01 and added to Hugging Face Transformers on 2025-06-25.*
diff --git a/docs/source/en/model_doc/glm4v_moe.md b/docs/source/en/model_doc/glm4v_moe.md index e951b02fec..0388cc9eb6 100644 --- a/docs/source/en/model_doc/glm4v_moe.md +++ b/docs/source/en/model_doc/glm4v_moe.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2025-07-28 and added to Hugging Face Transformers on 2025-08-08.*
@@ -29,7 +30,7 @@ Vision-language models (VLMs) have become a key cornerstone of intelligent syste Through our open-source work, we aim to explore the technological frontier together with the community while empowering more developers to create exciting and innovative applications. -[GLM-4.5V](https://github.com/zai-org/GLM-V) is based on ZhipuAI’s next-generation flagship text foundation model GLM-4.5-Air (106B parameters, 12B active). It continues the technical approach of [GLM-4.1V-Thinking](https://arxiv.org/abs/2507.01006), achieving SOTA performance among models of the same scale on 42 public vision-language benchmarks. It covers common tasks such as image, video, and document understanding, as well as GUI agent operations. +[GLM-4.5V](https://huggingface.co/papers/2508.06471) ([GitHub repo](https://github.com/zai-org/GLM-V)) is based on ZhipuAI’s next-generation flagship text foundation model GLM-4.5-Air (106B parameters, 12B active). It continues the technical approach of [GLM-4.1V-Thinking](https://huggingface.co/papers/2507.01006), achieving SOTA performance among models of the same scale on 42 public vision-language benchmarks. It covers common tasks such as image, video, and document understanding, as well as GUI agent operations. ![bench_45](https://raw.githubusercontent.com/zai-org/GLM-V/refs/heads/main/resources/bench_45v.jpeg) diff --git a/docs/source/en/model_doc/glpn.md b/docs/source/en/model_doc/glpn.md index 4a4433626f..810e00e00e 100644 --- a/docs/source/en/model_doc/glpn.md +++ b/docs/source/en/model_doc/glpn.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. 
--> +*This model was released on 2022-01-19 and added to Hugging Face Transformers on 2022-03-22.* # GLPN diff --git a/docs/source/en/model_doc/got_ocr2.md b/docs/source/en/model_doc/got_ocr2.md index 6f15f2526f..cd226ab279 100644 --- a/docs/source/en/model_doc/got_ocr2.md +++ b/docs/source/en/model_doc/got_ocr2.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2024-09-03 and added to Hugging Face Transformers on 2025-01-31.* # GOT-OCR2 diff --git a/docs/source/en/model_doc/gpt-sw3.md b/docs/source/en/model_doc/gpt-sw3.md index 20daa3537a..82de953418 100644 --- a/docs/source/en/model_doc/gpt-sw3.md +++ b/docs/source/en/model_doc/gpt-sw3.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2022-06-25 and added to Hugging Face Transformers on 2022-12-12.* # GPT-Sw3 diff --git a/docs/source/en/model_doc/gpt2.md b/docs/source/en/model_doc/gpt2.md index edc32747bd..451f51836a 100644 --- a/docs/source/en/model_doc/gpt2.md +++ b/docs/source/en/model_doc/gpt2.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2019-02-14 and added to Hugging Face Transformers on 2020-11-16.*
@@ -26,7 +27,7 @@ rendered properly in your Markdown viewer. # GPT-2 -[GPT-2](https://cdn.openai.com/better-language-models/language_models_are_unsupervised_multitask_learners.pdf) is a scaled up version of GPT, a causal transformer language model, with 10x more parameters and training data. The model was pretrained on a 40GB dataset to predict the next word in a sequence based on all the previous words. This approach enabled the model to perform many downstream tasks in a zero-shot setting. +[GPT-2](https://cdn.openai.com/better-language-models/language_models_are_unsupervised_multitask_learners.pdf) is a scaled up version of GPT, a causal transformer language model, with 10x more parameters and training data. The model was pretrained on a 40GB dataset to predict the next word in a sequence based on all the previous words. This approach enabled the model to perform many downstream tasks in a zero-shot setting. The blog post released by OpenAI can be found [here](https://openai.com/index/better-language-models/). The model architecture uses a unidirectional (causal) attention mechanism where each token can only attend to previous tokens, making it particularly effective for text generation tasks. diff --git a/docs/source/en/model_doc/gpt_bigcode.md b/docs/source/en/model_doc/gpt_bigcode.md index 9e25f3c19e..dadf054027 100644 --- a/docs/source/en/model_doc/gpt_bigcode.md +++ b/docs/source/en/model_doc/gpt_bigcode.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2023-01-09 and added to Hugging Face Transformers on 2023-04-10.* # GPTBigCode diff --git a/docs/source/en/model_doc/gpt_neo.md b/docs/source/en/model_doc/gpt_neo.md index 3830f04378..98c9cfa1f5 100644 --- a/docs/source/en/model_doc/gpt_neo.md +++ b/docs/source/en/model_doc/gpt_neo.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. 
rendered properly in your Markdown viewer. --> +*This model was released on 2021-03-21 and added to Hugging Face Transformers on 2021-03-30.*
@@ -26,7 +27,7 @@ rendered properly in your Markdown viewer. ## GPT-Neo -[GPT-Neo](https://zenodo.org/records/5297715) is an open-source alternative to GPT-2 and GPT-3 models, built with Mesh TensorFlow for TPUs. GPT-Neo uses local attention in every other layer for more efficiency. It is trained on the [Pile](https://huggingface.co/datasets/EleutherAI/pile), a diverse dataset consisting of 22 smaller high-quality datasets. +[GPT-Neo](https://zenodo.org/records/5297715) is an open-source alternative to GPT-2 and GPT-3 models, built with Mesh TensorFlow for TPUs. GPT-Neo uses local attention in every other layer for more efficiency. It is trained on the [Pile](https://huggingface.co/datasets/EleutherAI/pile), a diverse dataset consisting of 22 smaller high-quality datasets. The original GitHub repository can be found [here](https://github.com/EleutherAI/gpt-neo/tree/v1.1). You can find all the original GPT-Neo checkpoints under the [EleutherAI](https://huggingface.co/EleutherAI?search_models=gpt-neo) organization. diff --git a/docs/source/en/model_doc/gpt_neox.md b/docs/source/en/model_doc/gpt_neox.md index 35f12bdb21..43a63624e8 100644 --- a/docs/source/en/model_doc/gpt_neox.md +++ b/docs/source/en/model_doc/gpt_neox.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2022-04-14 and added to Hugging Face Transformers on 2022-05-24.* # GPT-NeoX @@ -23,7 +24,7 @@ rendered properly in your Markdown viewer. ## Overview -We introduce GPT-NeoX-20B, a 20 billion parameter autoregressive language model trained on the Pile, whose weights will +We introduce [GPT-NeoX-20B](https://huggingface.co/papers/2204.06745), a 20 billion parameter autoregressive language model trained on the Pile, whose weights will be made freely and openly available to the public through a permissive license.
It is, to the best of our knowledge, the largest dense autoregressive model that has publicly available weights at the time of submission. In this work, we describe GPT-NeoX-20B's architecture and training and evaluate its performance on a range of language-understanding, diff --git a/docs/source/en/model_doc/gpt_neox_japanese.md b/docs/source/en/model_doc/gpt_neox_japanese.md index cedfafa133..28c3237062 100644 --- a/docs/source/en/model_doc/gpt_neox_japanese.md +++ b/docs/source/en/model_doc/gpt_neox_japanese.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2022-07-27 and added to Hugging Face Transformers on 2022-09-14.* # GPT-NeoX-Japanese diff --git a/docs/source/en/model_doc/gpt_oss.md b/docs/source/en/model_doc/gpt_oss.md index 5fa916e64e..7b5e14c51e 100644 --- a/docs/source/en/model_doc/gpt_oss.md +++ b/docs/source/en/model_doc/gpt_oss.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2025-08-05 and added to Hugging Face Transformers on 2025-08-05.*
@@ -28,7 +29,7 @@ rendered properly in your Markdown viewer. ## Overview -The GptOss model was proposed in []() by . +The GptOss model was introduced in this [blog post](https://openai.com/index/introducing-gpt-oss/) by OpenAI. The abstract from the paper is the following: diff --git a/docs/source/en/model_doc/gptj.md b/docs/source/en/model_doc/gptj.md index 8e852d931a..95a142ae1a 100644 --- a/docs/source/en/model_doc/gptj.md +++ b/docs/source/en/model_doc/gptj.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2021-06-04 and added to Hugging Face Transformers on 2021-08-31.* # GPT-J @@ -26,7 +27,7 @@ rendered properly in your Markdown viewer. ## Overview -The GPT-J model was released in the [kingoflolz/mesh-transformer-jax](https://github.com/kingoflolz/mesh-transformer-jax) repository by Ben Wang and Aran Komatsuzaki. It is a GPT-2-like +The [GPT-J](https://arankomatsuzaki.wordpress.com/2021/06/04/gpt-j/) model was released in the [kingoflolz/mesh-transformer-jax](https://github.com/kingoflolz/mesh-transformer-jax) repository by Ben Wang and Aran Komatsuzaki. It is a GPT-2-like causal language model trained on [the Pile](https://pile.eleuther.ai/) dataset. This model was contributed by [Stella Biderman](https://huggingface.co/stellaathena). diff --git a/docs/source/en/model_doc/gptsan-japanese.md b/docs/source/en/model_doc/gptsan-japanese.md index 929e7330ce..50688d12d5 100644 --- a/docs/source/en/model_doc/gptsan-japanese.md +++ b/docs/source/en/model_doc/gptsan-japanese.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2023-02-07 and added to Hugging Face Transformers on 2023-06-20.* # GPTSAN-japanese @@ -30,7 +31,7 @@ You can do so by running the following command: `pip install -U transformers==4.
## Overview -The GPTSAN-japanese model was released in the repository by Toshiyuki Sakamoto (tanreinama). +The [GPTSAN-japanese](https://huggingface.co/Tanrei/GPTSAN-japanese) model was released in the repository by Toshiyuki Sakamoto (tanreinama). GPTSAN is a Japanese language model using Switch Transformer. It has the same structure as the model introduced as Prefix LM in the T5 paper, and support both Text Generation and Masked Language Modeling tasks. These basic tasks similarly can diff --git a/docs/source/en/model_doc/granite.md b/docs/source/en/model_doc/granite.md index bdc71c2997..aea52d88c3 100644 --- a/docs/source/en/model_doc/granite.md +++ b/docs/source/en/model_doc/granite.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2024-08-23 and added to Hugging Face Transformers on 2024-08-27.*
diff --git a/docs/source/en/model_doc/granite_speech.md b/docs/source/en/model_doc/granite_speech.md index be5714a3ab..b9973f53be 100644 --- a/docs/source/en/model_doc/granite_speech.md +++ b/docs/source/en/model_doc/granite_speech.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2025-04-16 and added to Hugging Face Transformers on 2025-04-11.* # Granite Speech @@ -21,7 +22,7 @@ rendered properly in your Markdown viewer.
## Overview -The Granite Speech model is a multimodal language model, consisting of a speech encoder, speech projector, large language model, and LoRA adapter(s). More details regarding each component for the current (Granite 3.2 Speech) model architecture may be found below. +The [Granite Speech](https://huggingface.co/papers/2505.08699) model ([blog post](https://www.ibm.com/new/announcements/ibm-granite-3-3-speech-recognition-refined-reasoning-rag-loras)) is a multimodal language model, consisting of a speech encoder, speech projector, large language model, and LoRA adapter(s). More details regarding each component for the current (Granite 3.2 Speech) model architecture may be found below. 1. Speech Encoder: A [Conformer](https://huggingface.co/papers/2005.08100) encoder trained with Connectionist Temporal Classification (CTC) on character-level targets on ASR corpora. The encoder uses block-attention and self-conditioned CTC from the middle layer. diff --git a/docs/source/en/model_doc/granitemoe.md b/docs/source/en/model_doc/granitemoe.md index 3334008f0c..71c266a76b 100644 --- a/docs/source/en/model_doc/granitemoe.md +++ b/docs/source/en/model_doc/granitemoe.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2024-08-23 and added to Hugging Face Transformers on 2024-09-20.* # GraniteMoe diff --git a/docs/source/en/model_doc/granitemoehybrid.md b/docs/source/en/model_doc/granitemoehybrid.md index 92d6e3b70a..27b6e85d9e 100644 --- a/docs/source/en/model_doc/granitemoehybrid.md +++ b/docs/source/en/model_doc/granitemoehybrid.md @@ -13,13 +13,14 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. 
--> +*This model was released on 2025-05-02 and added to Hugging Face Transformers on 2025-05-05.* # GraniteMoeHybrid ## Overview -The `GraniteMoeHybrid` model builds on top of `GraniteMoeSharedModel` and `Bamba`. Its decoding layers consist of state space layers or MoE attention layers with shared experts. By default, the attention layers do not use positional encoding. +The [GraniteMoeHybrid](https://www.ibm.com/new/announcements/ibm-granite-4-0-tiny-preview-sneak-peek) model builds on top of GraniteMoeSharedModel and Bamba. Its decoding layers consist of state space layers or MoE attention layers with shared experts. By default, the attention layers do not use positional encoding. ```python diff --git a/docs/source/en/model_doc/granitemoeshared.md b/docs/source/en/model_doc/granitemoeshared.md index 54a956c0f3..d09ab5766f 100644 --- a/docs/source/en/model_doc/granitemoeshared.md +++ b/docs/source/en/model_doc/granitemoeshared.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2024-08-23 and added to Hugging Face Transformers on 2025-02-14.* # GraniteMoeShared diff --git a/docs/source/en/model_doc/granitevision.md b/docs/source/en/model_doc/granitevision.md index e11c806ae6..4130e999a5 100644 --- a/docs/source/en/model_doc/granitevision.md +++ b/docs/source/en/model_doc/granitevision.md @@ -13,12 +13,13 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2024-12-18 and added to Hugging Face Transformers on 2025-01-23.* # Granite Vision ## Overview -The Granite Vision model is a variant of [LLaVA-NeXT](llava_next), leveraging a [Granite](granite) language model alongside a [SigLIP](SigLIP) visual encoder. It utilizes multiple concatenated vision hidden states as its image features, similar to [VipLlava](vipllava). 
It also uses a larger set of image grid pinpoints than the original LlaVa-NeXT models to support additional aspect ratios. +The [Granite Vision](https://www.ibm.com/new/announcements/ibm-granite-3-1-powerful-performance-long-context-and-more) model is a variant of [LLaVA-NeXT](llava_next), leveraging a [Granite](granite) language model alongside a [SigLIP](SigLIP) visual encoder. It utilizes multiple concatenated vision hidden states as its image features, similar to [VipLlava](vipllava). It also uses a larger set of image grid pinpoints than the original LlaVa-NeXT models to support additional aspect ratios. Tips: - This model is loaded into Transformers as an instance of LlaVA-Next. The usage and tips from [LLaVA-NeXT](llava_next) apply to this model as well. diff --git a/docs/source/en/model_doc/graphormer.md b/docs/source/en/model_doc/graphormer.md index b602bc9b0d..851f52df09 100644 --- a/docs/source/en/model_doc/graphormer.md +++ b/docs/source/en/model_doc/graphormer.md @@ -11,6 +11,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2021-06-09 and added to Hugging Face Transformers on 2023-06-20.* # Graphormer diff --git a/docs/source/en/model_doc/grounding-dino.md b/docs/source/en/model_doc/grounding-dino.md index 145913da63..22594ab120 100644 --- a/docs/source/en/model_doc/grounding-dino.md +++ b/docs/source/en/model_doc/grounding-dino.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. 
--> +*This model was released on 2023-03-09 and added to Hugging Face Transformers on 2024-04-11.* # Grounding DINO diff --git a/docs/source/en/model_doc/groupvit.md b/docs/source/en/model_doc/groupvit.md index dbe83b64c8..ceb29d897f 100644 --- a/docs/source/en/model_doc/groupvit.md +++ b/docs/source/en/model_doc/groupvit.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2022-02-22 and added to Hugging Face Transformers on 2022-06-28.* # GroupViT diff --git a/docs/source/en/model_doc/helium.md b/docs/source/en/model_doc/helium.md index a9296eb110..fd1623ce47 100644 --- a/docs/source/en/model_doc/helium.md +++ b/docs/source/en/model_doc/helium.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2025-01-13 and added to Hugging Face Transformers on 2025-01-13.* # Helium diff --git a/docs/source/en/model_doc/herbert.md b/docs/source/en/model_doc/herbert.md index aa4f535ed2..2e5f2cbe66 100644 --- a/docs/source/en/model_doc/herbert.md +++ b/docs/source/en/model_doc/herbert.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2020-05-01 and added to Hugging Face Transformers on 2020-11-16.* # HerBERT @@ -25,7 +26,7 @@ rendered properly in your Markdown viewer. ## Overview -The HerBERT model was proposed in [KLEJ: Comprehensive Benchmark for Polish Language Understanding](https://www.aclweb.org/anthology/2020.acl-main.111.pdf) by Piotr Rybak, Robert Mroczkowski, Janusz Tracz, and +The HerBERT model was proposed in [KLEJ: Comprehensive Benchmark for Polish Language Understanding](https://huggingface.co/papers/2005.00630) by Piotr Rybak, Robert Mroczkowski, Janusz Tracz, and Ireneusz Gawlik. 
It is a BERT-based Language Model trained on Polish Corpora using only MLM objective with dynamic masking of whole words. diff --git a/docs/source/en/model_doc/hgnet_v2.md b/docs/source/en/model_doc/hgnet_v2.md index d12a1712db..8c315e9b82 100644 --- a/docs/source/en/model_doc/hgnet_v2.md +++ b/docs/source/en/model_doc/hgnet_v2.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2024-07-01 and added to Hugging Face Transformers on 2025-04-29.*
diff --git a/docs/source/en/model_doc/hiera.md b/docs/source/en/model_doc/hiera.md index 9d20f34670..9f4627dd53 100644 --- a/docs/source/en/model_doc/hiera.md +++ b/docs/source/en/model_doc/hiera.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2023-06-01 and added to Hugging Face Transformers on 2024-07-12.* # Hiera diff --git a/docs/source/en/model_doc/hubert.md b/docs/source/en/model_doc/hubert.md index 7396565087..36323e16ba 100644 --- a/docs/source/en/model_doc/hubert.md +++ b/docs/source/en/model_doc/hubert.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2021-06-14 and added to Hugging Face Transformers on 2021-06-16.*
diff --git a/docs/source/en/model_doc/ibert.md b/docs/source/en/model_doc/ibert.md index 34893c6c1d..a04d291a6b 100644 --- a/docs/source/en/model_doc/ibert.md +++ b/docs/source/en/model_doc/ibert.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2021-01-05 and added to Hugging Face Transformers on 2021-02-26.* # I-BERT diff --git a/docs/source/en/model_doc/idefics.md b/docs/source/en/model_doc/idefics.md index 2b8e471213..57b2c8328b 100644 --- a/docs/source/en/model_doc/idefics.md +++ b/docs/source/en/model_doc/idefics.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2023-06-21 and added to Hugging Face Transformers on 2023-08-18.* # IDEFICS diff --git a/docs/source/en/model_doc/idefics2.md b/docs/source/en/model_doc/idefics2.md index 58e0dd0ecb..f5612ade80 100644 --- a/docs/source/en/model_doc/idefics2.md +++ b/docs/source/en/model_doc/idefics2.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2024-05-03 and added to Hugging Face Transformers on 2024-04-15.* # Idefics2 diff --git a/docs/source/en/model_doc/idefics3.md b/docs/source/en/model_doc/idefics3.md index 5a5b45bd39..b3e199e2b8 100644 --- a/docs/source/en/model_doc/idefics3.md +++ b/docs/source/en/model_doc/idefics3.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. 
--> +*This model was released on 2024-08-22 and added to Hugging Face Transformers on 2024-09-25.* # Idefics3 diff --git a/docs/source/en/model_doc/ijepa.md b/docs/source/en/model_doc/ijepa.md index 146b673732..a8582f19c7 100644 --- a/docs/source/en/model_doc/ijepa.md +++ b/docs/source/en/model_doc/ijepa.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2023-01-19 and added to Hugging Face Transformers on 2024-12-05.*
diff --git a/docs/source/en/model_doc/imagegpt.md b/docs/source/en/model_doc/imagegpt.md index 7fbec62d30..e3c5db1524 100644 --- a/docs/source/en/model_doc/imagegpt.md +++ b/docs/source/en/model_doc/imagegpt.md @@ -12,6 +12,7 @@ Unless required by applicable law or agreed to in writing, software distributed rendered properly in your Markdown viewer. specific language governing permissions and limitations under the License. --> +*This model was released on 2020-06-17 and added to Hugging Face Transformers on 2021-11-18.* # ImageGPT @@ -25,7 +26,7 @@ The ImageGPT model was proposed in [Generative Pretraining from Pixels](https:// Chen, Alec Radford, Rewon Child, Jeffrey Wu, Heewoo Jun, David Luan, Ilya Sutskever. ImageGPT (iGPT) is a GPT-2-like model trained to predict the next pixel value, allowing for both unconditional and conditional image generation. -The abstract from the paper is the following: +The abstract from the [paper](https://cdn.openai.com/papers/Generative_Pretraining_from_Pixels_V1_ICML.pdf) is the following: *Inspired by progress in unsupervised representation learning for natural language, we examine whether similar models can learn useful representations for images. We train a sequence Transformer to auto-regressively predict pixels, diff --git a/docs/source/en/model_doc/informer.md b/docs/source/en/model_doc/informer.md index d511d0f498..7e79399cbc 100644 --- a/docs/source/en/model_doc/informer.md +++ b/docs/source/en/model_doc/informer.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. 
--> +*This model was released on 2020-12-14 and added to Hugging Face Transformers on 2023-03-08.* # Informer diff --git a/docs/source/en/model_doc/instructblip.md b/docs/source/en/model_doc/instructblip.md index c297ca0ac4..b0669f1c06 100644 --- a/docs/source/en/model_doc/instructblip.md +++ b/docs/source/en/model_doc/instructblip.md @@ -9,6 +9,7 @@ Unless required by applicable law or agreed to in writing, software distributed an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. --> +*This model was released on 2023-05-11 and added to Hugging Face Transformers on 2023-06-26.* # InstructBLIP diff --git a/docs/source/en/model_doc/instructblipvideo.md b/docs/source/en/model_doc/instructblipvideo.md index d0b4dc3cc0..00cf56518d 100644 --- a/docs/source/en/model_doc/instructblipvideo.md +++ b/docs/source/en/model_doc/instructblipvideo.md @@ -9,6 +9,7 @@ Unless required by applicable law or agreed to in writing, software distributed an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. --> +*This model was released on 2023-05-11 and added to Hugging Face Transformers on 2024-06-25.* # InstructBlipVideo diff --git a/docs/source/en/model_doc/internvl.md b/docs/source/en/model_doc/internvl.md index 97802cb94e..27e2fb1315 100644 --- a/docs/source/en/model_doc/internvl.md +++ b/docs/source/en/model_doc/internvl.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2025-04-14 and added to Hugging Face Transformers on 2025-04-18.*
diff --git a/docs/source/en/model_doc/jamba.md b/docs/source/en/model_doc/jamba.md index 5dad796f26..204fbed6d3 100644 --- a/docs/source/en/model_doc/jamba.md +++ b/docs/source/en/model_doc/jamba.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2024-03-28 and added to Hugging Face Transformers on 2024-04-18.*
diff --git a/docs/source/en/model_doc/janus.md b/docs/source/en/model_doc/janus.md index f2825cbc97..047adb04b3 100644 --- a/docs/source/en/model_doc/janus.md +++ b/docs/source/en/model_doc/janus.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2024-10-17 and added to Hugging Face Transformers on 2025-04-17.* # Janus diff --git a/docs/source/en/model_doc/jetmoe.md b/docs/source/en/model_doc/jetmoe.md index 897270a383..059fb956ce 100644 --- a/docs/source/en/model_doc/jetmoe.md +++ b/docs/source/en/model_doc/jetmoe.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2023-06-07 and added to Hugging Face Transformers on 2024-05-14.* # JetMoe diff --git a/docs/source/en/model_doc/jukebox.md b/docs/source/en/model_doc/jukebox.md index 75351801b8..385eeb560e 100644 --- a/docs/source/en/model_doc/jukebox.md +++ b/docs/source/en/model_doc/jukebox.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2020-04-30 and added to Hugging Face Transformers on 2023-06-20.* # Jukebox
diff --git a/docs/source/en/model_doc/kosmos-2.md b/docs/source/en/model_doc/kosmos-2.md index d9105da5d1..c449dfd71a 100644 --- a/docs/source/en/model_doc/kosmos-2.md +++ b/docs/source/en/model_doc/kosmos-2.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2023-06-26 and added to Hugging Face Transformers on 2023-10-30.* # KOSMOS-2 diff --git a/docs/source/en/model_doc/kyutai_speech_to_text.md b/docs/source/en/model_doc/kyutai_speech_to_text.md index 1c7d93e2af..4fd4d43c62 100644 --- a/docs/source/en/model_doc/kyutai_speech_to_text.md +++ b/docs/source/en/model_doc/kyutai_speech_to_text.md @@ -13,11 +13,12 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2025-06-17 and added to Hugging Face Transformers on 2025-06-25.* # Kyutai Speech-To-Text ## Overview -Kyutai STT is a speech-to-text model architecture based on the [Mimi codec](https://huggingface.co/docs/transformers/en/model_doc/mimi), which encodes audio into discrete tokens in a streaming fashion, and a [Moshi-like](https://huggingface.co/docs/transformers/en/model_doc/moshi) autoregressive decoder. Kyutai’s lab has released two model checkpoints: +[Kyutai STT](https://kyutai.org/next/stt) is a speech-to-text model architecture based on the [Mimi codec](https://huggingface.co/docs/transformers/en/model_doc/mimi), which encodes audio into discrete tokens in a streaming fashion, and a [Moshi-like](https://huggingface.co/docs/transformers/en/model_doc/moshi) autoregressive decoder. 
Kyutai’s lab has released two model checkpoints: - [kyutai/stt-1b-en_fr](https://huggingface.co/kyutai/stt-1b-en_fr): a 1B-parameter model capable of transcribing both English and French - [kyutai/stt-2.6b-en](https://huggingface.co/kyutai/stt-2.6b-en): a 2.6B-parameter model focused solely on English, optimized for maximum transcription accuracy diff --git a/docs/source/en/model_doc/layoutlm.md b/docs/source/en/model_doc/layoutlm.md index 86c5c7c1fc..00e4558cc9 100644 --- a/docs/source/en/model_doc/layoutlm.md +++ b/docs/source/en/model_doc/layoutlm.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2019-12-31 and added to Hugging Face Transformers on 2020-11-16.* # LayoutLM diff --git a/docs/source/en/model_doc/layoutlmv2.md b/docs/source/en/model_doc/layoutlmv2.md index b6c6242e45..c376c04ad7 100644 --- a/docs/source/en/model_doc/layoutlmv2.md +++ b/docs/source/en/model_doc/layoutlmv2.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2020-12-29 and added to Hugging Face Transformers on 2021-08-30.* # LayoutLMV2 diff --git a/docs/source/en/model_doc/layoutlmv3.md b/docs/source/en/model_doc/layoutlmv3.md index cbf6709727..737e9233c7 100644 --- a/docs/source/en/model_doc/layoutlmv3.md +++ b/docs/source/en/model_doc/layoutlmv3.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. 
--> +*This model was released on 2022-04-18 and added to Hugging Face Transformers on 2022-05-24.* # LayoutLMv3 diff --git a/docs/source/en/model_doc/layoutxlm.md b/docs/source/en/model_doc/layoutxlm.md index 32f453fb6f..19051f55b6 100644 --- a/docs/source/en/model_doc/layoutxlm.md +++ b/docs/source/en/model_doc/layoutxlm.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2021-04-18 and added to Hugging Face Transformers on 2021-11-03.* # LayoutXLM diff --git a/docs/source/en/model_doc/led.md b/docs/source/en/model_doc/led.md index 7ac5e44b43..96d82aa7e3 100644 --- a/docs/source/en/model_doc/led.md +++ b/docs/source/en/model_doc/led.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2020-04-10 and added to Hugging Face Transformers on 2021-01-05.*
diff --git a/docs/source/en/model_doc/levit.md b/docs/source/en/model_doc/levit.md index 7596980ecd..61ffcbfa95 100644 --- a/docs/source/en/model_doc/levit.md +++ b/docs/source/en/model_doc/levit.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2021-04-02 and added to Hugging Face Transformers on 2022-06-01.* # LeViT diff --git a/docs/source/en/model_doc/lfm2.md b/docs/source/en/model_doc/lfm2.md index c94e421d76..bffc7f7d14 100644 --- a/docs/source/en/model_doc/lfm2.md +++ b/docs/source/en/model_doc/lfm2.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2025-07-10 and added to Hugging Face Transformers on 2025-07-10.*
PyTorch diff --git a/docs/source/en/model_doc/lightglue.md b/docs/source/en/model_doc/lightglue.md index 0200285732..33c0ad6309 100644 --- a/docs/source/en/model_doc/lightglue.md +++ b/docs/source/en/model_doc/lightglue.md @@ -11,6 +11,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2023-06-23 and added to Hugging Face Transformers on 2025-06-17.*
@@ -20,7 +21,7 @@ rendered properly in your Markdown viewer. # LightGlue -[LightGlue](https://arxiv.org/abs/2306.13643) is a deep neural network that learns to match local features across images. It revisits multiple design decisions of SuperGlue and derives simple but effective improvements. Cumulatively, these improvements make LightGlue more efficient - in terms of both memory and computation, more accurate, and much easier to train. Similar to [SuperGlue](https://huggingface.co/magic-leap-community/superglue_outdoor), this model consists of matching two sets of local features extracted from two images, with the goal of being faster than SuperGlue. Paired with the [SuperPoint model](https://huggingface.co/magic-leap-community/superpoint), it can be used to match two images and estimate the pose between them. +[LightGlue](https://huggingface.co/papers/2306.13643) is a deep neural network that learns to match local features across images. It revisits multiple design decisions of SuperGlue and derives simple but effective improvements. Cumulatively, these improvements make LightGlue more efficient - in terms of both memory and computation, more accurate, and much easier to train. Similar to [SuperGlue](https://huggingface.co/magic-leap-community/superglue_outdoor), this model consists of matching two sets of local features extracted from two images, with the goal of being faster than SuperGlue. Paired with the [SuperPoint model](https://huggingface.co/magic-leap-community/superpoint), it can be used to match two images and estimate the pose between them. You can find all the original LightGlue checkpoints under the [ETH-CVG](https://huggingface.co/ETH-CVG) organization. diff --git a/docs/source/en/model_doc/lilt.md b/docs/source/en/model_doc/lilt.md index 57e8cac28f..54475e7cb3 100644 --- a/docs/source/en/model_doc/lilt.md +++ b/docs/source/en/model_doc/lilt.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. 
rendered properly in your Markdown viewer. --> +*This model was released on 2022-02-28 and added to Hugging Face Transformers on 2022-10-12.* # LiLT diff --git a/docs/source/en/model_doc/llama.md b/docs/source/en/model_doc/llama.md index 183775bcad..3636cd331d 100644 --- a/docs/source/en/model_doc/llama.md +++ b/docs/source/en/model_doc/llama.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2023-02-27 and added to Hugging Face Transformers on 2023-03-16.*
diff --git a/docs/source/en/model_doc/llama2.md b/docs/source/en/model_doc/llama2.md index a2e697e89d..fa2567e88e 100644 --- a/docs/source/en/model_doc/llama2.md +++ b/docs/source/en/model_doc/llama2.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2023-07-18 and added to Hugging Face Transformers on 2023-07-18.*
diff --git a/docs/source/en/model_doc/llama3.md b/docs/source/en/model_doc/llama3.md index ab5c4862c4..f5587a0fb0 100644 --- a/docs/source/en/model_doc/llama3.md +++ b/docs/source/en/model_doc/llama3.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2024-04-18 and added to Hugging Face Transformers on 2024-04-24.* # Llama3 @@ -35,7 +36,7 @@ pipeline("Hey how are you doing today?") ## Overview -The Llama3 model was proposed in [Introducing Meta Llama 3: The most capable openly available LLM to date](https://ai.meta.com/blog/meta-llama-3/) by the meta AI team. +The [Llama3](https://huggingface.co/papers/2407.21783) model was proposed in [Introducing Meta Llama 3: The most capable openly available LLM to date](https://ai.meta.com/blog/meta-llama-3/) by the Meta AI team. The abstract from the blogpost is the following: diff --git a/docs/source/en/model_doc/llama4.md b/docs/source/en/model_doc/llama4.md index 07f0919fba..2c21b38078 100644 --- a/docs/source/en/model_doc/llama4.md +++ b/docs/source/en/model_doc/llama4.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2025-04-05 and added to Hugging Face Transformers on 2025-04-05.* # Llama4 @@ -25,7 +26,7 @@ rendered properly in your Markdown viewer.
-Llama 4, developed by Meta, introduces a new auto-regressive Mixture-of-Experts (MoE) architecture. +[Llama 4](https://ai.meta.com/blog/llama-4-multimodal-intelligence/), developed by Meta, introduces a new auto-regressive Mixture-of-Experts (MoE) architecture. This generation includes two models: - The highly capable Llama 4 Maverick with 17B active parameters out of ~400B total, with 128 experts. - The efficient Llama 4 Scout also has 17B active parameters out of ~109B total, using just 16 experts. diff --git a/docs/source/en/model_doc/llava.md b/docs/source/en/model_doc/llava.md index ae1d3c92b1..4748a88154 100644 --- a/docs/source/en/model_doc/llava.md +++ b/docs/source/en/model_doc/llava.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2023-04-17 and added to Hugging Face Transformers on 2023-12-07.* # LLaVa diff --git a/docs/source/en/model_doc/llava_next.md b/docs/source/en/model_doc/llava_next.md index 9d3f66a209..c181ce0d6c 100644 --- a/docs/source/en/model_doc/llava_next.md +++ b/docs/source/en/model_doc/llava_next.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2024-01-30 and added to Hugging Face Transformers on 2024-03-20.*
@@ -24,7 +25,7 @@ rendered properly in your Markdown viewer. # LLaVA-NeXT -[LLaVA‑NeXT](https://llava-vl.github.io/blog/2024-05-10-llava-next-stronger-llms/) improves on [Llava](./llava) by increasing the input image resolution by 4x more pixels and supporting 3 aspect ratios (up to 672x672, 336x1344, 1344x336) to better grasp visual details. It is also trained on an improved visual instruction tuning dataset covering more scenarios and applications to improve OCR and common sense reasoning. +[LLaVA‑NeXT](https://llava-vl.github.io/blog/2024-01-30-llava-next/) improves on [Llava](./llava) by increasing the input image resolution by 4x more pixels and supporting 3 aspect ratios (up to 672x672, 336x1344, 1344x336) to better grasp visual details. It is also trained on an improved visual instruction tuning dataset covering more scenarios and applications to improve OCR and common sense reasoning. You can find all the original LLaVA‑NeXT checkpoints under the [LLaVA-NeXT](https://huggingface.co/collections/llava-hf/llava-next-65f75c4afac77fd37dbbe6cf) collection. diff --git a/docs/source/en/model_doc/llava_next_video.md b/docs/source/en/model_doc/llava_next_video.md index b3e42698c6..15438ae496 100644 --- a/docs/source/en/model_doc/llava_next_video.md +++ b/docs/source/en/model_doc/llava_next_video.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2024-05-31 and added to Hugging Face Transformers on 2024-06-26.* # LLaVa-NeXT-Video diff --git a/docs/source/en/model_doc/llava_onevision.md b/docs/source/en/model_doc/llava_onevision.md index 4d15e2a621..ec65f9a550 100644 --- a/docs/source/en/model_doc/llava_onevision.md +++ b/docs/source/en/model_doc/llava_onevision.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. 
--> +*This model was released on 2024-08-06 and added to Hugging Face Transformers on 2024-09-05.* # LLaVA-OneVision diff --git a/docs/source/en/model_doc/longformer.md b/docs/source/en/model_doc/longformer.md index 093db7ec93..7e0409a6a9 100644 --- a/docs/source/en/model_doc/longformer.md +++ b/docs/source/en/model_doc/longformer.md @@ -11,6 +11,7 @@ specific language governing permissions and limitations under the License. ⚠️ Note that this file is in Markdown but contains specific syntax for our doc-builder (similar to MDX) that may not be rendered properly in your Markdown viewer. --> +*This model was released on 2020-04-10 and added to Hugging Face Transformers on 2020-11-16.*
diff --git a/docs/source/en/model_doc/longt5.md b/docs/source/en/model_doc/longt5.md index b73f408c46..ab2c82e1f8 100644 --- a/docs/source/en/model_doc/longt5.md +++ b/docs/source/en/model_doc/longt5.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2021-12-15 and added to Hugging Face Transformers on 2022-06-13.* # LongT5 diff --git a/docs/source/en/model_doc/luke.md b/docs/source/en/model_doc/luke.md index 6880d2f98a..5eaadf79e9 100644 --- a/docs/source/en/model_doc/luke.md +++ b/docs/source/en/model_doc/luke.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2020-10-02 and added to Hugging Face Transformers on 2021-05-03.* # LUKE diff --git a/docs/source/en/model_doc/lxmert.md b/docs/source/en/model_doc/lxmert.md index 77edd6bf78..93f0d212be 100644 --- a/docs/source/en/model_doc/lxmert.md +++ b/docs/source/en/model_doc/lxmert.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2019-08-20 and added to Hugging Face Transformers on 2020-11-16.* # LXMERT diff --git a/docs/source/en/model_doc/m2m_100.md b/docs/source/en/model_doc/m2m_100.md index 6e7b216d7c..e865099a5c 100644 --- a/docs/source/en/model_doc/m2m_100.md +++ b/docs/source/en/model_doc/m2m_100.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. 
--> +*This model was released on 2020-10-21 and added to Hugging Face Transformers on 2021-03-06.* # M2M100 diff --git a/docs/source/en/model_doc/madlad-400.md b/docs/source/en/model_doc/madlad-400.md index db6abc38ea..d43e0fb6cc 100644 --- a/docs/source/en/model_doc/madlad-400.md +++ b/docs/source/en/model_doc/madlad-400.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2023-09-09 and added to Hugging Face Transformers on 2023-11-28.* # MADLAD-400 @@ -25,7 +26,7 @@ rendered properly in your Markdown viewer. ## Overview -MADLAD-400 models were released in the paper [MADLAD-400: A Multilingual And Document-Level Large Audited Dataset](MADLAD-400: A Multilingual And Document-Level Large Audited Dataset). +MADLAD-400 models were released in the paper [MADLAD-400: A Multilingual And Document-Level Large Audited Dataset](https://huggingface.co/papers/2309.04662). The abstract from the paper is the following: diff --git a/docs/source/en/model_doc/mamba.md b/docs/source/en/model_doc/mamba.md index 06efa75971..30074eeb72 100644 --- a/docs/source/en/model_doc/mamba.md +++ b/docs/source/en/model_doc/mamba.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2023-12-01 and added to Hugging Face Transformers on 2024-03-05.*
diff --git a/docs/source/en/model_doc/mamba2.md b/docs/source/en/model_doc/mamba2.md index 4d7de552d4..1ba32fba59 100644 --- a/docs/source/en/model_doc/mamba2.md +++ b/docs/source/en/model_doc/mamba2.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2024-05-31 and added to Hugging Face Transformers on 2024-08-06.*
diff --git a/docs/source/en/model_doc/marian.md b/docs/source/en/model_doc/marian.md index 0bec4a49de..d2a8b7c5b0 100644 --- a/docs/source/en/model_doc/marian.md +++ b/docs/source/en/model_doc/marian.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2018-04-01 and added to Hugging Face Transformers on 2020-11-16.*
diff --git a/docs/source/en/model_doc/markuplm.md b/docs/source/en/model_doc/markuplm.md index 07a7342781..897b97853b 100644 --- a/docs/source/en/model_doc/markuplm.md +++ b/docs/source/en/model_doc/markuplm.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2021-10-16 and added to Hugging Face Transformers on 2022-09-30.* # MarkupLM diff --git a/docs/source/en/model_doc/mask2former.md b/docs/source/en/model_doc/mask2former.md index 04968e27e3..fc4b87f836 100644 --- a/docs/source/en/model_doc/mask2former.md +++ b/docs/source/en/model_doc/mask2former.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2021-12-02 and added to Hugging Face Transformers on 2023-01-16.* # Mask2Former diff --git a/docs/source/en/model_doc/maskformer.md b/docs/source/en/model_doc/maskformer.md index cd84cd9ffd..17ef4c876e 100644 --- a/docs/source/en/model_doc/maskformer.md +++ b/docs/source/en/model_doc/maskformer.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2021-07-13 and added to Hugging Face Transformers on 2022-03-02.* # MaskFormer diff --git a/docs/source/en/model_doc/matcha.md b/docs/source/en/model_doc/matcha.md index 7dc5660db6..e6a73c58fd 100644 --- a/docs/source/en/model_doc/matcha.md +++ b/docs/source/en/model_doc/matcha.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. 
--> +*This model was released on 2022-12-19 and added to Hugging Face Transformers on 2023-06-20.* # MatCha diff --git a/docs/source/en/model_doc/mbart.md b/docs/source/en/model_doc/mbart.md index 3bd3ca0bc6..c39ce009af 100644 --- a/docs/source/en/model_doc/mbart.md +++ b/docs/source/en/model_doc/mbart.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2020-01-22 and added to Hugging Face Transformers on 2020-11-16.*
diff --git a/docs/source/en/model_doc/mctct.md b/docs/source/en/model_doc/mctct.md index beb381f6a0..c766b1a825 100644 --- a/docs/source/en/model_doc/mctct.md +++ b/docs/source/en/model_doc/mctct.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2021-10-30 and added to Hugging Face Transformers on 2023-06-20.* # M-CTC-T diff --git a/docs/source/en/model_doc/mega.md b/docs/source/en/model_doc/mega.md index 080d8de529..614df24355 100644 --- a/docs/source/en/model_doc/mega.md +++ b/docs/source/en/model_doc/mega.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2022-09-21 and added to Hugging Face Transformers on 2023-06-20.* # MEGA diff --git a/docs/source/en/model_doc/megatron-bert.md b/docs/source/en/model_doc/megatron-bert.md index 8d3ba12295..f8845556f8 100644 --- a/docs/source/en/model_doc/megatron-bert.md +++ b/docs/source/en/model_doc/megatron-bert.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2019-09-17 and added to Hugging Face Transformers on 2021-04-08.* # MegatronBERT diff --git a/docs/source/en/model_doc/megatron_gpt2.md b/docs/source/en/model_doc/megatron_gpt2.md index fc90474663..5ba4c71293 100644 --- a/docs/source/en/model_doc/megatron_gpt2.md +++ b/docs/source/en/model_doc/megatron_gpt2.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. 
--> +*This model was released on 2019-09-17 and added to Hugging Face Transformers on 2021-10-01.* # MegatronGPT2 diff --git a/docs/source/en/model_doc/mgp-str.md b/docs/source/en/model_doc/mgp-str.md index 5379a60d52..d6706d7fa6 100644 --- a/docs/source/en/model_doc/mgp-str.md +++ b/docs/source/en/model_doc/mgp-str.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2022-09-08 and added to Hugging Face Transformers on 2023-03-13.* # MGP-STR diff --git a/docs/source/en/model_doc/mimi.md b/docs/source/en/model_doc/mimi.md index 4f3f478212..2d655aa596 100644 --- a/docs/source/en/model_doc/mimi.md +++ b/docs/source/en/model_doc/mimi.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2024-09-17 and added to Hugging Face Transformers on 2024-09-18.*
diff --git a/docs/source/en/model_doc/minimax.md b/docs/source/en/model_doc/minimax.md index 258f3ff303..0577fc4ee9 100644 --- a/docs/source/en/model_doc/minimax.md +++ b/docs/source/en/model_doc/minimax.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2025-01-14 and added to Hugging Face Transformers on 2025-06-04.* # MiniMax diff --git a/docs/source/en/model_doc/mistral.md b/docs/source/en/model_doc/mistral.md index ba60eda429..dcda1f3983 100644 --- a/docs/source/en/model_doc/mistral.md +++ b/docs/source/en/model_doc/mistral.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2023-10-10 and added to Hugging Face Transformers on 2023-09-27.*
diff --git a/docs/source/en/model_doc/mistral3.md b/docs/source/en/model_doc/mistral3.md index 74805551db..5e36c37f68 100644 --- a/docs/source/en/model_doc/mistral3.md +++ b/docs/source/en/model_doc/mistral3.md @@ -13,6 +13,8 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2025-01-30 and added to Hugging Face Transformers on 2025-03-18.* +
PyTorch diff --git a/docs/source/en/model_doc/mixtral.md b/docs/source/en/model_doc/mixtral.md index 73172f3168..f71b943dd4 100644 --- a/docs/source/en/model_doc/mixtral.md +++ b/docs/source/en/model_doc/mixtral.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2023-12-11 and added to Hugging Face Transformers on 2023-12-11.* # Mixtral @@ -25,7 +26,7 @@ rendered properly in your Markdown viewer. ## Overview -Mixtral-8x7B was introduced in the [Mixtral of Experts blogpost](https://mistral.ai/news/mixtral-of-experts/) by Albert Jiang, Alexandre Sablayrolles, Arthur Mensch, Chris Bamford, Devendra Singh Chaplot, Diego de las Casas, Florian Bressand, Gianna Lengyel, Guillaume Lample, Lélio Renard Lavaud, Lucile Saulnier, Marie-Anne Lachaux, Pierre Stock, Teven Le Scao, Thibaut Lavril, Thomas Wang, Timothée Lacroix, William El Sayed. +[Mixtral-8x7B](https://huggingface.co/papers/2401.04088) was introduced in the [Mixtral of Experts blogpost](https://mistral.ai/news/mixtral-of-experts/) by Albert Jiang, Alexandre Sablayrolles, Arthur Mensch, Chris Bamford, Devendra Singh Chaplot, Diego de las Casas, Florian Bressand, Gianna Lengyel, Guillaume Lample, Lélio Renard Lavaud, Lucile Saulnier, Marie-Anne Lachaux, Pierre Stock, Teven Le Scao, Thibaut Lavril, Thomas Wang, Timothée Lacroix, William El Sayed. The introduction of the blog post says: diff --git a/docs/source/en/model_doc/mlcd.md b/docs/source/en/model_doc/mlcd.md index 66d87d3e3f..1ce785ee76 100644 --- a/docs/source/en/model_doc/mlcd.md +++ b/docs/source/en/model_doc/mlcd.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2024-07-24 and added to Hugging Face Transformers on 2025-04-15.* # MLCD @@ -23,7 +24,7 @@ rendered properly in your Markdown viewer. 
## Overview -The MLCD models were released by the DeepGlint-AI team in [unicom](https://github.com/deepglint/unicom), which focuses on building foundational visual models for large multimodal language models using large-scale datasets such as LAION400M and COYO700M, and employs sample-to-cluster contrastive learning to optimize performance. MLCD models are primarily used for multimodal visual large language models, such as LLaVA. +The [MLCD](https://huggingface.co/papers/2407.17331) models were released by the DeepGlint-AI team in [unicom](https://github.com/deepglint/unicom), which focuses on building foundational visual models for large multimodal language models using large-scale datasets such as LAION400M and COYO700M, and employs sample-to-cluster contrastive learning to optimize performance. MLCD models are primarily used for multimodal visual large language models, such as LLaVA. 🔥**MLCD-ViT-bigG**🔥 series is the state-of-the-art vision transformer model enhanced with 2D Rotary Position Embedding (RoPE2D), achieving superior performance on document understanding and visual question answering tasks. Developed by DeepGlint AI, this model demonstrates exceptional capabilities in processing complex visual-language interactions. diff --git a/docs/source/en/model_doc/mllama.md b/docs/source/en/model_doc/mllama.md index cdd4da240a..e4d17da84a 100644 --- a/docs/source/en/model_doc/mllama.md +++ b/docs/source/en/model_doc/mllama.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2024-09-25 and added to Hugging Face Transformers on 2024-09-25.* # Mllama @@ -22,7 +23,7 @@ rendered properly in your Markdown viewer. ## Overview -The Llama 3.2-Vision collection of multimodal large language models (LLMs) is a collection of pretrained and instruction-tuned image reasoning generative models in 11B and 90B sizes (text \+ images in / text out). 
The Llama 3.2-Vision instruction-tuned models are optimized for visual recognition, image reasoning, captioning, and answering general questions about an image. +The [Llama 3.2-Vision](https://ai.meta.com/blog/llama-3-2-connect-2024-vision-edge-mobile-devices/) collection of multimodal large language models (LLMs) is a collection of pretrained and instruction-tuned image reasoning generative models in 11B and 90B sizes (text \+ images in / text out). The Llama 3.2-Vision instruction-tuned models are optimized for visual recognition, image reasoning, captioning, and answering general questions about an image. **Model Architecture:** Llama 3.2-Vision is built on top of Llama 3.1 text-only model, which is an auto-regressive language model that uses an optimized transformer architecture. The tuned versions use supervised fine-tuning (SFT) and reinforcement learning with human feedback (RLHF) to align with human preferences for helpfulness and safety. To support image recognition tasks, the Llama 3.2-Vision model uses a separately trained vision adapter that integrates with the pre-trained Llama 3.1 language model. The adapter consists of a series of cross-attention layers that feed image encoder representations into the core LLM. diff --git a/docs/source/en/model_doc/mluke.md b/docs/source/en/model_doc/mluke.md index 3472ebc220..f9310d6c22 100644 --- a/docs/source/en/model_doc/mluke.md +++ b/docs/source/en/model_doc/mluke.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. 
--> +*This model was released on 2021-10-15 and added to Hugging Face Transformers on 2021-12-07.* # mLUKE diff --git a/docs/source/en/model_doc/mm-grounding-dino.md b/docs/source/en/model_doc/mm-grounding-dino.md index d129093498..771ee7288a 100644 --- a/docs/source/en/model_doc/mm-grounding-dino.md +++ b/docs/source/en/model_doc/mm-grounding-dino.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2024-01-04 and added to Hugging Face Transformers on 2025-08-01.*
@@ -22,7 +23,7 @@ rendered properly in your Markdown viewer. # MM Grounding DINO -[MM Grounding DINO](https://arxiv.org/abs/2401.02361) model was proposed in [An Open and Comprehensive Pipeline for Unified Object Grounding and Detection](https://arxiv.org/abs/2401.02361) by Xiangyu Zhao, Yicheng Chen, Shilin Xu, Xiangtai Li, Xinjiang Wang, Yining Li, Haian Huang>. +[MM Grounding DINO](https://huggingface.co/papers/2401.02361) model was proposed in [An Open and Comprehensive Pipeline for Unified Object Grounding and Detection](https://huggingface.co/papers/2401.02361) by Xiangyu Zhao, Yicheng Chen, Shilin Xu, Xiangtai Li, Xinjiang Wang, Yining Li, Haian Huang. MM Grounding DINO improves upon the [Grounding DINO](https://huggingface.co/docs/transformers/model_doc/grounding-dino) by improving the contrastive class head and removing the parameter sharing in the decoder, improving zero-shot detection performance on both COCO (50.6(+2.2) AP) and LVIS (31.9(+11.8) val AP and 41.4(+12.6) minival AP). diff --git a/docs/source/en/model_doc/mms.md b/docs/source/en/model_doc/mms.md index 53b73f8295..2651e5af03 100644 --- a/docs/source/en/model_doc/mms.md +++ b/docs/source/en/model_doc/mms.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2023-05-22 and added to Hugging Face Transformers on 2023-06-20.* # MMS diff --git a/docs/source/en/model_doc/mobilebert.md b/docs/source/en/model_doc/mobilebert.md index fcb5c7a133..e6c3783150 100644 --- a/docs/source/en/model_doc/mobilebert.md +++ b/docs/source/en/model_doc/mobilebert.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2020-04-06 and added to Hugging Face Transformers on 2020-11-16.*
diff --git a/docs/source/en/model_doc/mobilenet_v1.md b/docs/source/en/model_doc/mobilenet_v1.md index cd42629e40..006835fe58 100644 --- a/docs/source/en/model_doc/mobilenet_v1.md +++ b/docs/source/en/model_doc/mobilenet_v1.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2017-04-17 and added to Hugging Face Transformers on 2022-11-21.*
diff --git a/docs/source/en/model_doc/mobilenet_v2.md b/docs/source/en/model_doc/mobilenet_v2.md index a97a721d88..51d8d2267b 100644 --- a/docs/source/en/model_doc/mobilenet_v2.md +++ b/docs/source/en/model_doc/mobilenet_v2.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2018-01-13 and added to Hugging Face Transformers on 2022-11-14.*
diff --git a/docs/source/en/model_doc/mobilevit.md b/docs/source/en/model_doc/mobilevit.md index 698fab3b82..08d0c54baa 100644 --- a/docs/source/en/model_doc/mobilevit.md +++ b/docs/source/en/model_doc/mobilevit.md @@ -9,6 +9,7 @@ Unless required by applicable law or agreed to in writing, software distributed ⚠️ Note that this file is in Markdown but contain specific syntax for our doc-builder (similar to MDX) that may not be rendered properly in your Markdown viewer. --> +*This model was released on 2021-10-05 and added to Hugging Face Transformers on 2022-06-29.* diff --git a/docs/source/en/model_doc/mobilevitv2.md b/docs/source/en/model_doc/mobilevitv2.md index 9c20fb6e96..7f6d952ad8 100644 --- a/docs/source/en/model_doc/mobilevitv2.md +++ b/docs/source/en/model_doc/mobilevitv2.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2022-06-06 and added to Hugging Face Transformers on 2023-06-02.* # MobileViTV2 diff --git a/docs/source/en/model_doc/modernbert-decoder.md b/docs/source/en/model_doc/modernbert-decoder.md index e5a681a465..e767f30f48 100644 --- a/docs/source/en/model_doc/modernbert-decoder.md +++ b/docs/source/en/model_doc/modernbert-decoder.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2024-12-18 and added to Hugging Face Transformers on 2025-07-15.*
diff --git a/docs/source/en/model_doc/modernbert.md b/docs/source/en/model_doc/modernbert.md index 8c939adce0..dd4761a14a 100644 --- a/docs/source/en/model_doc/modernbert.md +++ b/docs/source/en/model_doc/modernbert.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2024-12-18 and added to Hugging Face Transformers on 2024-12-19.*
diff --git a/docs/source/en/model_doc/moonshine.md b/docs/source/en/model_doc/moonshine.md index 4cd2eec774..4aaacb6c80 100644 --- a/docs/source/en/model_doc/moonshine.md +++ b/docs/source/en/model_doc/moonshine.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2024-10-21 and added to Hugging Face Transformers on 2025-01-10.*
diff --git a/docs/source/en/model_doc/moshi.md b/docs/source/en/model_doc/moshi.md index 9302a94619..711bac0a07 100644 --- a/docs/source/en/model_doc/moshi.md +++ b/docs/source/en/model_doc/moshi.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2024-09-17 and added to Hugging Face Transformers on 2024-10-16.* # Moshi @@ -24,7 +25,7 @@ rendered properly in your Markdown viewer. ## Overview -The Moshi model was proposed in [Moshi: a speech-text foundation model for real-time dialogue](https://kyutai.org/Moshi.pdf) by Alexandre Défossez, Laurent Mazaré, Manu Orsini, Amélie Royer, Patrick Pérez, Hervé Jégou, Edouard Grave and Neil Zeghidour. +The Moshi model was proposed in [Moshi: a speech-text foundation model for real-time dialogue](https://huggingface.co/papers/2410.00037) by Alexandre Défossez, Laurent Mazaré, Manu Orsini, Amélie Royer, Patrick Pérez, Hervé Jégou, Edouard Grave and Neil Zeghidour. Moshi is a speech-text foundation model that casts spoken dialogue as speech-to-speech generation. Starting from a text language model backbone, Moshi generates speech as tokens from the residual quantizer of a neural audio codec, while modeling separately its own speech and that of the user into parallel streams. This allows for the removal of explicit speaker turns, and the modeling of arbitrary conversational dynamics. Moshi also predicts time-aligned text tokens as a prefix to audio tokens. This “Inner Monologue” method significantly improves the linguistic quality of generated speech and provides streaming speech recognition and text-to-speech. As a result, Moshi is the first real-time full-duplex spoken large language model, with a theoretical latency of 160ms, 200ms in practice. 
diff --git a/docs/source/en/model_doc/mpnet.md b/docs/source/en/model_doc/mpnet.md index caddc635cb..4b18711655 100644 --- a/docs/source/en/model_doc/mpnet.md +++ b/docs/source/en/model_doc/mpnet.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2020-04-20 and added to Hugging Face Transformers on 2020-12-09.* # MPNet diff --git a/docs/source/en/model_doc/mpt.md b/docs/source/en/model_doc/mpt.md index a4dbc5ea6a..9482e6a919 100644 --- a/docs/source/en/model_doc/mpt.md +++ b/docs/source/en/model_doc/mpt.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2023-05-05 and added to Hugging Face Transformers on 2023-07-25.* # MPT diff --git a/docs/source/en/model_doc/mra.md b/docs/source/en/model_doc/mra.md index 9faa9a2616..ed11d1d9e0 100644 --- a/docs/source/en/model_doc/mra.md +++ b/docs/source/en/model_doc/mra.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2022-07-21 and added to Hugging Face Transformers on 2023-07-10.* # MRA diff --git a/docs/source/en/model_doc/mt5.md b/docs/source/en/model_doc/mt5.md index 2796c96eb8..19e2c07476 100644 --- a/docs/source/en/model_doc/mt5.md +++ b/docs/source/en/model_doc/mt5.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2020-10-22 and added to Hugging Face Transformers on 2020-11-17.*
diff --git a/docs/source/en/model_doc/musicgen.md b/docs/source/en/model_doc/musicgen.md index ff7645bcea..7e91b2265f 100644 --- a/docs/source/en/model_doc/musicgen.md +++ b/docs/source/en/model_doc/musicgen.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2023-06-08 and added to Hugging Face Transformers on 2023-06-29.* # MusicGen diff --git a/docs/source/en/model_doc/musicgen_melody.md b/docs/source/en/model_doc/musicgen_melody.md index 3e4bbabc6c..d2cd51bbcf 100644 --- a/docs/source/en/model_doc/musicgen_melody.md +++ b/docs/source/en/model_doc/musicgen_melody.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2023-06-08 and added to Hugging Face Transformers on 2024-03-18.* # MusicGen Melody diff --git a/docs/source/en/model_doc/mvp.md b/docs/source/en/model_doc/mvp.md index d2dcdeb301..2cce9bd6ca 100644 --- a/docs/source/en/model_doc/mvp.md +++ b/docs/source/en/model_doc/mvp.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2022-06-24 and added to Hugging Face Transformers on 2022-06-29.* # MVP diff --git a/docs/source/en/model_doc/myt5.md b/docs/source/en/model_doc/myt5.md index cb406e9d7d..4097357512 100644 --- a/docs/source/en/model_doc/myt5.md +++ b/docs/source/en/model_doc/myt5.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. 
--> +*This model was released on 2024-03-15 and added to Hugging Face Transformers on 2024-10-06.* # myt5 diff --git a/docs/source/en/model_doc/nat.md b/docs/source/en/model_doc/nat.md index 86a935f9f6..dadcae6f17 100644 --- a/docs/source/en/model_doc/nat.md +++ b/docs/source/en/model_doc/nat.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2022-04-14 and added to Hugging Face Transformers on 2023-06-20.* # Neighborhood Attention Transformer diff --git a/docs/source/en/model_doc/nemotron.md b/docs/source/en/model_doc/nemotron.md index 761ad33fde..84a5b71488 100644 --- a/docs/source/en/model_doc/nemotron.md +++ b/docs/source/en/model_doc/nemotron.md @@ -11,6 +11,7 @@ an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express o specific language governing permissions and limitations under the License. --> +*This model was released on 2024-02-26 and added to Hugging Face Transformers on 2024-08-06.* # Nemotron @@ -114,7 +115,7 @@ If you find our work helpful, please consider citing our paper: author={Saurav Muralidharan and Sharath Turuvekere Sreenivas and Raviraj Joshi and Marcin Chochowski and Mostofa Patwary and Mohammad Shoeybi and Bryan Catanzaro and Jan Kautz and Pavlo Molchanov}, journal={arXiv preprint arXiv:2407.14679}, year={2024}, - url={https://arxiv.org/abs/2407.14679}, + url={https://huggingface.co/papers/2407.14679}, } ``` diff --git a/docs/source/en/model_doc/nezha.md b/docs/source/en/model_doc/nezha.md index edbadcb220..37687fc25d 100644 --- a/docs/source/en/model_doc/nezha.md +++ b/docs/source/en/model_doc/nezha.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. 
--> +*This model was released on 2019-08-31 and added to Hugging Face Transformers on 2023-06-20.* # Nezha diff --git a/docs/source/en/model_doc/nllb-moe.md b/docs/source/en/model_doc/nllb-moe.md index 4e5af4fb18..f1456ee402 100644 --- a/docs/source/en/model_doc/nllb-moe.md +++ b/docs/source/en/model_doc/nllb-moe.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2022-07-11 and added to Hugging Face Transformers on 2023-03-27.* # NLLB-MOE diff --git a/docs/source/en/model_doc/nllb.md b/docs/source/en/model_doc/nllb.md index 483d590016..9289660296 100644 --- a/docs/source/en/model_doc/nllb.md +++ b/docs/source/en/model_doc/nllb.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2022-07-11 and added to Hugging Face Transformers on 2022-07-18.* # NLLB diff --git a/docs/source/en/model_doc/nougat.md b/docs/source/en/model_doc/nougat.md index accde09ffd..65ad1f2458 100644 --- a/docs/source/en/model_doc/nougat.md +++ b/docs/source/en/model_doc/nougat.md @@ -12,6 +12,7 @@ Unless required by applicable law or agreed to in writing, software distributed rendered properly in your Markdown viewer. specific language governing permissions and limitations under the License. --> +*This model was released on 2023-08-25 and added to Hugging Face Transformers on 2023-09-26.* # Nougat diff --git a/docs/source/en/model_doc/nystromformer.md b/docs/source/en/model_doc/nystromformer.md index f368a77a3c..c1c1407fec 100644 --- a/docs/source/en/model_doc/nystromformer.md +++ b/docs/source/en/model_doc/nystromformer.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. 
--> +*This model was released on 2021-02-07 and added to Hugging Face Transformers on 2022-01-11.* # Nyströmformer diff --git a/docs/source/en/model_doc/olmo.md b/docs/source/en/model_doc/olmo.md index efa56ce0af..0cfd1fb51d 100644 --- a/docs/source/en/model_doc/olmo.md +++ b/docs/source/en/model_doc/olmo.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2024-02-01 and added to Hugging Face Transformers on 2024-04-17.* # OLMo diff --git a/docs/source/en/model_doc/olmo2.md b/docs/source/en/model_doc/olmo2.md index 1ed21b660f..63dbe91e3f 100644 --- a/docs/source/en/model_doc/olmo2.md +++ b/docs/source/en/model_doc/olmo2.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2024-12-31 and added to Hugging Face Transformers on 2024-11-25.*
diff --git a/docs/source/en/model_doc/olmoe.md b/docs/source/en/model_doc/olmoe.md index 1db6853cec..2db5bd8b1f 100644 --- a/docs/source/en/model_doc/olmoe.md +++ b/docs/source/en/model_doc/olmoe.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2024-09-03 and added to Hugging Face Transformers on 2024-09-03.*
@@ -29,7 +30,7 @@ rendered properly in your Markdown viewer. You can find all the original OLMoE checkpoints under the [OLMoE](https://huggingface.co/collections/allenai/olmoe-november-2024-66cf678c047657a30c8cd3da) collection. > [!TIP] -> This model was contributed by [Muennighoff](https://hf.co/Muennighoff). +> This model was contributed by [Muennighoff](https://huggingface.co/Muennighoff). > > Click on the OLMoE models in the right sidebar for more examples of how to apply OLMoE to different language tasks. diff --git a/docs/source/en/model_doc/omdet-turbo.md b/docs/source/en/model_doc/omdet-turbo.md index b4fc6adef3..408e1b02f6 100644 --- a/docs/source/en/model_doc/omdet-turbo.md +++ b/docs/source/en/model_doc/omdet-turbo.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2024-03-11 and added to Hugging Face Transformers on 2024-09-25.* # OmDet-Turbo diff --git a/docs/source/en/model_doc/oneformer.md b/docs/source/en/model_doc/oneformer.md index 7beb97deb3..c4b3bd142f 100644 --- a/docs/source/en/model_doc/oneformer.md +++ b/docs/source/en/model_doc/oneformer.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2022-11-10 and added to Hugging Face Transformers on 2023-01-19.* # OneFormer diff --git a/docs/source/en/model_doc/open-llama.md b/docs/source/en/model_doc/open-llama.md index 3b4856cd4f..38954cd315 100644 --- a/docs/source/en/model_doc/open-llama.md +++ b/docs/source/en/model_doc/open-llama.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. 
--> +*This model was released on 2023-04-16 and added to Hugging Face Transformers on 2023-06-20.* # Open-Llama diff --git a/docs/source/en/model_doc/openai-gpt.md b/docs/source/en/model_doc/openai-gpt.md index 27551060c8..29957b5319 100644 --- a/docs/source/en/model_doc/openai-gpt.md +++ b/docs/source/en/model_doc/openai-gpt.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2018-06-11 and added to Hugging Face Transformers on 2023-06-20.*
@@ -29,7 +30,7 @@ rendered properly in your Markdown viewer. # GPT -[GPT (Generative Pre-trained Transformer)](https://cdn.openai.com/research-covers/language-unsupervised/language_understanding_paper.pdf) focuses on effectively learning text representations and transferring them to tasks. This model trains the Transformer decoder to predict the next word, and then fine-tuned on labeled data. +[GPT (Generative Pre-trained Transformer)](https://cdn.openai.com/research-covers/language-unsupervised/language_understanding_paper.pdf) ([blog post](https://openai.com/index/language-unsupervised/)) focuses on effectively learning text representations and transferring them to tasks. This model trains the Transformer decoder to predict the next word, and is then fine-tuned on labeled data. GPT can generate high-quality text, making it well-suited for a variety of natural language understanding tasks such as textual entailment, question answering, semantic similarity, and document classification. diff --git a/docs/source/en/model_doc/opt.md b/docs/source/en/model_doc/opt.md index 6df2c5cca4..d11f9cb3b4 100644 --- a/docs/source/en/model_doc/opt.md +++ b/docs/source/en/model_doc/opt.md @@ -1,3 +1,20 @@ + +*This model was released on 2022-05-02 and added to Hugging Face Transformers on 2022-05-12.* +
PyTorch diff --git a/docs/source/en/model_doc/owlv2.md b/docs/source/en/model_doc/owlv2.md index b3b444d58f..675dc1c9c0 100644 --- a/docs/source/en/model_doc/owlv2.md +++ b/docs/source/en/model_doc/owlv2.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2023-06-16 and added to Hugging Face Transformers on 2023-10-13.* # OWLv2 diff --git a/docs/source/en/model_doc/owlvit.md b/docs/source/en/model_doc/owlvit.md index a69eee88c1..ceae23b4cf 100644 --- a/docs/source/en/model_doc/owlvit.md +++ b/docs/source/en/model_doc/owlvit.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2022-05-12 and added to Hugging Face Transformers on 2022-07-22.* # OWL-ViT diff --git a/docs/source/en/model_doc/paligemma.md b/docs/source/en/model_doc/paligemma.md index a0a0c1b714..72ddde22ec 100644 --- a/docs/source/en/model_doc/paligemma.md +++ b/docs/source/en/model_doc/paligemma.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2024-07-10 and added to Hugging Face Transformers on 2024-05-14.*
diff --git a/docs/source/en/model_doc/patchtsmixer.md b/docs/source/en/model_doc/patchtsmixer.md index 15573330a0..5541f4d809 100644 --- a/docs/source/en/model_doc/patchtsmixer.md +++ b/docs/source/en/model_doc/patchtsmixer.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2023-06-14 and added to Hugging Face Transformers on 2023-12-05.* # PatchTSMixer diff --git a/docs/source/en/model_doc/patchtst.md b/docs/source/en/model_doc/patchtst.md index 5d9a2f402e..504da22f10 100644 --- a/docs/source/en/model_doc/patchtst.md +++ b/docs/source/en/model_doc/patchtst.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2022-11-27 and added to Hugging Face Transformers on 2023-11-13.* # PatchTST diff --git a/docs/source/en/model_doc/pegasus.md b/docs/source/en/model_doc/pegasus.md index 3323ee3a3c..10755c902b 100644 --- a/docs/source/en/model_doc/pegasus.md +++ b/docs/source/en/model_doc/pegasus.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2019-12-18 and added to Hugging Face Transformers on 2020-11-16.*
diff --git a/docs/source/en/model_doc/pegasus_x.md b/docs/source/en/model_doc/pegasus_x.md index d581b2e9a3..b1c5c17d3f 100644 --- a/docs/source/en/model_doc/pegasus_x.md +++ b/docs/source/en/model_doc/pegasus_x.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2022-08-08 and added to Hugging Face Transformers on 2022-09-02.*
diff --git a/docs/source/en/model_doc/perceiver.md b/docs/source/en/model_doc/perceiver.md index eb930bd4bd..5414daf0f1 100644 --- a/docs/source/en/model_doc/perceiver.md +++ b/docs/source/en/model_doc/perceiver.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2021-07-30 and added to Hugging Face Transformers on 2021-12-08.* # Perceiver diff --git a/docs/source/en/model_doc/perception_lm.md b/docs/source/en/model_doc/perception_lm.md index 3982d521b9..ee6b63fce6 100644 --- a/docs/source/en/model_doc/perception_lm.md +++ b/docs/source/en/model_doc/perception_lm.md @@ -13,12 +13,13 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2025-04-17 and added to Hugging Face Transformers on 2025-07-11.* # PerceptionLM ## Overview -The PerceptionLM model was proposed in [PerceptionLM: Open-Access Data and Models for Detailed Visual Understanding](https://ai.meta.com/research/publications/perceptionlm-open-access-data-and-models-for-detailed-visual-understanding/) by Jang Hyun Cho et al. It's a fully open, reproducible model for transparent research in image and video understanding. PLM consists of +The [PerceptionLM](https://huggingface.co/papers/2504.13180) model was proposed in [PerceptionLM: Open-Access Data and Models for Detailed Visual Understanding](https://ai.meta.com/research/publications/perceptionlm-open-access-data-and-models-for-detailed-visual-understanding/) by Jang Hyun Cho et al. It's a fully open, reproducible model for transparent research in image and video understanding. PLM consists of a vision encoder with a small scale (<8B parameters) LLM decoder. 
The abstract from the paper is the following: diff --git a/docs/source/en/model_doc/persimmon.md b/docs/source/en/model_doc/persimmon.md index bf721f19a1..23be6fac04 100644 --- a/docs/source/en/model_doc/persimmon.md +++ b/docs/source/en/model_doc/persimmon.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2023-09-07 and added to Hugging Face Transformers on 2023-09-12.* # Persimmon diff --git a/docs/source/en/model_doc/phi.md b/docs/source/en/model_doc/phi.md index 10f53eb583..f416615fc2 100644 --- a/docs/source/en/model_doc/phi.md +++ b/docs/source/en/model_doc/phi.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2023-06-20 and added to Hugging Face Transformers on 2023-11-10.*
PyTorch diff --git a/docs/source/en/model_doc/phi3.md b/docs/source/en/model_doc/phi3.md index 77444d7955..8172472885 100644 --- a/docs/source/en/model_doc/phi3.md +++ b/docs/source/en/model_doc/phi3.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2024-04-22 and added to Hugging Face Transformers on 2024-04-24.* # Phi-3 diff --git a/docs/source/en/model_doc/phi4_multimodal.md b/docs/source/en/model_doc/phi4_multimodal.md index f7d93d2617..e2513e3a14 100644 --- a/docs/source/en/model_doc/phi4_multimodal.md +++ b/docs/source/en/model_doc/phi4_multimodal.md @@ -8,6 +8,7 @@ specific language governing permissions and limitations under the License. ⚠️ Note that this file is in Markdown but contain specific syntax for our doc-builder (similar to MDX) that may not be rendered properly in your Markdown viewer. --> +*This model was released on 2025-03-03 and added to Hugging Face Transformers on 2025-03-25.*
diff --git a/docs/source/en/model_doc/phimoe.md b/docs/source/en/model_doc/phimoe.md index 8395021411..05206e2a8f 100644 --- a/docs/source/en/model_doc/phimoe.md +++ b/docs/source/en/model_doc/phimoe.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2024-04-22 and added to Hugging Face Transformers on 2024-10-04.* # PhiMoE diff --git a/docs/source/en/model_doc/phobert.md b/docs/source/en/model_doc/phobert.md index c1c4b8742b..d01571aaea 100644 --- a/docs/source/en/model_doc/phobert.md +++ b/docs/source/en/model_doc/phobert.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2020-03-02 and added to Hugging Face Transformers on 2020-11-16.* # PhoBERT @@ -25,7 +26,7 @@ rendered properly in your Markdown viewer. ## Overview -The PhoBERT model was proposed in [PhoBERT: Pre-trained language models for Vietnamese](https://www.aclweb.org/anthology/2020.findings-emnlp.92.pdf) by Dat Quoc Nguyen, Anh Tuan Nguyen. +The PhoBERT model was proposed in [PhoBERT: Pre-trained language models for Vietnamese](https://huggingface.co/papers/2003.00744) by Dat Quoc Nguyen, Anh Tuan Nguyen. The abstract from the paper is the following: diff --git a/docs/source/en/model_doc/pix2struct.md b/docs/source/en/model_doc/pix2struct.md index b03e73d246..c43c9b3b92 100644 --- a/docs/source/en/model_doc/pix2struct.md +++ b/docs/source/en/model_doc/pix2struct.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. 
--> +*This model was released on 2022-10-07 and added to Hugging Face Transformers on 2023-03-22.* # Pix2Struct diff --git a/docs/source/en/model_doc/pixtral.md b/docs/source/en/model_doc/pixtral.md index 6adac0277f..70a7ceb9bb 100644 --- a/docs/source/en/model_doc/pixtral.md +++ b/docs/source/en/model_doc/pixtral.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2024-09-17 and added to Hugging Face Transformers on 2024-09-14.* # Pixtral @@ -22,7 +23,7 @@ rendered properly in your Markdown viewer. ## Overview -The Pixtral model was released by the Mistral AI team in a [blog post](https://mistral.ai/news/pixtral-12b/). Pixtral is a multimodal version of [Mistral](mistral), incorporating a 400 million parameter vision encoder trained from scratch. +The [Pixtral](https://huggingface.co/papers/2410.07073) model was released by the Mistral AI team in a [blog post](https://mistral.ai/news/pixtral-12b/). Pixtral is a multimodal version of [Mistral](mistral), incorporating a 400 million parameter vision encoder trained from scratch. The intro from the blog says the following: diff --git a/docs/source/en/model_doc/plbart.md b/docs/source/en/model_doc/plbart.md index a885924530..d8ce330cb0 100644 --- a/docs/source/en/model_doc/plbart.md +++ b/docs/source/en/model_doc/plbart.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2021-03-10 and added to Hugging Face Transformers on 2022-02-18.* # PLBart diff --git a/docs/source/en/model_doc/poolformer.md b/docs/source/en/model_doc/poolformer.md index 46c84d04fa..b4b378ff43 100644 --- a/docs/source/en/model_doc/poolformer.md +++ b/docs/source/en/model_doc/poolformer.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. 
rendered properly in your Markdown viewer. --> +*This model was released on 2021-11-22 and added to Hugging Face Transformers on 2022-02-17.* # PoolFormer diff --git a/docs/source/en/model_doc/pop2piano.md b/docs/source/en/model_doc/pop2piano.md index 6f78233d2c..5f68b18050 100644 --- a/docs/source/en/model_doc/pop2piano.md +++ b/docs/source/en/model_doc/pop2piano.md @@ -9,6 +9,7 @@ Unless required by applicable law or agreed to in writing, software distributed an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. --> +*This model was released on 2022-11-02 and added to Hugging Face Transformers on 2023-08-21.* # Pop2Piano diff --git a/docs/source/en/model_doc/prompt_depth_anything.md b/docs/source/en/model_doc/prompt_depth_anything.md index 271fc4e2c0..84960a33cc 100644 --- a/docs/source/en/model_doc/prompt_depth_anything.md +++ b/docs/source/en/model_doc/prompt_depth_anything.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2024-12-18 and added to Hugging Face Transformers on 2025-03-21.* # Prompt Depth Anything diff --git a/docs/source/en/model_doc/prophetnet.md b/docs/source/en/model_doc/prophetnet.md index 9085886cde..19131b75ce 100644 --- a/docs/source/en/model_doc/prophetnet.md +++ b/docs/source/en/model_doc/prophetnet.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. 
--> +*This model was released on 2020-01-13 and added to Hugging Face Transformers on 2020-11-16.* # ProphetNet diff --git a/docs/source/en/model_doc/pvt.md b/docs/source/en/model_doc/pvt.md index 4b221c9791..e7902affe5 100644 --- a/docs/source/en/model_doc/pvt.md +++ b/docs/source/en/model_doc/pvt.md @@ -9,6 +9,7 @@ Unless required by applicable law or agreed to in writing, software distributed an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. --> +*This model was released on 2021-02-24 and added to Hugging Face Transformers on 2023-07-24.* # Pyramid Vision Transformer (PVT) diff --git a/docs/source/en/model_doc/pvt_v2.md b/docs/source/en/model_doc/pvt_v2.md index b8ebe9198a..0d0ee3cca7 100644 --- a/docs/source/en/model_doc/pvt_v2.md +++ b/docs/source/en/model_doc/pvt_v2.md @@ -9,6 +9,7 @@ Unless required by applicable law or agreed to in writing, software distributed an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. --> +*This model was released on 2021-06-25 and added to Hugging Face Transformers on 2024-03-13.* # Pyramid Vision Transformer V2 (PVTv2) diff --git a/docs/source/en/model_doc/qdqbert.md b/docs/source/en/model_doc/qdqbert.md index 64e00d6a43..76fe00c563 100644 --- a/docs/source/en/model_doc/qdqbert.md +++ b/docs/source/en/model_doc/qdqbert.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. 
--> +*This model was released on 2020-04-20 and added to Hugging Face Transformers on 2023-06-20.* # QDQBERT diff --git a/docs/source/en/model_doc/qwen2.md b/docs/source/en/model_doc/qwen2.md index 899d9dddf5..f6b89ee57d 100644 --- a/docs/source/en/model_doc/qwen2.md +++ b/docs/source/en/model_doc/qwen2.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2024-07-15 and added to Hugging Face Transformers on 2024-01-17.*
diff --git a/docs/source/en/model_doc/qwen2_5_omni.md b/docs/source/en/model_doc/qwen2_5_omni.md index dec62d1f2c..5167336955 100644 --- a/docs/source/en/model_doc/qwen2_5_omni.md +++ b/docs/source/en/model_doc/qwen2_5_omni.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2025-03-26 and added to Hugging Face Transformers on 2025-04-14.* # Qwen2.5-Omni @@ -24,7 +25,7 @@ rendered properly in your Markdown viewer. ## Overview -The [Qwen2.5-Omni](https://qwenlm.github.io/blog/) model is a unified multiple modalities model proposed in [Qwen2.5-Omni Technical Report]() from Qwen team, Alibaba Group. +The [Qwen2.5-Omni](https://qwenlm.github.io/blog/qwen2.5-omni/) model is a unified multiple modalities model proposed in [Qwen2.5-Omni Technical Report](https://huggingface.co/papers/2503.20215) from Qwen team, Alibaba Group. The abstract from the technical report is the following: diff --git a/docs/source/en/model_doc/qwen2_5_vl.md b/docs/source/en/model_doc/qwen2_5_vl.md index 57b88d1b8d..df4244ffa5 100644 --- a/docs/source/en/model_doc/qwen2_5_vl.md +++ b/docs/source/en/model_doc/qwen2_5_vl.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2025-02-19 and added to Hugging Face Transformers on 2025-01-23.*
diff --git a/docs/source/en/model_doc/qwen2_audio.md b/docs/source/en/model_doc/qwen2_audio.md index 22e1effd27..cd52b5bac3 100644 --- a/docs/source/en/model_doc/qwen2_audio.md +++ b/docs/source/en/model_doc/qwen2_audio.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2024-07-15 and added to Hugging Face Transformers on 2024-08-08.* # Qwen2Audio diff --git a/docs/source/en/model_doc/qwen2_moe.md b/docs/source/en/model_doc/qwen2_moe.md index e765affc36..5de814e29c 100644 --- a/docs/source/en/model_doc/qwen2_moe.md +++ b/docs/source/en/model_doc/qwen2_moe.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2024-07-15 and added to Hugging Face Transformers on 2024-03-27.*
PyTorch diff --git a/docs/source/en/model_doc/qwen2_vl.md b/docs/source/en/model_doc/qwen2_vl.md index 926cb5bc4d..7b6a2910f8 100644 --- a/docs/source/en/model_doc/qwen2_vl.md +++ b/docs/source/en/model_doc/qwen2_vl.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2024-08-29 and added to Hugging Face Transformers on 2024-08-26.* # Qwen2-VL @@ -24,7 +25,7 @@ rendered properly in your Markdown viewer. ## Overview -The [Qwen2-VL](https://qwenlm.github.io/blog/qwen2-vl/) model is a major update to [Qwen-VL](https://huggingface.co/papers/2308.12966) from the Qwen team at Alibaba Research. +The [Qwen2-VL](https://huggingface.co/papers/2409.12191) ([blog post](https://qwenlm.github.io/blog/qwen2-vl/)) model is a major update to [Qwen-VL](https://huggingface.co/papers/2308.12966) from the Qwen team at Alibaba Research. The abstract from the blog is the following: diff --git a/docs/source/en/model_doc/qwen3.md b/docs/source/en/model_doc/qwen3.md index e3f3c26609..87e6ba500f 100644 --- a/docs/source/en/model_doc/qwen3.md +++ b/docs/source/en/model_doc/qwen3.md @@ -13,12 +13,13 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2025-04-29 and added to Hugging Face Transformers on 2025-03-31.* # Qwen3 ## Overview -To be released with the official model launch. +[Qwen3](https://huggingface.co/papers/2505.09388) refers to the dense model architecture Qwen3-32B which was released with its mixture of experts variant [Qwen3MoE](qwen3_moe) ([blog post](https://qwenlm.github.io/blog/qwen3/)). 
### Model Details diff --git a/docs/source/en/model_doc/qwen3_moe.md b/docs/source/en/model_doc/qwen3_moe.md index 1de4af1a5b..2d1090f1e9 100644 --- a/docs/source/en/model_doc/qwen3_moe.md +++ b/docs/source/en/model_doc/qwen3_moe.md @@ -13,12 +13,13 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2025-04-29 and added to Hugging Face Transformers on 2025-03-31.* # Qwen3MoE ## Overview -To be released with the official model launch. +[Qwen3MoE](https://huggingface.co/papers/2505.09388) refers to the mixture of experts model architecture Qwen3-235B-A22B which was released with its dense variant [Qwen3](qwen3) ([blog post](https://qwenlm.github.io/blog/qwen3/)). ### Model Details diff --git a/docs/source/en/model_doc/rag.md b/docs/source/en/model_doc/rag.md index 425d5c70d1..f44bf04c3b 100644 --- a/docs/source/en/model_doc/rag.md +++ b/docs/source/en/model_doc/rag.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2020-05-22 and added to Hugging Face Transformers on 2020-11-16.* # RAG diff --git a/docs/source/en/model_doc/realm.md b/docs/source/en/model_doc/realm.md index efff6717d8..da3d1c140f 100644 --- a/docs/source/en/model_doc/realm.md +++ b/docs/source/en/model_doc/realm.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2020-02-10 and added to Hugging Face Transformers on 2023-06-20.* # REALM diff --git a/docs/source/en/model_doc/recurrent_gemma.md b/docs/source/en/model_doc/recurrent_gemma.md index b543b35a75..1cd4e784a5 100644 --- a/docs/source/en/model_doc/recurrent_gemma.md +++ b/docs/source/en/model_doc/recurrent_gemma.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. 
rendered properly in your Markdown viewer. --> +*This model was released on 2024-04-11 and added to Hugging Face Transformers on 2024-04-10.* # RecurrentGemma @@ -22,7 +23,7 @@ rendered properly in your Markdown viewer. ## Overview -The Recurrent Gemma model was proposed in [RecurrentGemma: Moving Past Transformers for Efficient Open Language Models](https://storage.googleapis.com/deepmind-media/gemma/recurrentgemma-report.pdf) by the Griffin, RLHF and Gemma Teams of Google. +The Recurrent Gemma model was proposed in [RecurrentGemma: Moving Past Transformers for Efficient Open Language Models](https://huggingface.co/papers/2404.07839) by the Griffin, RLHF and Gemma Teams of Google. The abstract from the paper is the following: diff --git a/docs/source/en/model_doc/reformer.md b/docs/source/en/model_doc/reformer.md index e65c725d90..1c01fa7c05 100644 --- a/docs/source/en/model_doc/reformer.md +++ b/docs/source/en/model_doc/reformer.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2020-01-13 and added to Hugging Face Transformers on 2020-11-16.* # Reformer diff --git a/docs/source/en/model_doc/regnet.md b/docs/source/en/model_doc/regnet.md index a86176bcf2..1f5de7645f 100644 --- a/docs/source/en/model_doc/regnet.md +++ b/docs/source/en/model_doc/regnet.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2020-03-30 and added to Hugging Face Transformers on 2022-04-07.* # RegNet diff --git a/docs/source/en/model_doc/rembert.md b/docs/source/en/model_doc/rembert.md index 6cf0e35c2a..ffc9e0f91f 100644 --- a/docs/source/en/model_doc/rembert.md +++ b/docs/source/en/model_doc/rembert.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. 
--> +*This model was released on 2020-10-24 and added to Hugging Face Transformers on 2021-07-24.* # RemBERT diff --git a/docs/source/en/model_doc/resnet.md b/docs/source/en/model_doc/resnet.md index 03ad0b0c32..29551c87de 100644 --- a/docs/source/en/model_doc/resnet.md +++ b/docs/source/en/model_doc/resnet.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2015-12-10 and added to Hugging Face Transformers on 2022-03-14.* # ResNet diff --git a/docs/source/en/model_doc/retribert.md b/docs/source/en/model_doc/retribert.md index 795f81caaa..871bdc6e8c 100644 --- a/docs/source/en/model_doc/retribert.md +++ b/docs/source/en/model_doc/retribert.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2020-06-12 and added to Hugging Face Transformers on 2023-06-20.* # RetriBERT @@ -31,7 +32,7 @@ You can do so by running the following command: `pip install -U transformers==4. ## Overview -The RetriBERT model was proposed in the blog post [Explain Anything Like I'm Five: A Model for Open Domain Long Form +The [RetriBERT](https://huggingface.co/yjernite/retribert-base-uncased/tree/main) model was proposed in the blog post [Explain Anything Like I'm Five: A Model for Open Domain Long Form Question Answering](https://yjernite.github.io/lfqa.html). RetriBERT is a small model that uses either a single or pair of BERT encoders with lower-dimension projection for dense semantic indexing of text. diff --git a/docs/source/en/model_doc/roberta-prelayernorm.md b/docs/source/en/model_doc/roberta-prelayernorm.md index 81b52fec02..60f3aa9cbf 100644 --- a/docs/source/en/model_doc/roberta-prelayernorm.md +++ b/docs/source/en/model_doc/roberta-prelayernorm.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. 
rendered properly in your Markdown viewer. --> +*This model was released on 2019-04-01 and added to Hugging Face Transformers on 2022-12-19.* # RoBERTa-PreLayerNorm diff --git a/docs/source/en/model_doc/roberta.md b/docs/source/en/model_doc/roberta.md index 058bebad5b..7f77ed2b3f 100644 --- a/docs/source/en/model_doc/roberta.md +++ b/docs/source/en/model_doc/roberta.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2019-07-26 and added to Hugging Face Transformers on 2020-11-16.*
diff --git a/docs/source/en/model_doc/roc_bert.md b/docs/source/en/model_doc/roc_bert.md index 90373085a1..6350544787 100644 --- a/docs/source/en/model_doc/roc_bert.md +++ b/docs/source/en/model_doc/roc_bert.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2022-05-27 and added to Hugging Face Transformers on 2022-11-08.*
diff --git a/docs/source/en/model_doc/roformer.md b/docs/source/en/model_doc/roformer.md index 48c652036e..06663ff491 100644 --- a/docs/source/en/model_doc/roformer.md +++ b/docs/source/en/model_doc/roformer.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2021-04-20 and added to Hugging Face Transformers on 2021-05-20.*
diff --git a/docs/source/en/model_doc/rt_detr.md b/docs/source/en/model_doc/rt_detr.md index aeee1f4c03..02accfd6d9 100644 --- a/docs/source/en/model_doc/rt_detr.md +++ b/docs/source/en/model_doc/rt_detr.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2023-04-17 and added to Hugging Face Transformers on 2024-06-22.* # RT-DETR diff --git a/docs/source/en/model_doc/rt_detr_v2.md b/docs/source/en/model_doc/rt_detr_v2.md index 6390d36b07..f5eb54625c 100644 --- a/docs/source/en/model_doc/rt_detr_v2.md +++ b/docs/source/en/model_doc/rt_detr_v2.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2024-07-24 and added to Hugging Face Transformers on 2025-02-06.* # RT-DETRv2 diff --git a/docs/source/en/model_doc/rwkv.md b/docs/source/en/model_doc/rwkv.md index 8b54c25204..4d9d6bbb88 100644 --- a/docs/source/en/model_doc/rwkv.md +++ b/docs/source/en/model_doc/rwkv.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2022-08-17 and added to Hugging Face Transformers on 2023-05-09.* # RWKV @@ -22,7 +23,7 @@ rendered properly in your Markdown viewer. ## Overview -The RWKV model was proposed in [this repo](https://github.com/BlinkDL/RWKV-LM) +The RWKV model (version 4) was proposed in [this repo](https://github.com/BlinkDL/RWKV-LM) It suggests a tweak in the traditional Transformer attention to make it linear. This way, the model can be used as recurrent network: passing inputs for timestamp 0 and timestamp 1 together is the same as passing inputs at timestamp 0, then inputs at timestamp 1 along with the state of timestamp 0 (see example below). 
diff --git a/docs/source/en/model_doc/sam.md b/docs/source/en/model_doc/sam.md index ac73c107b8..ab708b5377 100644 --- a/docs/source/en/model_doc/sam.md +++ b/docs/source/en/model_doc/sam.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2023-04-05 and added to Hugging Face Transformers on 2023-04-19.* # SAM diff --git a/docs/source/en/model_doc/sam_hq.md b/docs/source/en/model_doc/sam_hq.md index 8e8e4e559f..4f5b5fc03b 100644 --- a/docs/source/en/model_doc/sam_hq.md +++ b/docs/source/en/model_doc/sam_hq.md @@ -1,3 +1,20 @@ + +*This model was released on 2023-06-02 and added to Hugging Face Transformers on 2025-04-28.* + # SAM-HQ ## Overview diff --git a/docs/source/en/model_doc/seamless_m4t.md b/docs/source/en/model_doc/seamless_m4t.md index d523408f78..c6f3a56f9b 100644 --- a/docs/source/en/model_doc/seamless_m4t.md +++ b/docs/source/en/model_doc/seamless_m4t.md @@ -9,6 +9,7 @@ Unless required by applicable law or agreed to in writing, software distributed an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. --> +*This model was released on 2023-08-22 and added to Hugging Face Transformers on 2023-10-23.* # SeamlessM4T @@ -18,7 +19,7 @@ specific language governing permissions and limitations under the License. ## Overview -The SeamlessM4T model was proposed in [SeamlessM4T — Massively Multilingual & Multimodal Machine Translation](https://dl.fbaipublicfiles.com/seamless/seamless_m4t_paper.pdf) by the Seamless Communication team from Meta AI. +The SeamlessM4T model was proposed in [SeamlessM4T — Massively Multilingual & Multimodal Machine Translation](https://huggingface.co/papers/2308.11596) by the Seamless Communication team from Meta AI. This is the **version 1** release of the model. 
For the updated **version 2** release, refer to the [Seamless M4T v2 docs](https://huggingface.co/docs/transformers/main/model_doc/seamless_m4t_v2). diff --git a/docs/source/en/model_doc/seamless_m4t_v2.md b/docs/source/en/model_doc/seamless_m4t_v2.md index c98b7b4dd8..8a4ab82d2e 100644 --- a/docs/source/en/model_doc/seamless_m4t_v2.md +++ b/docs/source/en/model_doc/seamless_m4t_v2.md @@ -9,6 +9,7 @@ Unless required by applicable law or agreed to in writing, software distributed an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. --> +*This model was released on 2023-12-08 and added to Hugging Face Transformers on 2023-11-30.* # SeamlessM4T-v2 @@ -18,7 +19,7 @@ specific language governing permissions and limitations under the License. ## Overview -The SeamlessM4T-v2 model was proposed in [Seamless: Multilingual Expressive and Streaming Speech Translation](https://ai.meta.com/research/publications/seamless-multilingual-expressive-and-streaming-speech-translation/) by the Seamless Communication team from Meta AI. +The SeamlessM4T-v2 model was proposed in [Seamless: Multilingual Expressive and Streaming Speech Translation](https://huggingface.co/papers/2312.05187) by the Seamless Communication team from Meta AI. SeamlessM4T-v2 is a collection of models designed to provide high quality translation, allowing people from different linguistic communities to communicate effortlessly through speech and text. It is an improvement on the [previous version](https://huggingface.co/docs/transformers/main/model_doc/seamless_m4t). For more details on the differences between v1 and v2, refer to section [Difference with SeamlessM4T-v1](#difference-with-seamlessm4t-v1). 
diff --git a/docs/source/en/model_doc/segformer.md b/docs/source/en/model_doc/segformer.md index 730757aca5..4d434d1973 100644 --- a/docs/source/en/model_doc/segformer.md +++ b/docs/source/en/model_doc/segformer.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2021-05-31 and added to Hugging Face Transformers on 2021-10-28.* # SegFormer diff --git a/docs/source/en/model_doc/seggpt.md b/docs/source/en/model_doc/seggpt.md index 89f80871ac..9e8c08cf2d 100644 --- a/docs/source/en/model_doc/seggpt.md +++ b/docs/source/en/model_doc/seggpt.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2023-04-06 and added to Hugging Face Transformers on 2024-02-26.* # SegGPT diff --git a/docs/source/en/model_doc/sew-d.md b/docs/source/en/model_doc/sew-d.md index a6648d2980..f8c7190c36 100644 --- a/docs/source/en/model_doc/sew-d.md +++ b/docs/source/en/model_doc/sew-d.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2021-09-14 and added to Hugging Face Transformers on 2021-10-15.* # SEW-D diff --git a/docs/source/en/model_doc/sew.md b/docs/source/en/model_doc/sew.md index 865b4943c3..b52849f7c3 100644 --- a/docs/source/en/model_doc/sew.md +++ b/docs/source/en/model_doc/sew.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. 
--> +*This model was released on 2021-09-14 and added to Hugging Face Transformers on 2021-10-15.* # SEW diff --git a/docs/source/en/model_doc/shieldgemma2.md b/docs/source/en/model_doc/shieldgemma2.md index 0e53418a73..de8f0b35f5 100644 --- a/docs/source/en/model_doc/shieldgemma2.md +++ b/docs/source/en/model_doc/shieldgemma2.md @@ -14,6 +14,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2025-04-01 and added to Hugging Face Transformers on 2025-03-20.* # ShieldGemma 2 diff --git a/docs/source/en/model_doc/siglip.md b/docs/source/en/model_doc/siglip.md index e443a6f0cb..9388f2673f 100644 --- a/docs/source/en/model_doc/siglip.md +++ b/docs/source/en/model_doc/siglip.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2023-03-27 and added to Hugging Face Transformers on 2024-01-08.*
diff --git a/docs/source/en/model_doc/siglip2.md b/docs/source/en/model_doc/siglip2.md index 830258f2fc..0b421ab40a 100644 --- a/docs/source/en/model_doc/siglip2.md +++ b/docs/source/en/model_doc/siglip2.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2025-02-20 and added to Hugging Face Transformers on 2025-02-21.*
diff --git a/docs/source/en/model_doc/smollm3.md b/docs/source/en/model_doc/smollm3.md index 3d1c297f92..c822349a62 100644 --- a/docs/source/en/model_doc/smollm3.md +++ b/docs/source/en/model_doc/smollm3.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2025-07-08 and added to Hugging Face Transformers on 2025-06-25.*
@@ -24,7 +25,7 @@ rendered properly in your Markdown viewer. # SmolLM3 -SmolLM3 is a fully open, compact language model designed for efficient deployment while maintaining strong performance. It uses a Transformer decoder architecture with Grouped Query Attention (GQA) to reduce the kv cache, and no RoPE, enabling improved performance on long-context tasks. It is trained using a multi-stage training approach on high-quality public datasets across web, code, and math domains. The model is multilingual and supports very large context lengths. The instruct variant is optimized for reasoning and tool use. +[SmolLM3](https://huggingface.co/blog/smollm3) is a fully open, compact language model designed for efficient deployment while maintaining strong performance. It uses a Transformer decoder architecture with Grouped Query Attention (GQA) to reduce the kv cache, and no RoPE, enabling improved performance on long-context tasks. It is trained using a multi-stage training approach on high-quality public datasets across web, code, and math domains. The model is multilingual and supports very large context lengths. The instruct variant is optimized for reasoning and tool use. > [!TIP] > Click on the SmolLM3 models in the right sidebar for more examples of how to apply SmolLM3 to different language tasks. diff --git a/docs/source/en/model_doc/smolvlm.md b/docs/source/en/model_doc/smolvlm.md index f63ff7c40a..72b53818de 100644 --- a/docs/source/en/model_doc/smolvlm.md +++ b/docs/source/en/model_doc/smolvlm.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2025-02-20 and added to Hugging Face Transformers on 2025-02-20.* # SmolVLM @@ -23,7 +24,7 @@ rendered properly in your Markdown viewer.
## Overview -SmolVLM2 is an adaptation of the Idefics3 model with two main differences: +[SmolVLM2](https://huggingface.co/papers/2504.05299) ([blog post](https://huggingface.co/blog/smolvlm2)) is an adaptation of the Idefics3 model with two main differences: - It uses SmolLM2 for the text model. - It supports multi-image and video inputs diff --git a/docs/source/en/model_doc/speech-encoder-decoder.md b/docs/source/en/model_doc/speech-encoder-decoder.md index 52f6634f9f..91f64e6c51 100644 --- a/docs/source/en/model_doc/speech-encoder-decoder.md +++ b/docs/source/en/model_doc/speech-encoder-decoder.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2021-04-14 and added to Hugging Face Transformers on 2021-09-01.* # Speech Encoder Decoder Models diff --git a/docs/source/en/model_doc/speech_to_text.md b/docs/source/en/model_doc/speech_to_text.md index 1b6c74892f..ae81d77985 100644 --- a/docs/source/en/model_doc/speech_to_text.md +++ b/docs/source/en/model_doc/speech_to_text.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2020-10-11 and added to Hugging Face Transformers on 2021-03-10.* # Speech2Text diff --git a/docs/source/en/model_doc/speech_to_text_2.md b/docs/source/en/model_doc/speech_to_text_2.md index 6d77e5ad39..a3d836455b 100644 --- a/docs/source/en/model_doc/speech_to_text_2.md +++ b/docs/source/en/model_doc/speech_to_text_2.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. 
--> +*This model was released on 2021-04-14 and added to Hugging Face Transformers on 2023-06-20.* # Speech2Text2 diff --git a/docs/source/en/model_doc/speecht5.md b/docs/source/en/model_doc/speecht5.md index d41a583d7a..317bb6dee2 100644 --- a/docs/source/en/model_doc/speecht5.md +++ b/docs/source/en/model_doc/speecht5.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2021-10-14 and added to Hugging Face Transformers on 2023-02-03.* # SpeechT5 diff --git a/docs/source/en/model_doc/splinter.md b/docs/source/en/model_doc/splinter.md index 74e9ffc250..c3ef982da9 100644 --- a/docs/source/en/model_doc/splinter.md +++ b/docs/source/en/model_doc/splinter.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2021-01-02 and added to Hugging Face Transformers on 2021-08-17.* # Splinter diff --git a/docs/source/en/model_doc/squeezebert.md b/docs/source/en/model_doc/squeezebert.md index 2b91878296..70e409daf7 100644 --- a/docs/source/en/model_doc/squeezebert.md +++ b/docs/source/en/model_doc/squeezebert.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2020-06-19 and added to Hugging Face Transformers on 2020-11-16.* # SqueezeBERT diff --git a/docs/source/en/model_doc/stablelm.md b/docs/source/en/model_doc/stablelm.md index b996b7fcf9..5742021a73 100644 --- a/docs/source/en/model_doc/stablelm.md +++ b/docs/source/en/model_doc/stablelm.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. 
--> +*This model was released on 2023-09-05 and added to Hugging Face Transformers on 2024-02-14.* # StableLM @@ -24,14 +25,14 @@ rendered properly in your Markdown viewer. ## Overview -`StableLM 3B 4E1T` was proposed in [`StableLM 3B 4E1T`: Technical Report](https://stability.wandb.io/stability-llm/stable-lm/reports/StableLM-3B-4E1T--VmlldzoyMjU4?accessToken=u3zujipenkx5g7rtcj9qojjgxpconyjktjkli2po09nffrffdhhchq045vp0wyfo) by Stability AI and is the first model in a series of multi-epoch pre-trained language models. +StableLM 3B 4E1T ([blog post](https://stability.ai/news/stable-lm-3b-sustainable-high-performance-language-models-smart-devices)) was proposed in [StableLM 3B 4E1T: Technical Report](https://stability.wandb.io/stability-llm/stable-lm/reports/StableLM-3B-4E1T--VmlldzoyMjU4?accessToken=u3zujipenkx5g7rtcj9qojjgxpconyjktjkli2po09nffrffdhhchq045vp0wyfo) by Stability AI and is the first model in a series of multi-epoch pre-trained language models. ### Model Details -`StableLM 3B 4E1T` is a decoder-only base language model pre-trained on 1 trillion tokens of diverse English and code datasets for four epochs. +StableLM 3B 4E1T is a decoder-only base language model pre-trained on 1 trillion tokens of diverse English and code datasets for four epochs. The model architecture is transformer-based with partial Rotary Position Embeddings, SwiGLU activation, LayerNorm, etc. -We also provide `StableLM Zephyr 3B`, an instruction fine-tuned version of the model that can be used for chat-based applications. +We also provide StableLM Zephyr 3B, an instruction fine-tuned version of the model that can be used for chat-based applications. ### Usage Tips diff --git a/docs/source/en/model_doc/starcoder2.md b/docs/source/en/model_doc/starcoder2.md index ecb405f4d2..f90a473d2f 100644 --- a/docs/source/en/model_doc/starcoder2.md +++ b/docs/source/en/model_doc/starcoder2.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. 
rendered properly in your Markdown viewer. --> +*This model was released on 2024-02-29 and added to Hugging Face Transformers on 2024-02-28.* # Starcoder2 diff --git a/docs/source/en/model_doc/superglue.md b/docs/source/en/model_doc/superglue.md index acbf3561ca..6e11ea3154 100644 --- a/docs/source/en/model_doc/superglue.md +++ b/docs/source/en/model_doc/superglue.md @@ -11,6 +11,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2019-11-26 and added to Hugging Face Transformers on 2025-01-20.*
diff --git a/docs/source/en/model_doc/superpoint.md b/docs/source/en/model_doc/superpoint.md index 27ab95ac67..3d3d7fb59e 100644 --- a/docs/source/en/model_doc/superpoint.md +++ b/docs/source/en/model_doc/superpoint.md @@ -11,6 +11,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2017-12-20 and added to Hugging Face Transformers on 2024-03-19.*
diff --git a/docs/source/en/model_doc/swiftformer.md b/docs/source/en/model_doc/swiftformer.md index 5f9c38d614..2ef242f708 100644 --- a/docs/source/en/model_doc/swiftformer.md +++ b/docs/source/en/model_doc/swiftformer.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2023-03-27 and added to Hugging Face Transformers on 2023-05-12.* # SwiftFormer diff --git a/docs/source/en/model_doc/swin.md b/docs/source/en/model_doc/swin.md index 2b7a711397..21c761aa29 100644 --- a/docs/source/en/model_doc/swin.md +++ b/docs/source/en/model_doc/swin.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2021-03-25 and added to Hugging Face Transformers on 2022-01-21.*
diff --git a/docs/source/en/model_doc/swin2sr.md b/docs/source/en/model_doc/swin2sr.md index 340594b80e..0837cf6fef 100644 --- a/docs/source/en/model_doc/swin2sr.md +++ b/docs/source/en/model_doc/swin2sr.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2022-09-22 and added to Hugging Face Transformers on 2022-12-16.* # Swin2SR diff --git a/docs/source/en/model_doc/swinv2.md b/docs/source/en/model_doc/swinv2.md index d1d3b15a77..87de7aefa4 100644 --- a/docs/source/en/model_doc/swinv2.md +++ b/docs/source/en/model_doc/swinv2.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2021-11-18 and added to Hugging Face Transformers on 2022-07-27.*
diff --git a/docs/source/en/model_doc/switch_transformers.md b/docs/source/en/model_doc/switch_transformers.md index 842d67d3b9..d15527a478 100644 --- a/docs/source/en/model_doc/switch_transformers.md +++ b/docs/source/en/model_doc/switch_transformers.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2021-01-11 and added to Hugging Face Transformers on 2022-11-15.*
diff --git a/docs/source/en/model_doc/t5.md b/docs/source/en/model_doc/t5.md index 599f68acd2..a910cb525d 100644 --- a/docs/source/en/model_doc/t5.md +++ b/docs/source/en/model_doc/t5.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2019-10-23 and added to Hugging Face Transformers on 2020-11-16.*
diff --git a/docs/source/en/model_doc/t5gemma.md b/docs/source/en/model_doc/t5gemma.md index 101bd1b8b4..d6dce98bc3 100644 --- a/docs/source/en/model_doc/t5gemma.md +++ b/docs/source/en/model_doc/t5gemma.md @@ -14,6 +14,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2025-04-08 and added to Hugging Face Transformers on 2025-06-25.*
PyTorch @@ -24,9 +25,9 @@ rendered properly in your Markdown viewer. # T5Gemma -T5Gemma (aka encoder-decoder Gemma) was proposed in a [research paper](https://arxiv.org/abs/2504.06225) by Google. It is a family of encoder-decoder large language models, developed by adapting pretrained decoder-only models into encoder-decoder. T5Gemma includes pretrained and instruction-tuned variants. The architecture is based on transformer encoder-decoder design following T5, with improvements from Gemma 2: GQA, RoPE, GeGLU activation, RMSNorm, and interleaved local/global attention. +T5Gemma (aka encoder-decoder Gemma) was proposed in a [research paper](https://huggingface.co/papers/2504.06225) by Google. It is a family of encoder-decoder large language models, developed by adapting pretrained decoder-only models into encoder-decoder. T5Gemma includes pretrained and instruction-tuned variants. The architecture is based on transformer encoder-decoder design following T5, with improvements from Gemma 2: GQA, RoPE, GeGLU activation, RMSNorm, and interleaved local/global attention. -T5Gemma has two groups of model sizes: 1) [Gemma 2](https://ai.google.dev/gemma/docs/core/model_card_2) sizes (2B-2B, 9B-2B, and 9B-9B), which are based on the offical Gemma 2 models (2B and 9B); and 2) [T5](https://arxiv.org/abs/1910.10683) sizes (Small, Base, Large, and XL), where are pretrained under the Gemma 2 framework following T5 configuration. In addition, we also provide a model at ML size (medium large, ~2B in total), which is in-between T5 Large and T5 XL. +T5Gemma has two groups of model sizes: 1) [Gemma 2](https://ai.google.dev/gemma/docs/core/model_card_2) sizes (2B-2B, 9B-2B, and 9B-9B), which are based on the official Gemma 2 models (2B and 9B); and 2) [T5](https://huggingface.co/papers/1910.10683) sizes (Small, Base, Large, and XL), which are pretrained under the Gemma 2 framework following T5 configuration.
In addition, we also provide a model at ML size (medium large, ~2B in total), which is in-between T5 Large and T5 XL. The pretrained varaints are trained with two objectives: prefix language modeling with knowledge distillation (PrefixLM) and UL2, separately. We release both variants for each model size. The instruction-turned varaints was post-trained with supervised fine-tuning and reinforcement learning. diff --git a/docs/source/en/model_doc/t5v1.1.md b/docs/source/en/model_doc/t5v1.1.md index 7f10f30243..4b4ace6768 100644 --- a/docs/source/en/model_doc/t5v1.1.md +++ b/docs/source/en/model_doc/t5v1.1.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2020-02-12 and added to Hugging Face Transformers on 2023-06-20.* # T5v1.1 diff --git a/docs/source/en/model_doc/table-transformer.md b/docs/source/en/model_doc/table-transformer.md index 534ab49c64..b35df2aec3 100644 --- a/docs/source/en/model_doc/table-transformer.md +++ b/docs/source/en/model_doc/table-transformer.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2021-09-30 and added to Hugging Face Transformers on 2022-10-18.* # Table Transformer diff --git a/docs/source/en/model_doc/tapas.md b/docs/source/en/model_doc/tapas.md index 21eb697ee3..744be7d688 100644 --- a/docs/source/en/model_doc/tapas.md +++ b/docs/source/en/model_doc/tapas.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2020-04-05 and added to Hugging Face Transformers on 2020-12-15.* # TAPAS @@ -23,7 +24,7 @@ rendered properly in your Markdown viewer. 
## Overview -The TAPAS model was proposed in [TAPAS: Weakly Supervised Table Parsing via Pre-training](https://www.aclweb.org/anthology/2020.acl-main.398) +The TAPAS model was proposed in [TAPAS: Weakly Supervised Table Parsing via Pre-training](https://huggingface.co/papers/2004.02349) by Jonathan Herzig, Paweł Krzysztof Nowak, Thomas Müller, Francesco Piccinno and Julian Martin Eisenschlos. It's a BERT-based model specifically designed (and pre-trained) for answering questions about tabular data. Compared to BERT, TAPAS uses relative position embeddings and has 7 token types that encode tabular structure. TAPAS is pre-trained on the masked language modeling (MLM) objective on a large dataset comprising diff --git a/docs/source/en/model_doc/tapex.md b/docs/source/en/model_doc/tapex.md index 9694b098ea..ed9a64d96a 100644 --- a/docs/source/en/model_doc/tapex.md +++ b/docs/source/en/model_doc/tapex.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2021-07-16 and added to Hugging Face Transformers on 2023-06-20.* # TAPEX diff --git a/docs/source/en/model_doc/textnet.md b/docs/source/en/model_doc/textnet.md index 36382664b8..9c29a8b16b 100644 --- a/docs/source/en/model_doc/textnet.md +++ b/docs/source/en/model_doc/textnet.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2021-11-03 and added to Hugging Face Transformers on 2025-01-08.* # TextNet diff --git a/docs/source/en/model_doc/time_series_transformer.md b/docs/source/en/model_doc/time_series_transformer.md index a91633b6b0..c38671f00f 100644 --- a/docs/source/en/model_doc/time_series_transformer.md +++ b/docs/source/en/model_doc/time_series_transformer.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. 
rendered properly in your Markdown viewer. --> +*This model was released on 2022-12-01 and added to Hugging Face Transformers on 2022-09-30.* # Time Series Transformer diff --git a/docs/source/en/model_doc/timesfm.md b/docs/source/en/model_doc/timesfm.md index f7b6424a4c..32348bba07 100644 --- a/docs/source/en/model_doc/timesfm.md +++ b/docs/source/en/model_doc/timesfm.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2023-10-14 and added to Hugging Face Transformers on 2025-04-16.* # TimesFM diff --git a/docs/source/en/model_doc/timesformer.md b/docs/source/en/model_doc/timesformer.md index c39a63a668..59e9ee7181 100644 --- a/docs/source/en/model_doc/timesformer.md +++ b/docs/source/en/model_doc/timesformer.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2021-02-09 and added to Hugging Face Transformers on 2022-12-02.* # TimeSformer diff --git a/docs/source/en/model_doc/trajectory_transformer.md b/docs/source/en/model_doc/trajectory_transformer.md index a2353c9414..fba51b1811 100644 --- a/docs/source/en/model_doc/trajectory_transformer.md +++ b/docs/source/en/model_doc/trajectory_transformer.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2021-06-03 and added to Hugging Face Transformers on 2023-06-20.* # Trajectory Transformer diff --git a/docs/source/en/model_doc/transfo-xl.md b/docs/source/en/model_doc/transfo-xl.md index 66f249f24e..a042737469 100644 --- a/docs/source/en/model_doc/transfo-xl.md +++ b/docs/source/en/model_doc/transfo-xl.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. 
--> +*This model was released on 2019-01-09 and added to Hugging Face Transformers on 2023-06-20.* # Transformer XL diff --git a/docs/source/en/model_doc/trocr.md b/docs/source/en/model_doc/trocr.md index 9abc2add4e..73dd017b4f 100644 --- a/docs/source/en/model_doc/trocr.md +++ b/docs/source/en/model_doc/trocr.md @@ -12,6 +12,7 @@ Unless required by applicable law or agreed to in writing, software distributed rendered properly in your Markdown viewer. specific language governing permissions and limitations under the License. --> +*This model was released on 2021-09-21 and added to Hugging Face Transformers on 2021-10-13.* # TrOCR diff --git a/docs/source/en/model_doc/tvlt.md b/docs/source/en/model_doc/tvlt.md index 949c8549f5..0699781768 100644 --- a/docs/source/en/model_doc/tvlt.md +++ b/docs/source/en/model_doc/tvlt.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2022-09-28 and added to Hugging Face Transformers on 2023-06-20.* # TVLT diff --git a/docs/source/en/model_doc/tvp.md b/docs/source/en/model_doc/tvp.md index dd0f63e55e..49a538ffa8 100644 --- a/docs/source/en/model_doc/tvp.md +++ b/docs/source/en/model_doc/tvp.md @@ -9,6 +9,7 @@ Unless required by applicable law or agreed to in writing, software distributed an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
--> +*This model was released on 2023-03-09 and added to Hugging Face Transformers on 2023-11-22.* # TVP diff --git a/docs/source/en/model_doc/udop.md b/docs/source/en/model_doc/udop.md index fd2a70d7ec..eb400cc39d 100644 --- a/docs/source/en/model_doc/udop.md +++ b/docs/source/en/model_doc/udop.md @@ -9,6 +9,7 @@ Unless required by applicable law or agreed to in writing, software distributed an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. --> +*This model was released on 2022-12-05 and added to Hugging Face Transformers on 2024-03-04.* # UDOP diff --git a/docs/source/en/model_doc/ul2.md b/docs/source/en/model_doc/ul2.md index b3c1a22260..ca0bacbc43 100644 --- a/docs/source/en/model_doc/ul2.md +++ b/docs/source/en/model_doc/ul2.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2022-05-10 and added to Hugging Face Transformers on 2023-06-20.* # UL2 diff --git a/docs/source/en/model_doc/umt5.md b/docs/source/en/model_doc/umt5.md index 736574373c..b0e5f02de4 100644 --- a/docs/source/en/model_doc/umt5.md +++ b/docs/source/en/model_doc/umt5.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2023-04-18 and added to Hugging Face Transformers on 2023-07-03.* # UMT5 @@ -22,7 +23,7 @@ rendered properly in your Markdown viewer. ## Overview -The UMT5 model was proposed in [UniMax: Fairer and More Effective Language Sampling for Large-Scale Multilingual Pretraining](https://openreview.net/forum?id=kXwdL1cWOAi) by Hyung Won Chung, Xavier Garcia, Adam Roberts, Yi Tay, Orhan Firat, Sharan Narang, Noah Constant. 
+The UMT5 model was proposed in [UniMax: Fairer and More Effective Language Sampling for Large-Scale Multilingual Pretraining](https://arxiv.org/pdf/2304.09151) by Hyung Won Chung, Xavier Garcia, Adam Roberts, Yi Tay, Orhan Firat, Sharan Narang, Noah Constant. The abstract from the paper is the following: diff --git a/docs/source/en/model_doc/unispeech-sat.md b/docs/source/en/model_doc/unispeech-sat.md index 8d0adb8e78..308155bbfe 100644 --- a/docs/source/en/model_doc/unispeech-sat.md +++ b/docs/source/en/model_doc/unispeech-sat.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2021-10-12 and added to Hugging Face Transformers on 2021-10-26.* # UniSpeech-SAT diff --git a/docs/source/en/model_doc/unispeech.md b/docs/source/en/model_doc/unispeech.md index a83f7600d5..98348b560d 100644 --- a/docs/source/en/model_doc/unispeech.md +++ b/docs/source/en/model_doc/unispeech.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2021-01-19 and added to Hugging Face Transformers on 2021-10-26.* # UniSpeech diff --git a/docs/source/en/model_doc/univnet.md b/docs/source/en/model_doc/univnet.md index 57492dcd68..e20bc5c405 100644 --- a/docs/source/en/model_doc/univnet.md +++ b/docs/source/en/model_doc/univnet.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. 
--> +*This model was released on 2021-06-15 and added to Hugging Face Transformers on 2023-11-22.* # UnivNet diff --git a/docs/source/en/model_doc/upernet.md b/docs/source/en/model_doc/upernet.md index e215ec8621..2c2e50fc56 100644 --- a/docs/source/en/model_doc/upernet.md +++ b/docs/source/en/model_doc/upernet.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2018-07-26 and added to Hugging Face Transformers on 2023-01-16.* # UPerNet diff --git a/docs/source/en/model_doc/van.md b/docs/source/en/model_doc/van.md index 0a25691823..0e07e314be 100644 --- a/docs/source/en/model_doc/van.md +++ b/docs/source/en/model_doc/van.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2022-02-20 and added to Hugging Face Transformers on 2023-06-20.* # VAN diff --git a/docs/source/en/model_doc/video_llava.md b/docs/source/en/model_doc/video_llava.md index b2051a91f2..bda7264369 100644 --- a/docs/source/en/model_doc/video_llava.md +++ b/docs/source/en/model_doc/video_llava.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2023-11-16 and added to Hugging Face Transformers on 2024-05-15.* # Video-LLaVA diff --git a/docs/source/en/model_doc/videomae.md b/docs/source/en/model_doc/videomae.md index ac3d6c044e..547cc2f0a1 100644 --- a/docs/source/en/model_doc/videomae.md +++ b/docs/source/en/model_doc/videomae.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. 
--> +*This model was released on 2022-03-23 and added to Hugging Face Transformers on 2022-08-04.* # VideoMAE diff --git a/docs/source/en/model_doc/vilt.md b/docs/source/en/model_doc/vilt.md index 19146e3846..a8e63ec494 100644 --- a/docs/source/en/model_doc/vilt.md +++ b/docs/source/en/model_doc/vilt.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2021-02-05 and added to Hugging Face Transformers on 2022-01-19.* # ViLT diff --git a/docs/source/en/model_doc/vipllava.md b/docs/source/en/model_doc/vipllava.md index c60b172045..0d0a209c27 100644 --- a/docs/source/en/model_doc/vipllava.md +++ b/docs/source/en/model_doc/vipllava.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2023-12-01 and added to Hugging Face Transformers on 2023-12-13.* # VipLlava diff --git a/docs/source/en/model_doc/vision-encoder-decoder.md b/docs/source/en/model_doc/vision-encoder-decoder.md index 53c573be47..38abeb65b5 100644 --- a/docs/source/en/model_doc/vision-encoder-decoder.md +++ b/docs/source/en/model_doc/vision-encoder-decoder.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2021-09-21 and added to Hugging Face Transformers on 2021-10-13.* # Vision Encoder Decoder Models diff --git a/docs/source/en/model_doc/vision-text-dual-encoder.md b/docs/source/en/model_doc/vision-text-dual-encoder.md index 3106cb0ac3..b26fcc3104 100644 --- a/docs/source/en/model_doc/vision-text-dual-encoder.md +++ b/docs/source/en/model_doc/vision-text-dual-encoder.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. 
--> +*This model was released on 2021-11-15 and added to Hugging Face Transformers on 2021-11-30.* # VisionTextDualEncoder diff --git a/docs/source/en/model_doc/visual_bert.md b/docs/source/en/model_doc/visual_bert.md index 9e4376d0d4..7a7ac24e4d 100644 --- a/docs/source/en/model_doc/visual_bert.md +++ b/docs/source/en/model_doc/visual_bert.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2019-08-09 and added to Hugging Face Transformers on 2021-06-02.*
diff --git a/docs/source/en/model_doc/vit.md b/docs/source/en/model_doc/vit.md index d09fed4b3a..6221d83a31 100644 --- a/docs/source/en/model_doc/vit.md +++ b/docs/source/en/model_doc/vit.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2020-10-22 and added to Hugging Face Transformers on 2021-04-01.*
diff --git a/docs/source/en/model_doc/vit_hybrid.md b/docs/source/en/model_doc/vit_hybrid.md index c268c2fad3..e85e156f29 100644 --- a/docs/source/en/model_doc/vit_hybrid.md +++ b/docs/source/en/model_doc/vit_hybrid.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2020-10-22 and added to Hugging Face Transformers on 2023-06-20.* # Hybrid Vision Transformer (ViT Hybrid) diff --git a/docs/source/en/model_doc/vit_mae.md b/docs/source/en/model_doc/vit_mae.md index 787253f32f..515c5b8cba 100644 --- a/docs/source/en/model_doc/vit_mae.md +++ b/docs/source/en/model_doc/vit_mae.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2021-11-11 and added to Hugging Face Transformers on 2022-01-18.*
diff --git a/docs/source/en/model_doc/vit_msn.md b/docs/source/en/model_doc/vit_msn.md index 8835f01cd8..a54cfad46c 100644 --- a/docs/source/en/model_doc/vit_msn.md +++ b/docs/source/en/model_doc/vit_msn.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2022-04-14 and added to Hugging Face Transformers on 2022-09-22.* # ViTMSN diff --git a/docs/source/en/model_doc/vitdet.md b/docs/source/en/model_doc/vitdet.md index 738d83461b..539ae5e376 100644 --- a/docs/source/en/model_doc/vitdet.md +++ b/docs/source/en/model_doc/vitdet.md @@ -9,6 +9,7 @@ Unless required by applicable law or agreed to in writing, software distributed an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. --> +*This model was released on 2022-03-30 and added to Hugging Face Transformers on 2023-08-29.* # ViTDet diff --git a/docs/source/en/model_doc/vitmatte.md b/docs/source/en/model_doc/vitmatte.md index f661de1622..519a2dd74d 100644 --- a/docs/source/en/model_doc/vitmatte.md +++ b/docs/source/en/model_doc/vitmatte.md @@ -9,6 +9,7 @@ Unless required by applicable law or agreed to in writing, software distributed an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
--> +*This model was released on 2023-05-24 and added to Hugging Face Transformers on 2023-09-19.* # ViTMatte diff --git a/docs/source/en/model_doc/vitpose.md b/docs/source/en/model_doc/vitpose.md index f9ed726593..612d556550 100644 --- a/docs/source/en/model_doc/vitpose.md +++ b/docs/source/en/model_doc/vitpose.md @@ -9,6 +9,7 @@ Unless required by applicable law or agreed to in writing, software distributed an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. --> +*This model was released on 2022-04-26 and added to Hugging Face Transformers on 2025-01-08.*
diff --git a/docs/source/en/model_doc/vits.md b/docs/source/en/model_doc/vits.md index 7a829b36ba..9633e6e35e 100644 --- a/docs/source/en/model_doc/vits.md +++ b/docs/source/en/model_doc/vits.md @@ -8,6 +8,7 @@ http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License.--> +*This model was released on 2021-06-11 and added to Hugging Face Transformers on 2023-09-01.*
@@ -17,7 +18,7 @@ specific language governing permissions and limitations under the License.--> # VITS -[VITS (Variational Inference with adversarial learning for end-to-end Text-to-Speech)](https://hf.co/papers/2106.06103) is a end-to-end speech synthesis model, simplifying the traditional two-stage text-to-speech (TTS) systems. It's unique because it directly synthesizes speech from text using variational inference, adversarial learning, and normalizing flows to produce natural and expressive speech with diverse rhythms and intonations. +[VITS (Variational Inference with adversarial learning for end-to-end Text-to-Speech)](https://huggingface.co/papers/2106.06103) is an end-to-end speech synthesis model, simplifying the traditional two-stage text-to-speech (TTS) systems. It's unique because it directly synthesizes speech from text using variational inference, adversarial learning, and normalizing flows to produce natural and expressive speech with diverse rhythms and intonations. You can find all the original VITS checkpoints under the [AI at Meta](https://huggingface.co/facebook?search_models=mms-tts) organization. diff --git a/docs/source/en/model_doc/vivit.md b/docs/source/en/model_doc/vivit.md index cf32c749e2..61dddcb270 100644 --- a/docs/source/en/model_doc/vivit.md +++ b/docs/source/en/model_doc/vivit.md @@ -9,6 +9,7 @@ Unless required by applicable law or agreed to in writing, software distributed an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
--> +*This model was released on 2021-03-29 and added to Hugging Face Transformers on 2023-07-11.* # Video Vision Transformer (ViViT) diff --git a/docs/source/en/model_doc/vjepa2.md b/docs/source/en/model_doc/vjepa2.md index b16875339e..bf409a5139 100644 --- a/docs/source/en/model_doc/vjepa2.md +++ b/docs/source/en/model_doc/vjepa2.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2025-06-11 and added to Hugging Face Transformers on 2025-06-11.*
@@ -25,7 +26,7 @@ rendered properly in your Markdown viewer. # V-JEPA 2 -V-JEPA 2 is a self-supervised approach to training video encoders developed by FAIR, Meta. Using internet-scale video data, V-JEPA 2 attains state-of-the-art performance on motion understanding and human action anticipation tasks. V-JEPA 2-AC is a latent action-conditioned world model post-trained from V-JEPA 2 (using a small amount of robot trajectory interaction data) that solves robot manipulation tasks without environment-specific data collection or task-specific training or calibration. +[V-JEPA 2](https://huggingface.co/papers/2506.09985) ([blog post](https://ai.meta.com/blog/v-jepa-2-world-model-benchmarks/)) is a self-supervised approach to training video encoders developed by FAIR, Meta. Using internet-scale video data, V-JEPA 2 attains state-of-the-art performance on motion understanding and human action anticipation tasks. V-JEPA 2-AC is a latent action-conditioned world model post-trained from V-JEPA 2 (using a small amount of robot trajectory interaction data) that solves robot manipulation tasks without environment-specific data collection or task-specific training or calibration.
drawing diff --git a/docs/source/en/model_doc/voxtral.md b/docs/source/en/model_doc/voxtral.md index ad15631a96..f46c1188df 100644 --- a/docs/source/en/model_doc/voxtral.md +++ b/docs/source/en/model_doc/voxtral.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2025-07-15 and added to Hugging Face Transformers on 2025-07-18.* # Voxtral diff --git a/docs/source/en/model_doc/wav2vec2-bert.md b/docs/source/en/model_doc/wav2vec2-bert.md index c2cf464977..4edb67498a 100644 --- a/docs/source/en/model_doc/wav2vec2-bert.md +++ b/docs/source/en/model_doc/wav2vec2-bert.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2023-11-30 and added to Hugging Face Transformers on 2024-01-18.* # Wav2Vec2-BERT @@ -22,7 +23,7 @@ rendered properly in your Markdown viewer. ## Overview -The Wav2Vec2-BERT model was proposed in [Seamless: Multilingual Expressive and Streaming Speech Translation](https://ai.meta.com/research/publications/seamless-multilingual-expressive-and-streaming-speech-translation/) by the Seamless Communication team from Meta AI. +The [Wav2Vec2-BERT](https://huggingface.co/papers/2312.05187) model was proposed in [Seamless: Multilingual Expressive and Streaming Speech Translation](https://ai.meta.com/research/publications/seamless-multilingual-expressive-and-streaming-speech-translation/) by the Seamless Communication team from Meta AI. This model was pre-trained on 4.5M hours of unlabeled audio data covering more than 143 languages. It requires finetuning to be used for downstream tasks such as Automatic Speech Recognition (ASR), or Audio Classification. 
diff --git a/docs/source/en/model_doc/wav2vec2-conformer.md b/docs/source/en/model_doc/wav2vec2-conformer.md index fa304b3a86..e2a56b450d 100644 --- a/docs/source/en/model_doc/wav2vec2-conformer.md +++ b/docs/source/en/model_doc/wav2vec2-conformer.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2020-10-11 and added to Hugging Face Transformers on 2022-05-17.* # Wav2Vec2-Conformer diff --git a/docs/source/en/model_doc/wav2vec2.md b/docs/source/en/model_doc/wav2vec2.md index 340ac4b193..401859a6c8 100644 --- a/docs/source/en/model_doc/wav2vec2.md +++ b/docs/source/en/model_doc/wav2vec2.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2020-06-20 and added to Hugging Face Transformers on 2021-02-02.* # Wav2Vec2 diff --git a/docs/source/en/model_doc/wav2vec2_phoneme.md b/docs/source/en/model_doc/wav2vec2_phoneme.md index 863bdafca3..e669120f29 100644 --- a/docs/source/en/model_doc/wav2vec2_phoneme.md +++ b/docs/source/en/model_doc/wav2vec2_phoneme.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2021-09-23 and added to Hugging Face Transformers on 2021-12-17.* # Wav2Vec2Phoneme diff --git a/docs/source/en/model_doc/wavlm.md b/docs/source/en/model_doc/wavlm.md index 7dfe6f26bb..4925b5a1a0 100644 --- a/docs/source/en/model_doc/wavlm.md +++ b/docs/source/en/model_doc/wavlm.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. 
--> +*This model was released on 2021-10-26 and added to Hugging Face Transformers on 2021-12-16.* # WavLM diff --git a/docs/source/en/model_doc/whisper.md b/docs/source/en/model_doc/whisper.md index 4bb51d0ce8..5b426f0505 100644 --- a/docs/source/en/model_doc/whisper.md +++ b/docs/source/en/model_doc/whisper.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2022-12-06 and added to Hugging Face Transformers on 2022-10-05.*
@@ -28,7 +29,7 @@ rendered properly in your Markdown viewer. # Whisper -[Whisper](https://hf.co/papers/2212.04356) is a encoder-decoder (sequence-to-sequence) transformer pretrained on 680,000 hours of labeled audio data. This amount of pretraining data enables zero-shot performance on audio tasks in English and many other languages. The decoder allows Whisper to map the encoders learned speech representations to useful outputs, such as text, without additional fine-tuning. Whisper just works out of the box. +[Whisper](https://huggingface.co/papers/2212.04356) is an encoder-decoder (sequence-to-sequence) transformer pretrained on 680,000 hours of labeled audio data. This amount of pretraining data enables zero-shot performance on audio tasks in English and many other languages. The decoder allows Whisper to map the encoder's learned speech representations to useful outputs, such as text, without additional fine-tuning. Whisper just works out of the box. You can find all the original Whisper checkpoints under the [Whisper](https://huggingface.co/collections/openai/whisper-release-6501bba2cf999715fd953013) collection. diff --git a/docs/source/en/model_doc/xclip.md b/docs/source/en/model_doc/xclip.md index ca78a68ae2..e3219f6d8f 100644 --- a/docs/source/en/model_doc/xclip.md +++ b/docs/source/en/model_doc/xclip.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. 
--> +*This model was released on 2021-12-20 and added to Hugging Face Transformers on 2022-01-28.* # XGLM diff --git a/docs/source/en/model_doc/xlm-prophetnet.md b/docs/source/en/model_doc/xlm-prophetnet.md index 5d11a532f2..4dad4c0afa 100644 --- a/docs/source/en/model_doc/xlm-prophetnet.md +++ b/docs/source/en/model_doc/xlm-prophetnet.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2020-01-13 and added to Hugging Face Transformers on 2023-06-20.* # XLM-ProphetNet diff --git a/docs/source/en/model_doc/xlm-roberta-xl.md b/docs/source/en/model_doc/xlm-roberta-xl.md index 56306bcb4a..eba0218289 100644 --- a/docs/source/en/model_doc/xlm-roberta-xl.md +++ b/docs/source/en/model_doc/xlm-roberta-xl.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2021-05-02 and added to Hugging Face Transformers on 2022-01-29.*
diff --git a/docs/source/en/model_doc/xlm-roberta.md b/docs/source/en/model_doc/xlm-roberta.md index 80465da245..bac0bcf8ec 100644 --- a/docs/source/en/model_doc/xlm-roberta.md +++ b/docs/source/en/model_doc/xlm-roberta.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2019-11-05 and added to Hugging Face Transformers on 2020-11-16.*
diff --git a/docs/source/en/model_doc/xlm-v.md b/docs/source/en/model_doc/xlm-v.md index 05b4a42593..7001926dc2 100644 --- a/docs/source/en/model_doc/xlm-v.md +++ b/docs/source/en/model_doc/xlm-v.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2023-01-25 and added to Hugging Face Transformers on 2023-06-20.* # XLM-V diff --git a/docs/source/en/model_doc/xlm.md b/docs/source/en/model_doc/xlm.md index f0c376625d..83835b29dc 100644 --- a/docs/source/en/model_doc/xlm.md +++ b/docs/source/en/model_doc/xlm.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2019-01-22 and added to Hugging Face Transformers on 2020-11-16.*
diff --git a/docs/source/en/model_doc/xlnet.md b/docs/source/en/model_doc/xlnet.md index e35851d5d2..7b844f44c7 100644 --- a/docs/source/en/model_doc/xlnet.md +++ b/docs/source/en/model_doc/xlnet.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2019-06-19 and added to Hugging Face Transformers on 2020-11-16.* # XLNet diff --git a/docs/source/en/model_doc/xls_r.md b/docs/source/en/model_doc/xls_r.md index 238c703f3e..bc99bbcbee 100644 --- a/docs/source/en/model_doc/xls_r.md +++ b/docs/source/en/model_doc/xls_r.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2021-11-17 and added to Hugging Face Transformers on 2023-06-20.* # XLS-R diff --git a/docs/source/en/model_doc/xlsr_wav2vec2.md b/docs/source/en/model_doc/xlsr_wav2vec2.md index eceea3be20..5c2771278b 100644 --- a/docs/source/en/model_doc/xlsr_wav2vec2.md +++ b/docs/source/en/model_doc/xlsr_wav2vec2.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2020-06-24 and added to Hugging Face Transformers on 2023-06-20.* # XLSR-Wav2Vec2 diff --git a/docs/source/en/model_doc/xlstm.md b/docs/source/en/model_doc/xlstm.md index ba47a5a97c..b239d631fb 100644 --- a/docs/source/en/model_doc/xlstm.md +++ b/docs/source/en/model_doc/xlstm.md @@ -13,16 +13,17 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. 
--> +*This model was released on 2024-05-07 and added to Hugging Face Transformers on 2025-07-25.* # xLSTM ## Overview -The xLSTM model was proposed in [xLSTM: Extended Long Short-Term Memory](https://openreview.net/forum?id=ARAxPPIAhq) by Maximilian Beck*, Korbinian Pöppel*, Markus Spanring, Andreas Auer, Oleksandra Prudnikova, Michael Kopp, Günter Klambauer, Johannes Brandstetter and Sepp Hochreiter. +The xLSTM model was proposed in [xLSTM: Extended Long Short-Term Memory](https://huggingface.co/papers/2405.04517) by Maximilian Beck*, Korbinian Pöppel*, Markus Spanring, Andreas Auer, Oleksandra Prudnikova, Michael Kopp, Günter Klambauer, Johannes Brandstetter and Sepp Hochreiter. xLSTM updates the original LSTM architecture to be competitive with Transformer models by introducing exponential gating, matrix memory expansion, and parallelizable training and ingestion. -The [7B model](https://hf.co/NX-AI/xLSTM-7b) variant was trained by the xLSTM team Maximilian Beck, Korbinian Pöppel, Phillip Lippe, Richard Kurle, Patrick Blies, Sebastian Böck and Sepp Hochreiter at NXAI. +The [7B model](https://huggingface.co/NX-AI/xLSTM-7b) variant was trained by the xLSTM team Maximilian Beck, Korbinian Pöppel, Phillip Lippe, Richard Kurle, Patrick Blies, Sebastian Böck and Sepp Hochreiter at NXAI. The abstract from the paper is the following: diff --git a/docs/source/en/model_doc/xmod.md b/docs/source/en/model_doc/xmod.md index e07601074c..1ae591c92c 100644 --- a/docs/source/en/model_doc/xmod.md +++ b/docs/source/en/model_doc/xmod.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2022-05-12 and added to Hugging Face Transformers on 2023-02-10.* # X-MOD @@ -22,7 +23,7 @@ rendered properly in your Markdown viewer. 
## Overview -The X-MOD model was proposed in [Lifting the Curse of Multilinguality by Pre-training Modular Transformers](http://dx.doi.org/10.18653/v1/2022.naacl-main.255) by Jonas Pfeiffer, Naman Goyal, Xi Lin, Xian Li, James Cross, Sebastian Riedel, and Mikel Artetxe. +The X-MOD model was proposed in [Lifting the Curse of Multilinguality by Pre-training Modular Transformers](https://arxiv.org/abs/2205.06266) by Jonas Pfeiffer, Naman Goyal, Xi Lin, Xian Li, James Cross, Sebastian Riedel, and Mikel Artetxe. X-MOD extends multilingual masked language models like [XLM-R](xlm-roberta) to include language-specific modular components (_language adapters_) during pre-training. For fine-tuning, the language adapters in each transformer layer are frozen. The abstract from the paper is the following: diff --git a/docs/source/en/model_doc/yolos.md b/docs/source/en/model_doc/yolos.md index 2c4cc7bc48..516c7e656e 100644 --- a/docs/source/en/model_doc/yolos.md +++ b/docs/source/en/model_doc/yolos.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2021-06-01 and added to Hugging Face Transformers on 2022-05-02.*
PyTorch diff --git a/docs/source/en/model_doc/yoso.md b/docs/source/en/model_doc/yoso.md index 344fad9e12..f07e5aba08 100644 --- a/docs/source/en/model_doc/yoso.md +++ b/docs/source/en/model_doc/yoso.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2021-11-18 and added to Hugging Face Transformers on 2022-01-26.* # YOSO diff --git a/docs/source/en/model_doc/zamba.md b/docs/source/en/model_doc/zamba.md index b112c92d53..f9280f9379 100644 --- a/docs/source/en/model_doc/zamba.md +++ b/docs/source/en/model_doc/zamba.md @@ -13,13 +13,14 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2024-04-16 and added to Hugging Face Transformers on 2024-10-04.* # Zamba
PyTorch
-Zamba is a large language model (LLM) trained by Zyphra, and made available under an Apache 2.0 license. Please see the [Zyphra Hugging Face](https://huggingface.co/collections/zyphra/) repository for model weights. +[Zamba](https://huggingface.co/papers/2405.16712) ([blog post](https://www.zyphra.com/post/zamba)) is a large language model (LLM) trained by Zyphra, and made available under an Apache 2.0 license. Please see the [Zyphra Hugging Face](https://huggingface.co/collections/zyphra/) repository for model weights. This model was contributed by [pglo](https://huggingface.co/pglo). diff --git a/docs/source/en/model_doc/zamba2.md b/docs/source/en/model_doc/zamba2.md index 447fa27b69..cf31eeb1ea 100644 --- a/docs/source/en/model_doc/zamba2.md +++ b/docs/source/en/model_doc/zamba2.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2024-11-22 and added to Hugging Face Transformers on 2025-01-27.* # Zamba2
@@ -21,14 +22,14 @@ rendered properly in your Markdown viewer. SDPA
-Zamba2 is a large language model (LLM) trained by Zyphra, and made available under an Apache 2.0 license. Please see the [Zyphra Hugging Face](https://huggingface.co/collections/zyphra/) repository for model weights. +[Zamba2](https://huggingface.co/papers/2411.15242) is a large language model (LLM) trained by Zyphra, and made available under an Apache 2.0 license. Please see the [Zyphra Hugging Face](https://huggingface.co/collections/zyphra/) repository for model weights. This model was contributed by [pglo](https://huggingface.co/pglo). ## Model details -Zamba2-1.2B, Zamba2-2.7B and Zamba2-7B are hybrid models combining state-space models (Specifically [Mamba](https://github.com/state-spaces/mamba)) and transformer, and were trained using next-token prediction. Zamba2 uses shared transformer layers after every 6 mamba blocks. It uses the [Mistral v0.1 tokenizer](https://huggingface.co/mistralai/Mistral-7B-v0.1). We came to this architecture after a series of ablations at small scales. Zamba2-1.2B, Zamba2-2.7B and Zamba2-7B were pre-trained on 2T and 3T tokens, respectively. +[Zamba2-1.2B](https://www.zyphra.com/post/zamba2-mini), [Zamba2-2.7B](https://www.zyphra.com/post/zamba2-small) and [Zamba2-7B](https://www.zyphra.com/post/zamba2-7b) are hybrid models combining state-space models (Specifically [Mamba](https://github.com/state-spaces/mamba)) and transformer, and were trained using next-token prediction. Zamba2 uses shared transformer layers after every 6 mamba blocks. It uses the [Mistral v0.1 tokenizer](https://huggingface.co/mistralai/Mistral-7B-v0.1). We came to this architecture after a series of ablations at small scales. Zamba2-1.2B, Zamba2-2.7B and Zamba2-7B were pre-trained on 2T and 3T tokens, respectively. 
diff --git a/docs/source/en/model_doc/zoedepth.md b/docs/source/en/model_doc/zoedepth.md index d392b34abb..9ca7d3f30a 100644 --- a/docs/source/en/model_doc/zoedepth.md +++ b/docs/source/en/model_doc/zoedepth.md @@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> +*This model was released on 2023-02-23 and added to Hugging Face Transformers on 2024-07-08.*
"""Add release and commit dates to the model cards in ``docs/source/en/model_doc``.

For every processed model card the script:

1. rewrites ``https://hf.co/`` links to ``https://huggingface.co/`` and swaps arXiv links for
   Hugging Face paper links when the paper is available on the Hub;
2. inserts (or refreshes) a line of the form
   ``*This model was released on <date> and added to Hugging Face Transformers on <date>.*``
   right below the first ``-->`` marker (the end of the copyright header).

Usage::

    python utils/add_dates.py                 # cards reported as modified by ``git status``
    python utils/add_dates.py --models bert   # explicit list of cards
    python utils/add_dates.py --all           # every card in the docs directory
"""

import argparse
import os
import re
import subprocess
from datetime import date
from typing import Optional

from huggingface_hub import paper_info


# Resolve the repository root from this file's location (``<root>/utils/add_dates.py``) rather than
# from the current working directory, so that a "utils" substring elsewhere in the path (or running
# the script from another directory) cannot produce a wrong root.
ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
DOCS_PATH = os.path.join(ROOT, "docs/source/en/model_doc")
MODELS_PATH = os.path.join(ROOT, "src/transformers/models")

# Header prepended to model cards that have no `-->` marker. It must end with `-->` because the
# dates line is inserted right after the first such marker.
COPYRIGHT_DISCLAIMER = """<!--Copyright 2025 The HuggingFace Team. All rights reserved.

Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
specific language governing permissions and limitations under the License.

⚠️ Note that this file is in Markdown but contain specific syntax for our doc-builder (similar to MDX) that may not be
rendered properly in your Markdown viewer.

-->"""

# Model cards whose paper exists on arXiv but not on Hugging Face papers (card file name -> arXiv id).
# NOTE(review): not referenced by any function in this script; kept as a record of known cases.
ARXIV_PAPERS_NOT_IN_HF_PAPERS = {
    "gemma3n.md": "2506.06644",
    "xmod.md": "2205.06266",
}

# Doc pages in the model_doc folder that are not model cards and must never receive a dates line.
_SKIPPED_CARDS = ("auto", "timm_wrapper")

# Written in place of the release date when it cannot be determined automatically.
_RELEASE_DATE_PLACEHOLDER = r"{release_date}"


def get_modified_cards() -> list[str]:
    """Get the list of model names from modified files in docs/source/en/model_doc/.

    Returns:
        Model names (file names without the ``.md`` extension) of the model cards that
        ``git status --porcelain`` reports as changed and that still exist on disk.
    """
    status = subprocess.check_output(["git", "status", "--porcelain"], cwd=ROOT, text=True)

    model_names = []
    for line in status.splitlines():
        if not line.strip():
            continue
        # The path is the last whitespace-separated field (for renames, the new path)
        filename = line.split()[-1]
        if not (filename.startswith("docs/source/en/model_doc/") and filename.endswith(".md")):
            continue
        # Deleted cards also show up in `git status`; there is nothing to update for them
        if not os.path.exists(os.path.join(ROOT, filename)):
            continue
        model_name = os.path.splitext(os.path.basename(filename))[0]
        if model_name not in _SKIPPED_CARDS and model_name not in model_names:
            model_names.append(model_name)

    return model_names


def get_paper_link(model_card: Optional[str] = None, path: Optional[str] = None) -> str:
    """Get the first paper link from the model card content.

    Args:
        model_card: Name of the model card, with or without the ``.md`` extension. Only used to
            locate the file when `path` is not given.
        path: Path to the model card file. Takes precedence over `model_card`.

    Returns:
        - ``"blog"`` if the card mentions a blog post / report, in which case the release date has to
          be filled in by hand;
        - the first Hugging Face papers link, or else the first arXiv link, found in the card;
        - ``"No_paper"`` if no such link exists (other candidate links are printed as a hint).

    Raises:
        ValueError: If neither `model_card` nor `path` is provided.
    """
    if not path and model_card is None:
        raise ValueError("Either `model_card` or `path` must be provided.")

    if model_card is not None and not model_card.endswith(".md"):
        model_card = f"{model_card}.md"
    file_path = path or os.path.join(DOCS_PATH, model_card)
    model_card = os.path.basename(file_path)
    with open(file_path, "r", encoding="utf-8") as f:
        content = f.read()

    # Plain substring heuristic: any occurrence of these words defers to a manual release date
    if any(keyword in content for keyword in ("blog", "report", "post")):
        print(f"Insert the release date of the blog post or technical report at the top of {model_card}")
        return "blog"

    # Find known paper links (Hugging Face papers first, then arXiv)
    paper_links = re.findall(r"https://huggingface\.co/papers/\d+\.\d+", content)
    paper_links += re.findall(r"https://arxiv\.org/abs/\d+\.\d+", content)
    if paper_links:
        return paper_links[0]

    # No known paper link: list every other https link that is neither a Hub nor a GitHub link,
    # de-duplicated while preserving order, to help the user find the paper by hand.
    candidates = {}
    for link in re.findall(r"https://[^\s\)]+", content):
        link = link.rstrip(".,;!?)")
        if "huggingface.co" not in link and "github.com" not in link:
            candidates[link] = None

    if candidates:
        print(f"No Hugging Face or Arxiv papers found. The possible paper links found in {model_card}:")
        for link in candidates:
            print(f"  - {link}")

    return "No_paper"


def get_first_commit_date(model_name: str) -> str:
    """Get the first commit date of the model's init file or model.md. This date is considered as the date the model was added to HF transformers.

    Args:
        model_name: Name of the model card, with or without the ``.md`` extension.

    Returns:
        The date as ``YYYY-MM-DD``. If git has no commit for the file yet (e.g. a model that is being
        added right now), today's date is returned so that the dates line stays well-formed.
    """
    if model_name.endswith(".md"):
        model_name = model_name[:-3]

    # Doc files may use dashes where the source folder uses underscores
    model_name_src = model_name.replace("-", "_")
    file_path = os.path.join(MODELS_PATH, model_name_src, "__init__.py")

    # If the init file is not found (only true for legacy models), the doc's first commit date is used
    if not os.path.exists(file_path):
        file_path = os.path.join(DOCS_PATH, f"{model_name}.md")

    log = subprocess.check_output(
        ["git", "log", "--reverse", "--pretty=format:%ad", "--date=iso", file_path], cwd=ROOT, text=True
    )
    # `--reverse` lists the oldest commit first; an ISO date starts with `YYYY-MM-DD`
    first_commit_date = log.strip().split("\n")[0][:10]
    if not first_commit_date:
        return date.today().isoformat()
    return first_commit_date


def get_release_date(link: str) -> str:
    """Get the release date of the paper behind `link`.

    Args:
        link: A ``https://huggingface.co/papers/<id>`` or ``https://arxiv.org/abs/<id>`` link.

    Returns:
        The publication date as ``YYYY-MM-DD`` for a Hugging Face papers link whose metadata could be
        fetched, otherwise the ``{release_date}`` placeholder to be filled in by hand.
    """
    if link.startswith("https://huggingface.co/papers/"):
        paper_id = link.replace("https://huggingface.co/papers/", "")
        try:
            info = paper_info(paper_id)
            return info.published_at.date().isoformat()
        except Exception as e:
            # Best effort (network / missing paper): report and fall back to the placeholder
            print(f"Error fetching release date for the paper https://huggingface.co/papers/{paper_id}: {e}")

    elif link.startswith("https://arxiv.org/abs/"):
        print(f"This paper {link} is not yet available in Hugging Face papers, skipping the release date attachment.")

    return _RELEASE_DATE_PLACEHOLDER


def replace_paper_links(file_path: str) -> bool:
    """Replace arxiv links with huggingface links if valid, and replace hf.co with huggingface.co.

    Args:
        file_path: Path to the model card to update in place.

    Returns:
        `True` if the file was rewritten, `False` if nothing had to change.
    """
    with open(file_path, "r", encoding="utf-8") as f:
        original_content = f.read()

    model_card = os.path.basename(file_path)

    # Replace hf.co with huggingface.co
    content = original_content.replace("https://hf.co/", "https://huggingface.co/")

    # Find all arxiv links; each distinct paper only needs a single lookup on the Hub
    arxiv_ids = dict.fromkeys(re.findall(r"https://arxiv\.org/abs/(\d+\.\d+)", content))

    for paper_id in arxiv_ids:
        try:
            # Raises if the paper does not exist on huggingface
            paper_info(paper_id)
        except Exception:
            # Paper not available on huggingface, keep arxiv link
            print(f"Paper {paper_id} for {model_card} is not available on huggingface, keeping the arxiv link")
            continue

        old_link = f"https://arxiv.org/abs/{paper_id}"
        new_link = f"https://huggingface.co/papers/{paper_id}"
        content = content.replace(old_link, new_link)
        print(f"Replaced {old_link} with {new_link}")

    # Write back only if content changed
    if content == original_content:
        return False
    with open(file_path, "w", encoding="utf-8") as f:
        f.write(content)
    return True


def insert_dates(model_card_list: list[str]):
    """Insert release and commit dates into model cards.

    For each card, paper links are normalized first. If the dates line already exists, only the
    "added to Hugging Face Transformers" date is refreshed (the release date is left untouched);
    otherwise a new dates line is inserted right after the first ``-->`` marker.

    Args:
        model_card_list: Model card names, with or without the ``.md`` extension.
    """
    date_line_pattern = re.compile(
        r"\n\*This model was released on (.*) and added to Hugging Face Transformers on (\d{4}-\d{2}-\d{2})\.\*"
    )

    for model_card in model_card_list:
        if not model_card.endswith(".md"):
            model_card = f"{model_card}.md"

        if model_card == "auto.md" or model_card == "timm_wrapper.md":
            continue

        file_path = os.path.join(DOCS_PATH, model_card)

        # First replace arxiv paper links with hf paper link if possible
        if replace_paper_links(file_path):
            print(f"Updated paper links in {model_card}")

        # Check if the copyright disclaimer sections exists, if not, add one with 2025
        with open(file_path, "r", encoding="utf-8") as f:
            content = f.read()
        markers = list(re.finditer(r"-->", content))  # Dates info is placed right below this marker
        if not markers:
            print(f"No marker found in {model_card}. Adding copyright disclaimer to the top.")

            # Add copyright disclaimer to the very top of the file
            content = COPYRIGHT_DISCLAIMER + "\n\n" + content
            with open(file_path, "w", encoding="utf-8") as f:
                f.write(content)
            markers = list(re.finditer(r"-->", content))

        hf_commit_date = get_first_commit_date(model_name=model_card)

        match = date_line_pattern.search(content)

        # If the dates info line already exists, only check and update the hf_commit_date, don't modify the existing release date
        if match:
            release_date, existing_hf_date = match.group(1), match.group(2)
            if existing_hf_date != hf_commit_date:
                old_line = match.group(0)
                new_line = f"\n*This model was released on {release_date} and added to Hugging Face Transformers on {hf_commit_date}.*"

                content = content.replace(old_line, new_line)
                with open(file_path, "w", encoding="utf-8") as f:
                    f.write(content)
            continue

        # If the dates info line does not exist, add it
        paper_link = get_paper_link(model_card=model_card, path=file_path)
        if paper_link in ("No_paper", "blog"):
            release_date = _RELEASE_DATE_PLACEHOLDER
        else:
            release_date = get_release_date(paper_link)

        # Fall back to the top of the file if, against expectations, there is still no marker
        insert_index = markers[0].end() if markers else 0

        date_info = f"\n*This model was released on {release_date} and added to Hugging Face Transformers on {hf_commit_date}.*"
        content = content[:insert_index] + date_info + content[insert_index:]
        with open(file_path, "w", encoding="utf-8") as f:
            f.write(content)
        print(f"Added {model_card} release and commit dates.")


def get_all_model_cards():
    """Get all model cards from the docs path.

    Returns:
        Sorted list of model names (file names without ``.md``), excluding the non-model pages.
    """
    model_names = (os.path.splitext(file)[0] for file in os.listdir(DOCS_PATH) if file.endswith(".md"))
    return sorted(name for name in model_names if name not in _SKIPPED_CARDS)


def main(all=False, auto=True, models=None):
    """Select the model cards to process and insert the dates into them.

    Args:
        all: Process every model card of the docs directory.
        auto: Process the model cards reported as modified by ``git status``.
        models: Explicit list of model cards to process. Because `auto` defaults to `True`, an
            explicit list takes precedence over `auto`.
    """
    if all:
        model_cards = get_all_model_cards()
        print(f"Processing all {len(model_cards)} model cards from docs directory")
    elif models:
        model_cards = models
        print(f"Processing specified model cards: {model_cards}")
    elif auto:
        model_cards = get_modified_cards()
        if not model_cards:
            print("No modified model cards found.")
            return
        print(f"Processing modified model cards: {model_cards}")
    else:
        print("No model cards to process.")
        return

    insert_dates(model_cards)


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Add release and commit dates to model cards")
    group = parser.add_mutually_exclusive_group(required=False)
    group.add_argument(
        "--auto", action="store_true", help="Automatically process modified model cards from git status"
    )
    group.add_argument("--models", nargs="+", help="Specify model cards to process (without .md extension)")
    group.add_argument("--all", action="store_true", help="Process all model cards in the docs directory")

    parser.set_defaults(auto=True)
    args = parser.parse_args()

    main(args.all, args.auto, args.models)