From b2b7fc781438c7d1d551cdac0a44af5ca0399797 Mon Sep 17 00:00:00 2001 From: Sylvain Gugger <35901082+sgugger@users.noreply.github.com> Date: Mon, 5 Oct 2020 09:40:45 -0400 Subject: [PATCH] Check and update model list in index.rst automatically (#7527) * Check and update model list in index.rst automatically * Check and update model list in index.rst automatically * Adapt template --- docs/source/index.rst | 200 +++++++++++++------------ templates/adding_a_new_model/README.md | 2 +- utils/check_copies.py | 108 +++++++++++++ 3 files changed, 215 insertions(+), 95 deletions(-) diff --git a/docs/source/index.rst b/docs/source/index.rst index 231c0ed4dc..0545d46240 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -54,105 +54,117 @@ The documentation is organized in five parts: The library currently contains PyTorch and Tensorflow implementations, pre-trained model weights, usage scripts and conversion utilities for the following models: -1. `ALBERT `_ (from Google Research), released together with the paper - `ALBERT: A Lite BERT for Self-supervised Learning of Language Representations `_ - by Zhenzhong Lan, Mingda Chen, Sebastian Goodman, Kevin Gimpel, Piyush Sharma, and Radu Soricut. -2. `BART `_ (from Facebook) released with the paper - `BART: Denoising Sequence-to-Sequence Pre-training for Natural Language Generation, Translation, and Comprehension - `_ by Mike Lewis, Yinhan Liu, Naman Goyal, Marjan Ghazvininejad, Abdelrahman - Mohamed, Omer Levy, Ves Stoyanov, and Luke Zettlemoyer. -3. `BERT `_ (from Google) released with the paper `BERT: Pre-training of Deep - Bidirectional Transformers for Language Understanding `_ by Jacob Devlin, Ming-Wei - Chang, Kenton Lee, and Kristina Toutanova. -4. `BERT For Sequence Generation `_ - (from Google) released with the paper `Leveraging Pre-trained Checkpoints for Sequence Generation Tasks - `_ by Sascha Rothe, Shashi Narayan, Aliaksei Severyn. -5. 
`CamemBERT `_ (from FAIR, Inria, Sorbonne Université) - released together with the paper `CamemBERT: a Tasty French Language Model `_ by - Louis Martin, Benjamin Muller, Pedro Javier Ortiz Suarez, Yoann Dupont, Laurent Romary, Eric Villemonte de la - Clergerie, Djame Seddah, and Benoît Sagot. -6. `CTRL `_ (from Salesforce), released together with the - paper `CTRL: A Conditional Transformer Language Model for Controllable Generation - `_ by Nitish Shirish Keskar, Bryan McCann, Lav R. Varshney, Caiming Xiong, - and Richard Socher. -7. `DeBERTa `_ (from Microsoft Research) released with the - paper `DeBERTa: Decoding-enhanced BERT with Disentangled Attention `_ by Pengcheng - He, Xiaodong Liu, Jianfeng Gao, Weizhu Chen. -8. `DialoGPT `_ (from Microsoft Research) released with the paper `DialoGPT: - Large-Scale Generative Pre-training for Conversational Response Generation `_ by - Yizhe Zhang, Siqi Sun, Michel Galley, Yen-Chun Chen, Chris Brockett, Xiang Gao, Jianfeng Gao, Jingjing Liu, - and Bill Dolan. -9. `DistilBERT `_ (from HuggingFace) released together +.. + This list is updated automatically from the README with `make fix-copies`. Do not update manually! + +1. `ALBERT `__ (from Google Research and the Toyota + Technological Institute at Chicago) released with the paper `ALBERT: A Lite BERT for Self-supervised Learning of + Language Representations `__, by Zhenzhong Lan, Mingda Chen, Sebastian Goodman, + Kevin Gimpel, Piyush Sharma, Radu Soricut. +2. `BART `__ (from Facebook) released with the paper `BART: + Denoising Sequence-to-Sequence Pre-training for Natural Language Generation, Translation, and Comprehension + `__ by Mike Lewis, Yinhan Liu, Naman Goyal, Marjan Ghazvininejad, Abdelrahman + Mohamed, Omer Levy, Ves Stoyanov and Luke Zettlemoyer. +3. `BERT `__ (from Google) released with the paper `BERT: + Pre-training of Deep Bidirectional Transformers for Language Understanding `__ by + Jacob Devlin, Ming-Wei Chang, Kenton Lee and Kristina Toutanova. +4. 
`BERT For Sequence Generation `__ (from + Google) released with the paper `Leveraging Pre-trained Checkpoints for Sequence Generation Tasks + `__ by Sascha Rothe, Shashi Narayan, Aliaksei Severyn. +5. `CamemBERT `__ (from Inria/Facebook/Sorbonne) released + with the paper `CamemBERT: a Tasty French Language Model `__ by Louis Martin*, + Benjamin Muller*, Pedro Javier Ortiz Suárez*, Yoann Dupont, Laurent Romary, Éric Villemonte de la Clergerie, Djamé + Seddah and Benoît Sagot. +6. `CTRL `__ (from Salesforce) released with the paper `CTRL: + A Conditional Transformer Language Model for Controllable Generation `__ by Nitish + Shirish Keskar*, Bryan McCann*, Lav R. Varshney, Caiming Xiong and Richard Socher. +7. `DeBERTa `__ (from Microsoft Research) released with the + paper `DeBERTa: Decoding-enhanced BERT with Disentangled Attention `__ by + Pengcheng He, Xiaodong Liu, Jianfeng Gao, Weizhu Chen. +8. `DialoGPT `__ (from Microsoft Research) released with + the paper `DialoGPT: Large-Scale Generative Pre-training for Conversational Response Generation + `__ by Yizhe Zhang, Siqi Sun, Michel Galley, Yen-Chun Chen, Chris Brockett, Xiang + Gao, Jianfeng Gao, Jingjing Liu, Bill Dolan. +9. `DistilBERT `__ (from HuggingFace), released together with the paper `DistilBERT, a distilled version of BERT: smaller, faster, cheaper and lighter - `_ by Victor Sanh, Lysandre Debut, and Thomas Wolf. The same method has been - applied to compress GPT2 into - `DistilGPT2 `_. -10. `DPR `_ (from Facebook) released with the paper `Dense Passage Retrieval - for Open-Domain Question Answering `_ by Vladimir Karpukhin, Barlas Oğuz, Sewon + `__ by Victor Sanh, Lysandre Debut and Thomas Wolf. The same method has been + applied to compress GPT2 into `DistilGPT2 + `__, RoBERTa into `DistilRoBERTa + `__, Multilingual BERT into + `DistilmBERT `__ and a German version + of DistilBERT. +10. 
`DPR `__ (from Facebook) released with the paper `Dense Passage Retrieval + for Open-Domain Question Answering `__ by Vladimir Karpukhin, Barlas Oğuz, Sewon Min, Patrick Lewis, Ledell Wu, Sergey Edunov, Danqi Chen, and Wen-tau Yih. -11. `ELECTRA `_ (from Google Research/Stanford University) released with - the paper `ELECTRA: Pre-training text encoders as discriminators rather than generators - `_ by Kevin Clark, Minh-Thang Luong, Quoc V. Le, and Christopher D. Manning. -12. `FlauBERT `_ (from CNRS) released with the paper `FlauBERT: Unsupervised - Language Model Pre-training for French `_ by Hang Le, Loïc Vial, Jibril Frej, - Vincent Segonne, Maximin Coavoux, Benjamin Lecouteux, Alexandre Allauzen, Benoît Crabbé, Laurent Besacier, and - Didier Schwab. -13. `Funnel Transformer `_ (from CMU/Google Brain) released with the paper - `Funnel-Transformer: Filtering out Sequential Redundancy for Efficient Language Processing - `_ by Zihang Dai, Guokun Lai, Yiming Yang, Quoc V. Le. -14. `GPT `_ (from OpenAI) released with the paper `Improving Language - Understanding by Generative Pre-Training `_ by Alec Radford, Karthik - Narasimhan, Tim Salimans, and Ilya Sutskever. -15. `GPT-2 `_ (from OpenAI) released with the paper `Language Models are - Unsupervised Multitask Learners `_ by Alec Radford, Jeffrey Wu, - Rewon Child, David Luan, Dario Amodei, and Ilya Sutskever. -16. `LayoutLM `_ (from Microsoft Research Asia) released with +11. `ELECTRA `__ (from Google Research/Stanford University) + released with the paper `ELECTRA: Pre-training text encoders as discriminators rather than generators + `__ by Kevin Clark, Minh-Thang Luong, Quoc V. Le, Christopher D. Manning. +12. `FlauBERT `__ (from CNRS) released with the paper + `FlauBERT: Unsupervised Language Model Pre-training for French `__ by Hang Le, + Loïc Vial, Jibril Frej, Vincent Segonne, Maximin Coavoux, Benjamin Lecouteux, Alexandre Allauzen, Benoît Crabbé, + Laurent Besacier, Didier Schwab. +13. 
`Funnel Transformer `__ (from CMU/Google Brain) released with the + paper `Funnel-Transformer: Filtering out Sequential Redundancy for Efficient Language Processing + `__ by Zihang Dai, Guokun Lai, Yiming Yang, Quoc V. Le. +14. `GPT `__ (from OpenAI) released with the paper `Improving + Language Understanding by Generative Pre-Training `__ by Alec + Radford, Karthik Narasimhan, Tim Salimans and Ilya Sutskever. +15. `GPT-2 `__ (from OpenAI) released with the paper `Language + Models are Unsupervised Multitask Learners `__ by Alec Radford*, + Jeffrey Wu*, Rewon Child, David Luan, Dario Amodei** and Ilya Sutskever**. +16. `LayoutLM `__ (from Microsoft Research Asia) released with the paper `LayoutLM: Pre-training of Text and Layout for Document Image Understanding - `_ by Yiheng Xu, Minghao Li, Lei Cui, Shaohan Huang, Furu Wei, Ming Zhou. -17. `Longformer `_ (from AllenAI) released with the paper `Longformer: The - Long-Document Transformer `_ by Iz Beltagy, Matthew E. Peters, and Arman Cohan. -18. `LXMERT `_ (from UNC Chapel Hill) released with the paper `LXMERT: Learning - Cross-Modality Encoder Representations from Transformers for Open-Domain Question - Answering `_ by Hao Tan and Mohit Bansal. -19. `MarianMT `_ (developed by the Microsoft Translator Team) machine translation models - trained using `OPUS `_ pretrained_models data by Jörg Tiedemann. -20. `MBart `_ (from Facebook) released with the paper - `Multilingual Denoising Pre-training for Neural Machine Translation `_ by Yinhan + `__ by Yiheng Xu, Minghao Li, Lei Cui, Shaohan Huang, Furu Wei, Ming Zhou. +17. `Longformer `__ (from AllenAI) released with the + paper `Longformer: The Long-Document Transformer `__ by Iz Beltagy, Matthew E. + Peters, Arman Cohan. +18. `LXMERT `__ (from UNC Chapel Hill) released with the paper `LXMERT: Learning + Cross-Modality Encoder Representations from Transformers for Open-Domain Question Answering + `__ by Hao Tan and Mohit Bansal. +19. 
`MarianMT `__ Machine translation models trained using + `OPUS `__ data by Jörg Tiedemann. The `Marian Framework `__ is + being developed by the Microsoft Translator Team. +20. `MBart `__ (from Facebook) released with the paper + `Multilingual Denoising Pre-training for Neural Machine Translation `__ by Yinhan Liu, Jiatao Gu, Naman Goyal, Xian Li, Sergey Edunov, Marjan Ghazvininejad, Mike Lewis, Luke Zettlemoyer. -21. `MMBT `_ (from Facebook), released together with the paper a `Supervised - Multimodal Bitransformers for Classifying Images and Text `_ by Douwe Kiela, - Suvrat Bhooshan, Hamed Firooz, and Davide Testuggine. -22. `Pegasus `_ (from Google) released with the paper `PEGASUS: - Pre-training with Extracted Gap-sentences for Abstractive Summarization `_ by +21. `MMBT `__ (from Facebook), released together with the paper a + `Supervised Multimodal Bitransformers for Classifying Images and Text `__ by + Douwe Kiela, Suvrat Bhooshan, Hamed Firooz, Davide Testuggine. +22. `Pegasus `__ (from Google) released with the paper `PEGASUS: + Pre-training with Extracted Gap-sentences for Abstractive Summarization `__> by Jingqing Zhang, Yao Zhao, Mohammad Saleh and Peter J. Liu. -23. `Reformer `_ (from Google Research) released with - the paper `Reformer: The Efficient Transformer `_ by Nikita Kitaev, Łukasz - Kaiser, and Anselm Levskaya. -24. `RoBERTa `_ (from Facebook), released together with - the paper a `Robustly Optimized BERT Pretraining Approach `_ by Yinhan Liu, Myle - Ott, Naman Goyal, Jingfei Du, Mandar Joshi, Danqi Chen, Omer Levy, Mike Lewis, Luke Zettlemoyer, and Veselin - Stoyanov. -25. `T5 `_ (from Google) released with the paper - `Exploring the Limits of Transfer Learning with a Unified Text-to-Text Transformer - `_ by Colin Raffel, Noam Shazeer, Adam Roberts, Katherine Lee, Sharan Narang, - Michael Matena, Yanqi Zhou, Wei Li, and Peter J. Liu. -26. 
`Transformer-XL `_ (from Google/CMU) released with the paper - `Transformer-XL: Attentive Language Models Beyond a Fixed-Length Context `_ by - Zihang Dai, Zhilin Yang, Yiming Yang, Jaime Carbonell, Quoc V. Le, and Ruslan Salakhutdinov. -27. `XLM `_ (from Facebook) released together with the paper `Cross-lingual - Language Model Pretraining `_ by Guillaume Lample and Alexis Conneau. -28. `XLM-RoBERTa `_ (from Facebook AI), released together - with the paper `Unsupervised Cross-lingual Representation Learning at Scale `_ by - Alexis Conneau, Kartikay Khandelwal, Naman Goyal, Vishrav Chaudhary, Guillaume Wenzek, Francisco Guzmán, Edouard - Grave, Myle Ott, Luke Zettlemoyer, and Veselin Stoyanov. -29. `XLNet `_ (from Google/CMU) released with the paper `​XLNet: Generalized - Autoregressive Pretraining for Language Understanding `_ by Zhilin Yang, Zihang - Dai, Yiming Yang, Jaime Carbonell, Ruslan Salakhutdinov, and Quoc V. Le. -30. SqueezeBERT (from UC Berkeley) released with the paper - `SqueezeBERT: What can computer vision teach NLP about efficient neural networks? `_ - by Forrest N. Iandola, Albert E. Shaw, Ravi Krishna, and Kurt W. Keutzer. -31. `Other community models `_, contributed by the `community - `_. +23. `Reformer `__ (from Google Research) released with the + paper `Reformer: The Efficient Transformer `__ by Nikita Kitaev, Łukasz Kaiser, + Anselm Levskaya. +24. `RoBERTa `__ (from Facebook), released together with + the paper a `Robustly Optimized BERT Pretraining Approach `__ by Yinhan Liu, Myle + Ott, Naman Goyal, Jingfei Du, Mandar Joshi, Danqi Chen, Omer Levy, Mike Lewis, Luke Zettlemoyer, Veselin Stoyanov. + ultilingual BERT into `DistilmBERT + `__ and a German version of + DistilBERT. +25. `SqueezeBert `__ released with the paper + `SqueezeBERT: What can computer vision teach NLP about efficient neural networks? + `__ by Forrest N. Iandola, Albert E. Shaw, Ravi Krishna, and Kurt W. Keutzer. +26. 
`T5 `__ (from Google AI) released with the paper `Exploring + the Limits of Transfer Learning with a Unified Text-to-Text Transformer `__ by + Colin Raffel and Noam Shazeer and Adam Roberts and Katherine Lee and Sharan Narang and Michael Matena and Yanqi + Zhou and Wei Li and Peter J. Liu. +27. `Transformer-XL `__ (from Google/CMU) released + with the paper `Transformer-XL: Attentive Language Models Beyond a Fixed-Length Context + `__ by Zihang Dai*, Zhilin Yang*, Yiming Yang, Jaime Carbonell, Quoc V. Le, + Ruslan Salakhutdinov. +28. `XLM `__ (from Facebook) released together with the paper + `Cross-lingual Language Model Pretraining `__ by Guillaume Lample and Alexis + Conneau. +29. `XLM-RoBERTa `__ (from Facebook AI), released + together with the paper `Unsupervised Cross-lingual Representation Learning at Scale + `__ by Alexis Conneau*, Kartikay Khandelwal*, Naman Goyal, Vishrav Chaudhary, + Guillaume Wenzek, Francisco Guzmán, Edouard Grave, Myle Ott, Luke Zettlemoyer and Veselin Stoyanov. +30. `XLNet `__ (from Google/CMU) released with the paper + `​XLNet: Generalized Autoregressive Pretraining for Language Understanding `__ by + Zhilin Yang*, Zihang Dai*, Yiming Yang, Jaime Carbonell, Ruslan Salakhutdinov, Quoc V. Le. +31. `Other community models `__, contributed by the `community + `__. .. toctree:: :maxdepth: 2 diff --git a/templates/adding_a_new_model/README.md b/templates/adding_a_new_model/README.md index c85f921caf..862c7f42ce 100644 --- a/templates/adding_a_new_model/README.md +++ b/templates/adding_a_new_model/README.md @@ -83,7 +83,7 @@ You can then finish the addition step by adding imports for your classes in the - [ ] Edit the PyTorch to TF 2.0 conversion script to add your model in the `convert_pytorch_checkpoint_to_tf2.py` file. - [ ] Add a mention of your model in the doc: `README.md` and the documentation itself - in `docs/source/index.rst` and `docs/source/pretrained_models.rst`. + in `docs/source/pretrained_models.rst`. 
Run `make fix-copies` to update `docs/source/index.rst` with your changes. - [ ] Upload the pretrained weights, configurations and vocabulary files. - [ ] Create model card(s) for your models on huggingface.co. For those last two steps, check the [model sharing documentation](https://huggingface.co/transformers/model_sharing.html). diff --git a/utils/check_copies.py b/utils/check_copies.py index 2d4b9fbc06..1a7fcfe04b 100644 --- a/utils/check_copies.py +++ b/utils/check_copies.py @@ -23,6 +23,7 @@ import tempfile # All paths are set with the intent you should run this script from the root of the repo with the command # python utils/check_copies.py TRANSFORMERS_PATH = "src/transformers" +PATH_TO_DOCS = "docs/source" def find_code_in_transformers(object_name): @@ -166,6 +167,113 @@ def check_copies(overwrite: bool = False): + diff + "\nRun `make fix-copies` or `python utils/check_copies --fix_and_overwrite` to fix them." ) + check_model_list_copy(overwrite=overwrite) + + +def get_model_list(): + """ Extracts the model list from the README. """ + # If the introduction or the conclusion of the list change, the prompts may need to be updated. + _start_prompt = "🤗 Transformers currently provides the following architectures" + _end_prompt = "1. Want to contribute a new model?" + with open(os.path.join("README.md"), "r", encoding="utf-8") as f: + lines = f.readlines() + # Find the start of the list. 
+ start_index = 0 + while not lines[start_index].startswith(_start_prompt): + start_index += 1 + start_index += 1 + + result = [] + current_line = "" + end_index = start_index + + while not lines[end_index].startswith(_end_prompt): + if lines[end_index].startswith("1."): + if len(current_line) > 1: + result.append(current_line) + current_line = lines[end_index] + elif len(lines[end_index]) > 1: + current_line = f"{current_line[:-1]} {lines[end_index].lstrip()}" + end_index += 1 + if len(current_line) > 1: + result.append(current_line) + + return "".join(result) + + +def split_long_line_with_indent(line, max_per_line, indent): + """ Split the `line` so that it doesn't go over `max_per_line` and adds `indent` to new lines. """ + words = line.split(" ") + lines = [] + current_line = words[0] + for word in words[1:]: + if len(f"{current_line} {word}") > max_per_line: + lines.append(current_line) + current_line = " " * indent + word + else: + current_line = f"{current_line} {word}" + lines.append(current_line) + return "\n".join(lines) + + +def convert_to_rst(model_list, max_per_line=None): + """ Convert `model_list` to rst format. 
""" + # Convert **[description](link)** to `description `__ + model_list = re.sub(r"\*\*\[([^\]]*)\]\(([^\)]*)\)\*\*", r"`\1 <\2>`__", model_list) + + # Convert [description](link) to `description `__ + model_list = re.sub(r"\[([^\]]*)\]\(([^\)]*)\)", r"`\1 <\2>`__", model_list) + + # Enumerate the lines properly + lines = model_list.split("\n") + result = [] + for i, line in enumerate(lines): + line = re.sub(r"^\s*(\d+)\.", f"{i+1}.", line) + # Split the lines that are too long + if max_per_line is not None and len(line) > max_per_line: + prompt = re.search(r"^(\s*\d+\.\s+)\S", line) + indent = len(prompt.groups()[0]) if prompt is not None else 0 + line = split_long_line_with_indent(line, max_per_line, indent) + + result.append(line) + return "\n".join(result) + + +def check_model_list_copy(overwrite=False, max_per_line=119): + """ Check the model lists in the README and index.rst are consistent and maybe `overwrite`. """ + _start_prompt = " This list is updated automatically from the README" + _end_prompt = ".. toctree::" + with open(os.path.join(PATH_TO_DOCS, "index.rst"), "r", encoding="utf-8") as f: + lines = f.readlines() + # Find the start of the list. 
+ start_index = 0 + while not lines[start_index].startswith(_start_prompt): + start_index += 1 + start_index += 1 + + end_index = start_index + while not lines[end_index].startswith(_end_prompt): + end_index += 1 + end_index -= 1 + + while len(lines[start_index]) <= 1: + start_index += 1 + while len(lines[end_index]) <= 1: + end_index -= 1 + end_index += 1 + + rst_list = "".join(lines[start_index:end_index]) + md_list = get_model_list() + converted_list = convert_to_rst(md_list, max_per_line=max_per_line) + + if converted_list != rst_list: + if overwrite: + with open(os.path.join(PATH_TO_DOCS, "index.rst"), "w", encoding="utf-8") as f: + f.writelines(lines[:start_index] + [converted_list] + lines[end_index:]) + else: + raise ValueError( + "The model list in the README changed and the list in `index.rst` has not been updated. Run `make fix-copies` to fix this." + ) if __name__ == "__main__":