Migrate doc files to Markdown. (#24376)

* Rename index.mdx to index.md
* With saved modifs
* Address review comment
* Treat all files
* .mdx -> .md
* Remove special char
* Update utils/tests_fetcher.py

Co-authored-by: Lysandre Debut <lysandre.debut@reseau.eseo.fr>

---------

Co-authored-by: Lysandre Debut <lysandre.debut@reseau.eseo.fr>
2023-06-20 18:07:47 -04:00
parent b0513b013b
commit eb849f6604
472 changed files with 1909 additions and 98 deletions
--- a/utils/check_copies.py
+++ b/utils/check_copies.py
@@ -440,7 +440,7 @@ def check_model_list_copy(overwrite=False, max_per_line=119):

    # If the introduction or the conclusion of the list change, the prompts may need to be updated.
    index_list, start_index, end_index, lines = _find_text_in_file(
-        filename=os.path.join(PATH_TO_DOCS, "index.mdx"),
+        filename=os.path.join(PATH_TO_DOCS, "index.md"),
        start_prompt="<!--This list is updated automatically from the README",
        end_prompt="### Supported frameworks",
    )
@@ -464,11 +464,11 @@ def check_model_list_copy(overwrite=False, max_per_line=119):
    converted_md_list = convert_readme_to_index(md_list)
    if converted_md_list != index_list:
        if overwrite:
-            with open(os.path.join(PATH_TO_DOCS, "index.mdx"), "w", encoding="utf-8", newline="\n") as f:
+            with open(os.path.join(PATH_TO_DOCS, "index.md"), "w", encoding="utf-8", newline="\n") as f:
                f.writelines(lines[:start_index] + [converted_md_list] + lines[end_index:])
        else:
            raise ValueError(
-                "The model list in the README changed and the list in `index.mdx` has not been updated. Run "
+                "The model list in the README changed and the list in `index.md` has not been updated. Run "
                "`make fix-copies` to fix this."
            )

--- a/utils/check_repo.py
+++ b/utils/check_repo.py
@@ -756,7 +756,7 @@ def find_all_documented_objects():
            content = f.read()
        raw_doc_objs = re.findall(r"(?:autoclass|autofunction):: transformers.(\S+)\s+", content)
        documented_obj += [obj.split(".")[-1] for obj in raw_doc_objs]
-    for doc_file in Path(PATH_TO_DOC).glob("**/*.mdx"):
+    for doc_file in Path(PATH_TO_DOC).glob("**/*.md"):
        with open(doc_file, "r", encoding="utf-8", newline="\n") as f:
            content = f.read()
        raw_doc_objs = re.findall(r"\[\[autodoc\]\]\s+(\S+)\s+", content)
@@ -908,7 +908,7 @@ def check_all_objects_are_documented():
 def check_model_type_doc_match():
    """Check all doc pages have a corresponding model type."""
    model_doc_folder = Path(PATH_TO_DOC) / "model_doc"
-    model_docs = [m.stem for m in model_doc_folder.glob("*.mdx")]
+    model_docs = [m.stem for m in model_doc_folder.glob("*.md")]

    model_types = list(transformers.models.auto.configuration_auto.MODEL_NAMES_MAPPING.keys())
    model_types = [MODEL_TYPE_TO_DOC_MAPPING[m] if m in MODEL_TYPE_TO_DOC_MAPPING else m for m in model_types]
--- a/utils/check_table.py
+++ b/utils/check_table.py
@@ -157,7 +157,7 @@ def get_model_table_from_auto_modules():
 def check_model_table(overwrite=False):
    """Check the model table in the index.rst is consistent with the state of the lib and maybe `overwrite`."""
    current_table, start_index, end_index, lines = _find_text_in_file(
-        filename=os.path.join(PATH_TO_DOCS, "index.mdx"),
+        filename=os.path.join(PATH_TO_DOCS, "index.md"),
        start_prompt="<!--This table is updated automatically from the auto modules",
        end_prompt="<!-- End table-->",
    )
@@ -165,11 +165,11 @@ def check_model_table(overwrite=False):

    if current_table != new_table:
        if overwrite:
-            with open(os.path.join(PATH_TO_DOCS, "index.mdx"), "w", encoding="utf-8", newline="\n") as f:
+            with open(os.path.join(PATH_TO_DOCS, "index.md"), "w", encoding="utf-8", newline="\n") as f:
                f.writelines(lines[:start_index] + [new_table] + lines[end_index:])
        else:
            raise ValueError(
-                "The model table in the `index.mdx` has not been updated. Run `make fix-copies` to fix this."
+                "The model table in the `index.md` has not been updated. Run `make fix-copies` to fix this."
            )


--- a/utils/check_task_guides.py
+++ b/utils/check_task_guides.py
@@ -55,29 +55,29 @@ def _find_text_in_file(filename, start_prompt, end_prompt):
 transformers_module = direct_transformers_import(TRANSFORMERS_PATH)

 TASK_GUIDE_TO_MODELS = {
-    "asr.mdx": transformers_module.models.auto.modeling_auto.MODEL_FOR_CTC_MAPPING_NAMES,
-    "audio_classification.mdx": transformers_module.models.auto.modeling_auto.MODEL_FOR_AUDIO_CLASSIFICATION_MAPPING_NAMES,
-    "language_modeling.mdx": transformers_module.models.auto.modeling_auto.MODEL_FOR_CAUSAL_LM_MAPPING_NAMES,
-    "image_classification.mdx": transformers_module.models.auto.modeling_auto.MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING_NAMES,
-    "masked_language_modeling.mdx": transformers_module.models.auto.modeling_auto.MODEL_FOR_MASKED_LM_MAPPING_NAMES,
-    "multiple_choice.mdx": transformers_module.models.auto.modeling_auto.MODEL_FOR_MULTIPLE_CHOICE_MAPPING_NAMES,
-    "object_detection.mdx": transformers_module.models.auto.modeling_auto.MODEL_FOR_OBJECT_DETECTION_MAPPING_NAMES,
-    "question_answering.mdx": transformers_module.models.auto.modeling_auto.MODEL_FOR_QUESTION_ANSWERING_MAPPING_NAMES,
-    "semantic_segmentation.mdx": transformers_module.models.auto.modeling_auto.MODEL_FOR_SEMANTIC_SEGMENTATION_MAPPING_NAMES,
-    "sequence_classification.mdx": transformers_module.models.auto.modeling_auto.MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING_NAMES,
-    "summarization.mdx": transformers_module.models.auto.modeling_auto.MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING_NAMES,
-    "token_classification.mdx": transformers_module.models.auto.modeling_auto.MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING_NAMES,
-    "translation.mdx": transformers_module.models.auto.modeling_auto.MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING_NAMES,
-    "video_classification.mdx": transformers_module.models.auto.modeling_auto.MODEL_FOR_VIDEO_CLASSIFICATION_MAPPING_NAMES,
-    "document_question_answering.mdx": transformers_module.models.auto.modeling_auto.MODEL_FOR_DOCUMENT_QUESTION_ANSWERING_MAPPING_NAMES,
-    "monocular_depth_estimation.mdx": transformers_module.models.auto.modeling_auto.MODEL_FOR_DEPTH_ESTIMATION_MAPPING_NAMES,
+    "asr.md": transformers_module.models.auto.modeling_auto.MODEL_FOR_CTC_MAPPING_NAMES,
+    "audio_classification.md": transformers_module.models.auto.modeling_auto.MODEL_FOR_AUDIO_CLASSIFICATION_MAPPING_NAMES,
+    "language_modeling.md": transformers_module.models.auto.modeling_auto.MODEL_FOR_CAUSAL_LM_MAPPING_NAMES,
+    "image_classification.md": transformers_module.models.auto.modeling_auto.MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING_NAMES,
+    "masked_language_modeling.md": transformers_module.models.auto.modeling_auto.MODEL_FOR_MASKED_LM_MAPPING_NAMES,
+    "multiple_choice.md": transformers_module.models.auto.modeling_auto.MODEL_FOR_MULTIPLE_CHOICE_MAPPING_NAMES,
+    "object_detection.md": transformers_module.models.auto.modeling_auto.MODEL_FOR_OBJECT_DETECTION_MAPPING_NAMES,
+    "question_answering.md": transformers_module.models.auto.modeling_auto.MODEL_FOR_QUESTION_ANSWERING_MAPPING_NAMES,
+    "semantic_segmentation.md": transformers_module.models.auto.modeling_auto.MODEL_FOR_SEMANTIC_SEGMENTATION_MAPPING_NAMES,
+    "sequence_classification.md": transformers_module.models.auto.modeling_auto.MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING_NAMES,
+    "summarization.md": transformers_module.models.auto.modeling_auto.MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING_NAMES,
+    "token_classification.md": transformers_module.models.auto.modeling_auto.MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING_NAMES,
+    "translation.md": transformers_module.models.auto.modeling_auto.MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING_NAMES,
+    "video_classification.md": transformers_module.models.auto.modeling_auto.MODEL_FOR_VIDEO_CLASSIFICATION_MAPPING_NAMES,
+    "document_question_answering.md": transformers_module.models.auto.modeling_auto.MODEL_FOR_DOCUMENT_QUESTION_ANSWERING_MAPPING_NAMES,
+    "monocular_depth_estimation.md": transformers_module.models.auto.modeling_auto.MODEL_FOR_DEPTH_ESTIMATION_MAPPING_NAMES,
 }

 # This list contains model types used in some task guides that are not in `CONFIG_MAPPING_NAMES` (therefore not in any
 # `MODEL_MAPPING_NAMES` or any `MODEL_FOR_XXX_MAPPING_NAMES`).
 SPECIAL_TASK_GUIDE_TO_MODEL_TYPES = {
-    "summarization.mdx": ("nllb",),
-    "translation.mdx": ("nllb",),
+    "summarization.md": ("nllb",),
+    "translation.md": ("nllb",),
 }


--- a/utils/documentation_tests.txt
+++ b/utils/documentation_tests.txt
@@ -1,17 +1,17 @@
-docs/source/en/quicktour.mdx
-docs/source/es/quicktour.mdx
-docs/source/en/pipeline_tutorial.mdx
-docs/source/en/autoclass_tutorial.mdx
-docs/source/en/task_summary.mdx
-docs/source/en/model_doc/markuplm.mdx
-docs/source/en/model_doc/speech_to_text.mdx
-docs/source/en/model_doc/switch_transformers.mdx
-docs/source/en/model_doc/t5.mdx
-docs/source/en/model_doc/t5v1.1.mdx
-docs/source/en/model_doc/byt5.mdx
-docs/source/en/model_doc/tapex.mdx
-docs/source/en/model_doc/donut.mdx
-docs/source/en/model_doc/encoder-decoder.mdx
+docs/source/en/quicktour.md
+docs/source/es/quicktour.md
+docs/source/en/pipeline_tutorial.md
+docs/source/en/autoclass_tutorial.md
+docs/source/en/task_summary.md
+docs/source/en/model_doc/markuplm.md
+docs/source/en/model_doc/speech_to_text.md
+docs/source/en/model_doc/switch_transformers.md
+docs/source/en/model_doc/t5.md
+docs/source/en/model_doc/t5v1.1.md
+docs/source/en/model_doc/byt5.md
+docs/source/en/model_doc/tapex.md
+docs/source/en/model_doc/donut.md
+docs/source/en/model_doc/encoder-decoder.md
 src/transformers/generation/configuration_utils.py
 src/transformers/generation/tf_utils.py
 src/transformers/generation/utils.py
--- a/utils/notification_service_doc_tests.py
+++ b/utils/notification_service_doc_tests.py
@@ -329,7 +329,7 @@ if __name__ == "__main__":
    docs = collections.OrderedDict(
        [
            ("*.py", "API Examples"),
-            ("*.mdx", "MDX Examples"),
+            ("*.md", "MD Examples"),
        ]
    )

--- a/utils/tests_fetcher.py
+++ b/utils/tests_fetcher.py
@@ -263,14 +263,14 @@ def get_diff_for_doctesting(repo, base_commit, commits):
    code_diff = []
    for commit in commits:
        for diff_obj in commit.diff(base_commit):
-            # We always add new python/mdx files
-            if diff_obj.change_type in ["A"] and (diff_obj.b_path.endswith(".py") or diff_obj.b_path.endswith(".mdx")):
+            # We always add new python/md files
+            if diff_obj.change_type in ["A"] and (diff_obj.b_path.endswith(".py") or diff_obj.b_path.endswith(".md")):
                code_diff.append(diff_obj.b_path)
            # Now for modified files
            elif (
                diff_obj.change_type in ["M", "R"]
                and diff_obj.b_path.endswith(".py")
-                or diff_obj.b_path.endswith(".mdx")
+                or diff_obj.b_path.endswith(".md")
            ):
                # In case of renames, we'll look at the tests using both the old and new name.
                if diff_obj.a_path != diff_obj.b_path: