Migrate doc files to Markdown. (#24376)

* Rename index.mdx to index.md

* With saved modifications

* Address review comment

* Treat all files

* .mdx -> .md

* Remove special char

* Update utils/tests_fetcher.py

Co-authored-by: Lysandre Debut <lysandre.debut@reseau.eseo.fr>

---------

Co-authored-by: Lysandre Debut <lysandre.debut@reseau.eseo.fr>
This commit is contained in:
Sylvain Gugger
2023-06-20 18:07:47 -04:00
committed by GitHub
parent b0513b013b
commit eb849f6604
472 changed files with 1909 additions and 98 deletions

View File

@@ -440,7 +440,7 @@ def check_model_list_copy(overwrite=False, max_per_line=119):
# If the introduction or the conclusion of the list change, the prompts may need to be updated.
index_list, start_index, end_index, lines = _find_text_in_file(
filename=os.path.join(PATH_TO_DOCS, "index.mdx"),
filename=os.path.join(PATH_TO_DOCS, "index.md"),
start_prompt="<!--This list is updated automatically from the README",
end_prompt="### Supported frameworks",
)
@@ -464,11 +464,11 @@ def check_model_list_copy(overwrite=False, max_per_line=119):
converted_md_list = convert_readme_to_index(md_list)
if converted_md_list != index_list:
if overwrite:
with open(os.path.join(PATH_TO_DOCS, "index.mdx"), "w", encoding="utf-8", newline="\n") as f:
with open(os.path.join(PATH_TO_DOCS, "index.md"), "w", encoding="utf-8", newline="\n") as f:
f.writelines(lines[:start_index] + [converted_md_list] + lines[end_index:])
else:
raise ValueError(
"The model list in the README changed and the list in `index.mdx` has not been updated. Run "
"The model list in the README changed and the list in `index.md` has not been updated. Run "
"`make fix-copies` to fix this."
)

View File

@@ -756,7 +756,7 @@ def find_all_documented_objects():
content = f.read()
raw_doc_objs = re.findall(r"(?:autoclass|autofunction):: transformers.(\S+)\s+", content)
documented_obj += [obj.split(".")[-1] for obj in raw_doc_objs]
for doc_file in Path(PATH_TO_DOC).glob("**/*.mdx"):
for doc_file in Path(PATH_TO_DOC).glob("**/*.md"):
with open(doc_file, "r", encoding="utf-8", newline="\n") as f:
content = f.read()
raw_doc_objs = re.findall(r"\[\[autodoc\]\]\s+(\S+)\s+", content)
@@ -908,7 +908,7 @@ def check_all_objects_are_documented():
def check_model_type_doc_match():
"""Check all doc pages have a corresponding model type."""
model_doc_folder = Path(PATH_TO_DOC) / "model_doc"
model_docs = [m.stem for m in model_doc_folder.glob("*.mdx")]
model_docs = [m.stem for m in model_doc_folder.glob("*.md")]
model_types = list(transformers.models.auto.configuration_auto.MODEL_NAMES_MAPPING.keys())
model_types = [MODEL_TYPE_TO_DOC_MAPPING[m] if m in MODEL_TYPE_TO_DOC_MAPPING else m for m in model_types]

View File

@@ -157,7 +157,7 @@ def get_model_table_from_auto_modules():
def check_model_table(overwrite=False):
"""Check the model table in the index.rst is consistent with the state of the lib and maybe `overwrite`."""
current_table, start_index, end_index, lines = _find_text_in_file(
filename=os.path.join(PATH_TO_DOCS, "index.mdx"),
filename=os.path.join(PATH_TO_DOCS, "index.md"),
start_prompt="<!--This table is updated automatically from the auto modules",
end_prompt="<!-- End table-->",
)
@@ -165,11 +165,11 @@ def check_model_table(overwrite=False):
if current_table != new_table:
if overwrite:
with open(os.path.join(PATH_TO_DOCS, "index.mdx"), "w", encoding="utf-8", newline="\n") as f:
with open(os.path.join(PATH_TO_DOCS, "index.md"), "w", encoding="utf-8", newline="\n") as f:
f.writelines(lines[:start_index] + [new_table] + lines[end_index:])
else:
raise ValueError(
"The model table in the `index.mdx` has not been updated. Run `make fix-copies` to fix this."
"The model table in the `index.md` has not been updated. Run `make fix-copies` to fix this."
)

View File

@@ -55,29 +55,29 @@ def _find_text_in_file(filename, start_prompt, end_prompt):
transformers_module = direct_transformers_import(TRANSFORMERS_PATH)
TASK_GUIDE_TO_MODELS = {
"asr.mdx": transformers_module.models.auto.modeling_auto.MODEL_FOR_CTC_MAPPING_NAMES,
"audio_classification.mdx": transformers_module.models.auto.modeling_auto.MODEL_FOR_AUDIO_CLASSIFICATION_MAPPING_NAMES,
"language_modeling.mdx": transformers_module.models.auto.modeling_auto.MODEL_FOR_CAUSAL_LM_MAPPING_NAMES,
"image_classification.mdx": transformers_module.models.auto.modeling_auto.MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING_NAMES,
"masked_language_modeling.mdx": transformers_module.models.auto.modeling_auto.MODEL_FOR_MASKED_LM_MAPPING_NAMES,
"multiple_choice.mdx": transformers_module.models.auto.modeling_auto.MODEL_FOR_MULTIPLE_CHOICE_MAPPING_NAMES,
"object_detection.mdx": transformers_module.models.auto.modeling_auto.MODEL_FOR_OBJECT_DETECTION_MAPPING_NAMES,
"question_answering.mdx": transformers_module.models.auto.modeling_auto.MODEL_FOR_QUESTION_ANSWERING_MAPPING_NAMES,
"semantic_segmentation.mdx": transformers_module.models.auto.modeling_auto.MODEL_FOR_SEMANTIC_SEGMENTATION_MAPPING_NAMES,
"sequence_classification.mdx": transformers_module.models.auto.modeling_auto.MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING_NAMES,
"summarization.mdx": transformers_module.models.auto.modeling_auto.MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING_NAMES,
"token_classification.mdx": transformers_module.models.auto.modeling_auto.MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING_NAMES,
"translation.mdx": transformers_module.models.auto.modeling_auto.MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING_NAMES,
"video_classification.mdx": transformers_module.models.auto.modeling_auto.MODEL_FOR_VIDEO_CLASSIFICATION_MAPPING_NAMES,
"document_question_answering.mdx": transformers_module.models.auto.modeling_auto.MODEL_FOR_DOCUMENT_QUESTION_ANSWERING_MAPPING_NAMES,
"monocular_depth_estimation.mdx": transformers_module.models.auto.modeling_auto.MODEL_FOR_DEPTH_ESTIMATION_MAPPING_NAMES,
"asr.md": transformers_module.models.auto.modeling_auto.MODEL_FOR_CTC_MAPPING_NAMES,
"audio_classification.md": transformers_module.models.auto.modeling_auto.MODEL_FOR_AUDIO_CLASSIFICATION_MAPPING_NAMES,
"language_modeling.md": transformers_module.models.auto.modeling_auto.MODEL_FOR_CAUSAL_LM_MAPPING_NAMES,
"image_classification.md": transformers_module.models.auto.modeling_auto.MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING_NAMES,
"masked_language_modeling.md": transformers_module.models.auto.modeling_auto.MODEL_FOR_MASKED_LM_MAPPING_NAMES,
"multiple_choice.md": transformers_module.models.auto.modeling_auto.MODEL_FOR_MULTIPLE_CHOICE_MAPPING_NAMES,
"object_detection.md": transformers_module.models.auto.modeling_auto.MODEL_FOR_OBJECT_DETECTION_MAPPING_NAMES,
"question_answering.md": transformers_module.models.auto.modeling_auto.MODEL_FOR_QUESTION_ANSWERING_MAPPING_NAMES,
"semantic_segmentation.md": transformers_module.models.auto.modeling_auto.MODEL_FOR_SEMANTIC_SEGMENTATION_MAPPING_NAMES,
"sequence_classification.md": transformers_module.models.auto.modeling_auto.MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING_NAMES,
"summarization.md": transformers_module.models.auto.modeling_auto.MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING_NAMES,
"token_classification.md": transformers_module.models.auto.modeling_auto.MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING_NAMES,
"translation.md": transformers_module.models.auto.modeling_auto.MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING_NAMES,
"video_classification.md": transformers_module.models.auto.modeling_auto.MODEL_FOR_VIDEO_CLASSIFICATION_MAPPING_NAMES,
"document_question_answering.md": transformers_module.models.auto.modeling_auto.MODEL_FOR_DOCUMENT_QUESTION_ANSWERING_MAPPING_NAMES,
"monocular_depth_estimation.md": transformers_module.models.auto.modeling_auto.MODEL_FOR_DEPTH_ESTIMATION_MAPPING_NAMES,
}
# This list contains model types used in some task guides that are not in `CONFIG_MAPPING_NAMES` (therefore not in any
# `MODEL_MAPPING_NAMES` or any `MODEL_FOR_XXX_MAPPING_NAMES`).
SPECIAL_TASK_GUIDE_TO_MODEL_TYPES = {
"summarization.mdx": ("nllb",),
"translation.mdx": ("nllb",),
"summarization.md": ("nllb",),
"translation.md": ("nllb",),
}

View File

@@ -1,17 +1,17 @@
docs/source/en/quicktour.mdx
docs/source/es/quicktour.mdx
docs/source/en/pipeline_tutorial.mdx
docs/source/en/autoclass_tutorial.mdx
docs/source/en/task_summary.mdx
docs/source/en/model_doc/markuplm.mdx
docs/source/en/model_doc/speech_to_text.mdx
docs/source/en/model_doc/switch_transformers.mdx
docs/source/en/model_doc/t5.mdx
docs/source/en/model_doc/t5v1.1.mdx
docs/source/en/model_doc/byt5.mdx
docs/source/en/model_doc/tapex.mdx
docs/source/en/model_doc/donut.mdx
docs/source/en/model_doc/encoder-decoder.mdx
docs/source/en/quicktour.md
docs/source/es/quicktour.md
docs/source/en/pipeline_tutorial.md
docs/source/en/autoclass_tutorial.md
docs/source/en/task_summary.md
docs/source/en/model_doc/markuplm.md
docs/source/en/model_doc/speech_to_text.md
docs/source/en/model_doc/switch_transformers.md
docs/source/en/model_doc/t5.md
docs/source/en/model_doc/t5v1.1.md
docs/source/en/model_doc/byt5.md
docs/source/en/model_doc/tapex.md
docs/source/en/model_doc/donut.md
docs/source/en/model_doc/encoder-decoder.md
src/transformers/generation/configuration_utils.py
src/transformers/generation/tf_utils.py
src/transformers/generation/utils.py

View File

@@ -329,7 +329,7 @@ if __name__ == "__main__":
docs = collections.OrderedDict(
[
("*.py", "API Examples"),
("*.mdx", "MDX Examples"),
("*.md", "MD Examples"),
]
)

View File

@@ -263,14 +263,14 @@ def get_diff_for_doctesting(repo, base_commit, commits):
code_diff = []
for commit in commits:
for diff_obj in commit.diff(base_commit):
# We always add new python/mdx files
if diff_obj.change_type in ["A"] and (diff_obj.b_path.endswith(".py") or diff_obj.b_path.endswith(".mdx")):
# We always add new python/md files
if diff_obj.change_type in ["A"] and (diff_obj.b_path.endswith(".py") or diff_obj.b_path.endswith(".md")):
code_diff.append(diff_obj.b_path)
# Now for modified files
elif (
diff_obj.change_type in ["M", "R"]
and diff_obj.b_path.endswith(".py")
or diff_obj.b_path.endswith(".mdx")
or diff_obj.b_path.endswith(".md")
):
# In case of renames, we'll look at the tests using both the old and new name.
if diff_obj.a_path != diff_obj.b_path: