[docs] Update to scripts building index.md (#26546)
* build the table in index.md with links to the model_doc
* removed list generation on index.md
* fixed missing models
* make style
This commit is contained in:
@@ -15,7 +15,7 @@
|
||||
"""
|
||||
Utility that checks whether the copies defined in the library match the original or not. This includes:
|
||||
- All code commented with `# Copied from` comments,
|
||||
- The list of models in the main README.md matches the ones in the localized READMEs and in the index.md,
|
||||
- The list of models in the main README.md matches the ones in the localized READMEs,
|
||||
- Files that are registered as full copies of one another in the `FULL_COPIES` constant of this script.
|
||||
|
||||
This also checks the list of models in the README is complete (has all models) and add a line to complete if there is
|
||||
@@ -350,7 +350,7 @@ def is_copy_consistent(filename: str, overwrite: bool = False) -> Optional[List[
|
||||
def check_copies(overwrite: bool = False):
|
||||
"""
|
||||
Check every file is copy-consistent with the original. Also check the model list in the main README and other
|
||||
READMEs/index.md are consistent.
|
||||
READMEs are consistent.
|
||||
|
||||
Args:
|
||||
overwrite (`bool`, *optional*, defaults to `False`):
|
||||
@@ -517,21 +517,6 @@ def convert_to_localized_md(model_list: str, localized_model_list: str, format_s
|
||||
return readmes_match, "\n".join((x[1] for x in sorted_index)) + "\n"
|
||||
|
||||
|
||||
def convert_readme_to_index(model_list: str) -> str:
|
||||
"""
|
||||
Converts the model list of the README to the index.md format (adapting links to the doc to relative links).
|
||||
|
||||
Args:
|
||||
model_list (`str`): The model list of the main README.
|
||||
|
||||
Returns:
|
||||
`str`: The model list in the format for the index.
|
||||
"""
|
||||
# We need to replace both the links to the main doc and to the stable doc (the order of the next two instructions is important).
|
||||
model_list = model_list.replace("https://huggingface.co/docs/transformers/main/", "")
|
||||
return model_list.replace("https://huggingface.co/docs/transformers/", "")
|
||||
|
||||
|
||||
def _find_text_in_file(filename: str, start_prompt: str, end_prompt: str) -> Tuple[str, int, int, List[str]]:
|
||||
"""
|
||||
Find the text in a file between two prompts.
|
||||
@@ -591,19 +576,13 @@ def check_model_list_copy(overwrite: bool = False):
|
||||
"automatically fix them."
|
||||
)
|
||||
|
||||
# If the introduction or the conclusion of the list change, the prompts may need to be updated.
|
||||
index_list, start_index, end_index, lines = _find_text_in_file(
|
||||
filename=os.path.join(PATH_TO_DOCS, "index.md"),
|
||||
start_prompt="<!--This list is updated automatically from the README",
|
||||
end_prompt="### Supported frameworks",
|
||||
)
|
||||
md_list = get_model_list(
|
||||
filename="README.md",
|
||||
start_prompt=LOCALIZED_READMES["README.md"]["start_prompt"],
|
||||
end_prompt=LOCALIZED_READMES["README.md"]["end_prompt"],
|
||||
)
|
||||
|
||||
# Buld the converted Markdown.
|
||||
# Build the converted Markdown.
|
||||
converted_md_lists = []
|
||||
for filename, value in LOCALIZED_READMES.items():
|
||||
_start_prompt = value["start_prompt"]
|
||||
@@ -615,18 +594,6 @@ def check_model_list_copy(overwrite: bool = False):
|
||||
|
||||
converted_md_lists.append((filename, readmes_match, converted_md_list, _start_prompt, _end_prompt))
|
||||
|
||||
# Build the converted index and compare it.
|
||||
converted_md_list = convert_readme_to_index(md_list)
|
||||
if converted_md_list != index_list:
|
||||
if overwrite:
|
||||
with open(os.path.join(PATH_TO_DOCS, "index.md"), "w", encoding="utf-8", newline="\n") as f:
|
||||
f.writelines(lines[:start_index] + [converted_md_list] + lines[end_index:])
|
||||
else:
|
||||
raise ValueError(
|
||||
"The model list in the README changed and the list in `index.md` has not been updated. Run "
|
||||
"`make fix-copies` to fix this."
|
||||
)
|
||||
|
||||
# Compare the converted Markdowns
|
||||
for converted_md_list in converted_md_lists:
|
||||
filename, readmes_match, converted_md, _start_prompt, _end_prompt = converted_md_list
|
||||
|
||||
@@ -132,6 +132,46 @@ def _center_text(text: str, width: int) -> str:
|
||||
return " " * left_indent + text + " " * right_indent
|
||||
|
||||
|
||||
SPECIAL_MODEL_NAME_LINK_MAPPING = {
|
||||
"Data2VecAudio": "[Data2VecAudio](model_doc/data2vec)",
|
||||
"Data2VecText": "[Data2VecText](model_doc/data2vec)",
|
||||
"Data2VecVision": "[Data2VecVision](model_doc/data2vec)",
|
||||
"DonutSwin": "[DonutSwin](model_doc/donut)",
|
||||
}
|
||||
|
||||
MODEL_NAMES_WITH_SAME_CONFIG = {
|
||||
"BARThez": "BART",
|
||||
"BARTpho": "BART",
|
||||
"BertJapanese": "BERT",
|
||||
"BERTweet": "BERT",
|
||||
"BORT": "BERT",
|
||||
"ByT5": "T5",
|
||||
"CPM": "OpenAI GPT-2",
|
||||
"DePlot": "Pix2Struct",
|
||||
"DialoGPT": "OpenAI GPT-2",
|
||||
"DiT": "BEiT",
|
||||
"FLAN-T5": "T5",
|
||||
"FLAN-UL2": "T5",
|
||||
"HerBERT": "BERT",
|
||||
"LayoutXLM": "LayoutLMv2",
|
||||
"Llama2": "LLaMA",
|
||||
"MatCha": "Pix2Struct",
|
||||
"mBART-50": "mBART",
|
||||
"Megatron-GPT2": "OpenAI GPT-2",
|
||||
"mLUKE": "LUKE",
|
||||
"MMS": "Wav2Vec2",
|
||||
"NLLB": "M2M100",
|
||||
"PhoBERT": "BERT",
|
||||
"T5v1.1": "T5",
|
||||
"TAPEX": "BART",
|
||||
"UL2": "T5",
|
||||
"Wav2Vec2Phoneme": "Wav2Vec2",
|
||||
"XLM-V": "XLM-RoBERTa",
|
||||
"XLS-R": "Wav2Vec2",
|
||||
"XLSR-Wav2Vec2": "Wav2Vec2",
|
||||
}
|
||||
|
||||
|
||||
def get_model_table_from_auto_modules() -> str:
|
||||
"""
|
||||
Generates an up-to-date model table from the content of the auto modules.
|
||||
@@ -172,17 +212,27 @@ def get_model_table_from_auto_modules() -> str:
|
||||
attr_name = "".join(camel_case_split(attr_name)[:-1])
|
||||
|
||||
# Let's build that table!
|
||||
model_names = list(model_name_to_config.keys())
|
||||
model_names = list(model_name_to_config.keys()) + list(MODEL_NAMES_WITH_SAME_CONFIG.keys())
|
||||
|
||||
# model name to doc link mapping
|
||||
model_names_mapping = transformers_module.models.auto.configuration_auto.MODEL_NAMES_MAPPING
|
||||
model_name_to_link_mapping = {value: f"[{value}](model_doc/{key})" for key, value in model_names_mapping.items()}
|
||||
# update mapping with special model names
|
||||
model_name_to_link_mapping = {
|
||||
k: SPECIAL_MODEL_NAME_LINK_MAPPING[k] if k in SPECIAL_MODEL_NAME_LINK_MAPPING else v
|
||||
for k, v in model_name_to_link_mapping.items()
|
||||
}
|
||||
|
||||
# MaskFormerSwin and TimmBackbone are backbones and so not meant to be loaded and used on their own. Instead, they define architectures which can be loaded using the AutoBackbone API.
|
||||
names_to_exclude = ["MaskFormerSwin", "TimmBackbone"]
|
||||
names_to_exclude = ["MaskFormerSwin", "TimmBackbone", "Speech2Text2"]
|
||||
model_names = [name for name in model_names if name not in names_to_exclude]
|
||||
model_names.sort(key=str.lower)
|
||||
|
||||
columns = ["Model", "PyTorch support", "TensorFlow support", "Flax Support"]
|
||||
# We'll need widths to properly display everything in the center (+2 is to leave one extra space on each side).
|
||||
|
||||
widths = [len(c) + 2 for c in columns]
|
||||
widths[0] = max([len(name) for name in model_names]) + 2
|
||||
widths[0] = max([len(doc_link) for doc_link in model_name_to_link_mapping.values()]) + 2
|
||||
|
||||
# Build the table per se
|
||||
table = "|" + "|".join([_center_text(c, w) for c, w in zip(columns, widths)]) + "|\n"
|
||||
@@ -190,10 +240,14 @@ def get_model_table_from_auto_modules() -> str:
|
||||
table += "|" + "|".join([":" + "-" * (w - 2) + ":" for w in widths]) + "|\n"
|
||||
|
||||
check = {True: "✅", False: "❌"}
|
||||
|
||||
for name in model_names:
|
||||
prefix = model_name_to_prefix[name]
|
||||
if name in MODEL_NAMES_WITH_SAME_CONFIG.keys():
|
||||
prefix = model_name_to_prefix[MODEL_NAMES_WITH_SAME_CONFIG[name]]
|
||||
else:
|
||||
prefix = model_name_to_prefix[name]
|
||||
line = [
|
||||
name,
|
||||
model_name_to_link_mapping[name],
|
||||
check[pt_models[prefix]],
|
||||
check[tf_models[prefix]],
|
||||
check[flax_models[prefix]],
|
||||
|
||||
Reference in New Issue
Block a user