Fix README localizer script (#17407)
This commit is contained in:
@@ -312,8 +312,6 @@ def convert_to_localized_md(model_list, localized_model_list, format_str):
|
|||||||
# This regex is used to synchronize link.
|
# This regex is used to synchronize link.
|
||||||
_re_capture_title_link = re.compile(r"\*\*\[([^\]]*)\]\(([^\)]*)\)\*\*")
|
_re_capture_title_link = re.compile(r"\*\*\[([^\]]*)\]\(([^\)]*)\)\*\*")
|
||||||
|
|
||||||
num_models_equal = True
|
|
||||||
|
|
||||||
if len(localized_model_list) == 0:
|
if len(localized_model_list) == 0:
|
||||||
localized_model_index = {}
|
localized_model_index = {}
|
||||||
else:
|
else:
|
||||||
@@ -325,10 +323,16 @@ def convert_to_localized_md(model_list, localized_model_list, format_str):
|
|||||||
except AttributeError:
|
except AttributeError:
|
||||||
raise AttributeError("A model name in localized READMEs cannot be recognized.")
|
raise AttributeError("A model name in localized READMEs cannot be recognized.")
|
||||||
|
|
||||||
|
model_keys = [re.search(r"\*\*\[([^\]]*)", line).groups()[0] for line in model_list.strip().split("\n")]
|
||||||
|
|
||||||
|
# We exclude keys in localized README not in the main one.
|
||||||
|
readmes_match = not any([k not in model_keys for k in localized_model_index])
|
||||||
|
localized_model_index = {k: v for k, v in localized_model_index.items() if k in model_keys}
|
||||||
|
|
||||||
for model in model_list.strip().split("\n"):
|
for model in model_list.strip().split("\n"):
|
||||||
title, model_link = _re_capture_title_link.search(model).groups()
|
title, model_link = _re_capture_title_link.search(model).groups()
|
||||||
if title not in localized_model_index:
|
if title not in localized_model_index:
|
||||||
num_models_equal = False
|
readmes_match = False
|
||||||
# Add an anchor white space behind a model description string for regex.
|
# Add an anchor white space behind a model description string for regex.
|
||||||
# If metadata cannot be captured, the English version will be directly copied.
|
# If metadata cannot be captured, the English version will be directly copied.
|
||||||
localized_model_index[title] = _re_capture_meta.sub(_rep, model + " ")
|
localized_model_index[title] = _re_capture_meta.sub(_rep, model + " ")
|
||||||
@@ -340,7 +344,7 @@ def convert_to_localized_md(model_list, localized_model_list, format_str):
|
|||||||
|
|
||||||
sorted_index = sorted(localized_model_index.items(), key=lambda x: x[0].lower())
|
sorted_index = sorted(localized_model_index.items(), key=lambda x: x[0].lower())
|
||||||
|
|
||||||
return num_models_equal, "\n".join(map(lambda x: x[1], sorted_index)) + "\n"
|
return readmes_match, "\n".join(map(lambda x: x[1], sorted_index)) + "\n"
|
||||||
|
|
||||||
|
|
||||||
def convert_readme_to_index(model_list):
|
def convert_readme_to_index(model_list):
|
||||||
@@ -380,7 +384,7 @@ def check_model_list_copy(overwrite=False, max_per_line=119):
|
|||||||
with open(os.path.join(REPO_PATH, "README.md"), "r", encoding="utf-8", newline="\n") as f:
|
with open(os.path.join(REPO_PATH, "README.md"), "r", encoding="utf-8", newline="\n") as f:
|
||||||
readme = f.read()
|
readme = f.read()
|
||||||
new_readme = readme.replace("https://huggingface.co/transformers", "https://huggingface.co/docs/transformers")
|
new_readme = readme.replace("https://huggingface.co/transformers", "https://huggingface.co/docs/transformers")
|
||||||
new_readme = readme.replace(
|
new_readme = new_readme.replace(
|
||||||
"https://huggingface.co/docs/main/transformers", "https://huggingface.co/docs/transformers/main"
|
"https://huggingface.co/docs/main/transformers", "https://huggingface.co/docs/transformers/main"
|
||||||
)
|
)
|
||||||
if new_readme != readme:
|
if new_readme != readme:
|
||||||
@@ -412,9 +416,9 @@ def check_model_list_copy(overwrite=False, max_per_line=119):
|
|||||||
_format_model_list = value["format_model_list"]
|
_format_model_list = value["format_model_list"]
|
||||||
|
|
||||||
localized_md_list = get_model_list(filename, _start_prompt, _end_prompt)
|
localized_md_list = get_model_list(filename, _start_prompt, _end_prompt)
|
||||||
num_models_equal, converted_md_list = convert_to_localized_md(md_list, localized_md_list, _format_model_list)
|
readmes_match, converted_md_list = convert_to_localized_md(md_list, localized_md_list, _format_model_list)
|
||||||
|
|
||||||
converted_md_lists.append((filename, num_models_equal, converted_md_list, _start_prompt, _end_prompt))
|
converted_md_lists.append((filename, readmes_match, converted_md_list, _start_prompt, _end_prompt))
|
||||||
|
|
||||||
converted_md_list = convert_readme_to_index(md_list)
|
converted_md_list = convert_readme_to_index(md_list)
|
||||||
if converted_md_list != index_list:
|
if converted_md_list != index_list:
|
||||||
@@ -428,7 +432,7 @@ def check_model_list_copy(overwrite=False, max_per_line=119):
|
|||||||
)
|
)
|
||||||
|
|
||||||
for converted_md_list in converted_md_lists:
|
for converted_md_list in converted_md_lists:
|
||||||
filename, num_models_equal, converted_md, _start_prompt, _end_prompt = converted_md_list
|
filename, readmes_match, converted_md, _start_prompt, _end_prompt = converted_md_list
|
||||||
|
|
||||||
if filename == "README.md":
|
if filename == "README.md":
|
||||||
continue
|
continue
|
||||||
@@ -438,7 +442,7 @@ def check_model_list_copy(overwrite=False, max_per_line=119):
|
|||||||
)
|
)
|
||||||
with open(os.path.join(REPO_PATH, filename), "w", encoding="utf-8", newline="\n") as f:
|
with open(os.path.join(REPO_PATH, filename), "w", encoding="utf-8", newline="\n") as f:
|
||||||
f.writelines(lines[:start_index] + [converted_md] + lines[end_index:])
|
f.writelines(lines[:start_index] + [converted_md] + lines[end_index:])
|
||||||
elif not num_models_equal:
|
elif not readmes_match:
|
||||||
raise ValueError(
|
raise ValueError(
|
||||||
f"The model list in the README changed and the list in `{filename}` has not been updated. Run "
|
f"The model list in the README changed and the list in `{filename}` has not been updated. Run "
|
||||||
"`make fix-copies` to fix this."
|
"`make fix-copies` to fix this."
|
||||||
|
|||||||
Reference in New Issue
Block a user