From bd908e9bb133d65663e64d64667e6d958210e5b0 Mon Sep 17 00:00:00 2001 From: Sylvain Gugger <35901082+sgugger@users.noreply.github.com> Date: Wed, 25 May 2022 07:23:40 -0400 Subject: [PATCH] Fix README localizer script (#17407) --- utils/check_copies.py | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/utils/check_copies.py b/utils/check_copies.py index 512615da27..7565bfa51b 100644 --- a/utils/check_copies.py +++ b/utils/check_copies.py @@ -312,8 +312,6 @@ def convert_to_localized_md(model_list, localized_model_list, format_str): # This regex is used to synchronize link. _re_capture_title_link = re.compile(r"\*\*\[([^\]]*)\]\(([^\)]*)\)\*\*") - num_models_equal = True - if len(localized_model_list) == 0: localized_model_index = {} else: @@ -325,10 +323,16 @@ def convert_to_localized_md(model_list, localized_model_list, format_str): except AttributeError: raise AttributeError("A model name in localized READMEs cannot be recognized.") + model_keys = [re.search(r"\*\*\[([^\]]*)", line).groups()[0] for line in model_list.strip().split("\n")] + + # We exclude keys in localized README not in the main one. + readmes_match = not any([k not in model_keys for k in localized_model_index]) + localized_model_index = {k: v for k, v in localized_model_index.items() if k in model_keys} + for model in model_list.strip().split("\n"): title, model_link = _re_capture_title_link.search(model).groups() if title not in localized_model_index: - num_models_equal = False + readmes_match = False # Add an anchor white space behind a model description string for regex. # If metadata cannot be captured, the English version will be directly copied. localized_model_index[title] = _re_capture_meta.sub(_rep, model + " ") @@ -340,7 +344,7 @@ def convert_to_localized_md(model_list, localized_model_list, format_str): sorted_index = sorted(localized_model_index.items(), key=lambda x: x[0].lower()) - return num_models_equal, "\n".join(map(lambda x: x[1], sorted_index)) + "\n" + return readmes_match, "\n".join(map(lambda x: x[1], sorted_index)) + "\n" def convert_readme_to_index(model_list): @@ -380,7 +384,7 @@ def check_model_list_copy(overwrite=False, max_per_line=119): with open(os.path.join(REPO_PATH, "README.md"), "r", encoding="utf-8", newline="\n") as f: readme = f.read() new_readme = readme.replace("https://huggingface.co/transformers", "https://huggingface.co/docs/transformers") - new_readme = readme.replace( + new_readme = new_readme.replace( "https://huggingface.co/docs/main/transformers", "https://huggingface.co/docs/transformers/main" ) if new_readme != readme: @@ -412,9 +416,9 @@ def check_model_list_copy(overwrite=False, max_per_line=119): _format_model_list = value["format_model_list"] localized_md_list = get_model_list(filename, _start_prompt, _end_prompt) - num_models_equal, converted_md_list = convert_to_localized_md(md_list, localized_md_list, _format_model_list) + readmes_match, converted_md_list = convert_to_localized_md(md_list, localized_md_list, _format_model_list) - converted_md_lists.append((filename, num_models_equal, converted_md_list, _start_prompt, _end_prompt)) + converted_md_lists.append((filename, readmes_match, converted_md_list, _start_prompt, _end_prompt)) converted_md_list = convert_readme_to_index(md_list) if converted_md_list != index_list: @@ -428,7 +432,7 @@ def check_model_list_copy(overwrite=False, max_per_line=119): ) for converted_md_list in converted_md_lists: - filename, num_models_equal, converted_md, _start_prompt, _end_prompt = converted_md_list + filename, readmes_match, converted_md, _start_prompt, _end_prompt = converted_md_list if filename == "README.md": continue @@ -438,7 +442,7 @@ def check_model_list_copy(overwrite=False, max_per_line=119): ) with open(os.path.join(REPO_PATH, filename), "w", encoding="utf-8", newline="\n") as f: f.writelines(lines[:start_index] + [converted_md] + lines[end_index:]) - elif not num_models_equal: + elif not readmes_match: raise ValueError( f"The model list in the README changed and the list in `{filename}` has not been updated. Run " "`make fix-copies` to fix this."