Avoid many failing tests in doctesting (#27262)

* fix
* update
* update
* fix

---------

Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
2023-11-03 12:47:07 +01:00
parent 8f1a43cd91
commit af8d1dc309
3 changed files with 997 additions and 993 deletions
--- a/utils/check_doctest_list.py
+++ b/utils/check_doctest_list.py
@@ -54,7 +54,7 @@ def clean_doctest_list(doctest_file: str, overwrite: bool = False):
    all_paths = []
    with open(doctest_file, "r", encoding="utf-8") as f:
        for line in f:
-            line = line.strip()
+            line = line.strip().split(" ")[0]
            path = os.path.join(REPO_PATH, line)
            if not (os.path.isfile(path) or os.path.isdir(path)):
                non_existent_paths.append(line)
--- a/utils/not_doctested.txt
+++ b/utils/not_doctested.txt
@@ -293,6 +293,7 @@ docs/source/en/serialization.md
 docs/source/en/tasks/asr.md
 docs/source/en/tasks/audio_classification.md
 docs/source/en/tasks/document_question_answering.md
+docs/source/en/tasks/idefics.md  # causes other tests to fail
 docs/source/en/tasks/image_captioning.md
 docs/source/en/tasks/image_classification.md
 docs/source/en/tasks/language_modeling.md
@@ -430,6 +431,7 @@ src/transformers/models/blip/modeling_blip_text.py
 src/transformers/models/blip/modeling_tf_blip_text.py
 src/transformers/models/blip_2/configuration_blip_2.py
 src/transformers/models/blip_2/convert_blip_2_original_to_pytorch.py
+src/transformers/models/blip_2/modeling_blip_2.py  # causes other tests to fail
 src/transformers/models/bloom/convert_bloom_original_checkpoint_to_pytorch.py
 src/transformers/models/bloom/modeling_bloom.py
 src/transformers/models/bloom/modeling_flax_bloom.py
--- a/utils/tests_fetcher.py
+++ b/utils/tests_fetcher.py
@@ -387,7 +387,7 @@ def get_all_doctest_files() -> List[str]:
    # These are files not doctested yet.
    with open("utils/not_doctested.txt") as fp:
-        not_doctested = set(fp.read().strip().split("\n"))
+        not_doctested = {x.split(" ")[0] for x in fp.read().strip().split("\n")}
    # So far we don't have 100% coverage for doctest. This line will be removed once we achieve 100%.
    test_files_to_run = [x for x in test_files_to_run if x not in not_doctested]
@@ -415,7 +415,9 @@ def get_new_doctest_files(repo, base_commit, branching_commit) -> List[str]:
        with open(folder / "utils/not_doctested.txt", "r", encoding="utf-8") as f:
            new_content = f.read()
        # Compute the removed lines and return them
-        removed_content = set(old_content.split("\n")) - set(new_content.split("\n"))
+        removed_content = {x.split(" ")[0] for x in old_content.split("\n")} - {
+            x.split(" ")[0] for x in new_content.split("\n")
+        }
        return sorted(removed_content)
    return []