Make tiny model creation + pipeline testing more robust (#22500)

* Final Tiny things --------- Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
2023-04-06 17:45:55 +02:00
parent 12d51db243
commit 2c22bc79c2
8 changed files with 161 additions and 64 deletions
--- a/tests/models/led/test_modeling_led.py
+++ b/tests/models/led/test_modeling_led.py
@@ -294,6 +294,15 @@ class LEDModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMixin,
    test_missing_keys = False
    test_torchscript = False

+    # TODO: Fix the failed tests when this model gets more usage
+    def is_pipeline_test_to_skip(
+        self, pipeline_test_casse_name, config_class, model_architecture, tokenizer_name, processor_name
+    ):
+        if pipeline_test_casse_name == "QAPipelineTests" and not tokenizer_name.endswith("Fast"):
+            return True
+
+        return False
+
    def setUp(self):
        self.model_tester = LEDModelTester(self)
        self.config_tester = ConfigTester(self, config_class=LEDConfig)
--- a/tests/models/nllb_moe/test_modeling_nllb_moe.py
+++ b/tests/models/nllb_moe/test_modeling_nllb_moe.py
@@ -265,6 +265,13 @@ class NllbMoeModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMi
    test_missing_keys = True
    test_torchscript = False

+    # TODO: Fix the failed tests when this model gets more usage
+    def is_pipeline_test_to_skip(
+        self, pipeline_test_casse_name, config_class, model_architecture, tokenizer_name, processor_name
+    ):
+        # Saving the slow tokenizer after saving the fast tokenizer causes the loading of the later hanging forever.
+        return True
+
    def setUp(self):
        self.model_tester = NllbMoeModelTester(self)
        self.config_tester = ConfigTester(self, config_class=NllbMoeConfig)
--- a/tests/models/splinter/test_modeling_splinter.py
+++ b/tests/models/splinter/test_modeling_splinter.py
@@ -230,6 +230,8 @@ class SplinterModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase
    ):
        if pipeline_test_casse_name == "QAPipelineTests":
            return True
+        elif pipeline_test_casse_name == "FeatureExtractionPipelineTests" and tokenizer_name.endswith("Fast"):
+            return True

        return False

--- a/tests/test_pipeline_mixin.py
+++ b/tests/test_pipeline_mixin.py
@@ -93,7 +93,14 @@ for task, task_info in pipeline_test_mapping.items():
    }


-TINY_MODEL_SUMMARY_FILE_PATH = os.path.join(Path(__file__).parent.parent, "tests/utils/tiny_model_summary.json")
+# The default value `hf-internal-testing` is for running the pipeline testing against the tiny models on the Hub.
+# For debugging purpose, we can specify a local path which is the `output_path` argument of a previous run of
+# `utils/create_dummy_models.py`.
+TRANSFORMERS_TINY_MODEL_PATH = os.environ.get("TRANSFORMERS_TINY_MODEL_PATH", "hf-internal-testing")
+if TRANSFORMERS_TINY_MODEL_PATH == "hf-internal-testing":
+    TINY_MODEL_SUMMARY_FILE_PATH = os.path.join(Path(__file__).parent.parent, "tests/utils/tiny_model_summary.json")
+else:
+    TINY_MODEL_SUMMARY_FILE_PATH = os.path.join(TRANSFORMERS_TINY_MODEL_PATH, "reports", "tiny_model_summary.json")
 with open(TINY_MODEL_SUMMARY_FILE_PATH) as fp:
    tiny_model_summary = json.load(fp)

@@ -146,12 +153,15 @@ class PipelineTesterMixin:
            if model_arch_name in tiny_model_summary:
                tokenizer_names = tiny_model_summary[model_arch_name]["tokenizer_classes"]
                processor_names = tiny_model_summary[model_arch_name]["processor_classes"]
-                commit = tiny_model_summary[model_arch_name]["sha"]
+                if "sha" in tiny_model_summary[model_arch_name]:
+                    commit = tiny_model_summary[model_arch_name]["sha"]
            # Adding `None` (if empty) so we can generate tests
            tokenizer_names = [None] if len(tokenizer_names) == 0 else tokenizer_names
            processor_names = [None] if len(processor_names) == 0 else processor_names

            repo_name = f"tiny-random-{model_arch_name}"
+            if TRANSFORMERS_TINY_MODEL_PATH != "hf-internal-testing":
+                repo_name = model_arch_name

            self.run_model_pipeline_tests(
                task, repo_name, model_architecture, tokenizer_names, processor_names, commit
@@ -210,7 +220,10 @@ class PipelineTesterMixin:
            processor_name (`str`):
                The name of a subclass of `BaseImageProcessor` or `FeatureExtractionMixin`.
        """
-        repo_id = f"hf-internal-testing/{repo_name}"
+        repo_id = f"{TRANSFORMERS_TINY_MODEL_PATH}/{repo_name}"
+        if TRANSFORMERS_TINY_MODEL_PATH != "hf-internal-testing":
+            model_type = model_architecture.config_class.model_type
+            repo_id = os.path.join(TRANSFORMERS_TINY_MODEL_PATH, model_type, repo_name)

        tokenizer = None
        if tokenizer_name is not None: