Make tiny model creation + pipeline testing more robust (#22500)

* Final Tiny things

---------

Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
This commit is contained in:
Yih-Dar
2023-04-06 17:45:55 +02:00
committed by GitHub
parent 12d51db243
commit 2c22bc79c2
8 changed files with 161 additions and 64 deletions

View File

@@ -294,6 +294,15 @@ class LEDModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMixin,
test_missing_keys = False
test_torchscript = False
# TODO: Fix the failed tests when this model gets more usage
def is_pipeline_test_to_skip(
self, pipeline_test_casse_name, config_class, model_architecture, tokenizer_name, processor_name
):
if pipeline_test_casse_name == "QAPipelineTests" and not tokenizer_name.endswith("Fast"):
return True
return False
def setUp(self):
self.model_tester = LEDModelTester(self)
self.config_tester = ConfigTester(self, config_class=LEDConfig)

View File

@@ -265,6 +265,13 @@ class NllbMoeModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMi
test_missing_keys = True
test_torchscript = False
# TODO: Fix the failed tests when this model gets more usage
def is_pipeline_test_to_skip(
self, pipeline_test_casse_name, config_class, model_architecture, tokenizer_name, processor_name
):
# Saving the slow tokenizer after saving the fast tokenizer causes the loading of the later hanging forever.
return True
def setUp(self):
self.model_tester = NllbMoeModelTester(self)
self.config_tester = ConfigTester(self, config_class=NllbMoeConfig)

View File

@@ -230,6 +230,8 @@ class SplinterModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase
):
if pipeline_test_casse_name == "QAPipelineTests":
return True
elif pipeline_test_casse_name == "FeatureExtractionPipelineTests" and tokenizer_name.endswith("Fast"):
return True
return False

View File

@@ -93,7 +93,14 @@ for task, task_info in pipeline_test_mapping.items():
}
TINY_MODEL_SUMMARY_FILE_PATH = os.path.join(Path(__file__).parent.parent, "tests/utils/tiny_model_summary.json")
# The default value `hf-internal-testing` is for running the pipeline testing against the tiny models on the Hub.
# For debugging purpose, we can specify a local path which is the `output_path` argument of a previous run of
# `utils/create_dummy_models.py`.
TRANSFORMERS_TINY_MODEL_PATH = os.environ.get("TRANSFORMERS_TINY_MODEL_PATH", "hf-internal-testing")
if TRANSFORMERS_TINY_MODEL_PATH == "hf-internal-testing":
TINY_MODEL_SUMMARY_FILE_PATH = os.path.join(Path(__file__).parent.parent, "tests/utils/tiny_model_summary.json")
else:
TINY_MODEL_SUMMARY_FILE_PATH = os.path.join(TRANSFORMERS_TINY_MODEL_PATH, "reports", "tiny_model_summary.json")
with open(TINY_MODEL_SUMMARY_FILE_PATH) as fp:
tiny_model_summary = json.load(fp)
@@ -146,12 +153,15 @@ class PipelineTesterMixin:
if model_arch_name in tiny_model_summary:
tokenizer_names = tiny_model_summary[model_arch_name]["tokenizer_classes"]
processor_names = tiny_model_summary[model_arch_name]["processor_classes"]
commit = tiny_model_summary[model_arch_name]["sha"]
if "sha" in tiny_model_summary[model_arch_name]:
commit = tiny_model_summary[model_arch_name]["sha"]
# Adding `None` (if empty) so we can generate tests
tokenizer_names = [None] if len(tokenizer_names) == 0 else tokenizer_names
processor_names = [None] if len(processor_names) == 0 else processor_names
repo_name = f"tiny-random-{model_arch_name}"
if TRANSFORMERS_TINY_MODEL_PATH != "hf-internal-testing":
repo_name = model_arch_name
self.run_model_pipeline_tests(
task, repo_name, model_architecture, tokenizer_names, processor_names, commit
@@ -210,7 +220,10 @@ class PipelineTesterMixin:
processor_name (`str`):
The name of a subclass of `BaseImageProcessor` or `FeatureExtractionMixin`.
"""
repo_id = f"hf-internal-testing/{repo_name}"
repo_id = f"{TRANSFORMERS_TINY_MODEL_PATH}/{repo_name}"
if TRANSFORMERS_TINY_MODEL_PATH != "hf-internal-testing":
model_type = model_architecture.config_class.model_type
repo_id = os.path.join(TRANSFORMERS_TINY_MODEL_PATH, model_type, repo_name)
tokenizer = None
if tokenizer_name is not None: