From 6f8f2f6a77457f993a582bde2bff92af863a2d06 Mon Sep 17 00:00:00 2001 From: Sylvain Gugger <35901082+sgugger@users.noreply.github.com> Date: Wed, 14 Sep 2022 07:36:12 -0400 Subject: [PATCH] Make AutoProcessor a magic loading class for all modalities (#18963) * Make AutoProcessor a magic loading class for all modalities * Quality --- .../models/auto/processing_auto.py | 24 +++++++++++++++---- tests/models/auto/test_processor_auto.py | 8 +++++++ 2 files changed, 28 insertions(+), 4 deletions(-) diff --git a/src/transformers/models/auto/processing_auto.py b/src/transformers/models/auto/processing_auto.py index 7eff84c5d5..07b2811a16 100644 --- a/src/transformers/models/auto/processing_auto.py +++ b/src/transformers/models/auto/processing_auto.py @@ -23,7 +23,7 @@ from ...configuration_utils import PretrainedConfig from ...dynamic_module_utils import get_class_from_dynamic_module from ...feature_extraction_utils import FeatureExtractionMixin from ...tokenization_utils import TOKENIZER_CONFIG_FILE -from ...utils import CONFIG_NAME, FEATURE_EXTRACTOR_NAME, get_file_from_repo, logging +from ...utils import FEATURE_EXTRACTOR_NAME, get_file_from_repo, logging from .auto_factory import _LazyAutoMapping from .configuration_auto import ( CONFIG_MAPPING_NAMES, @@ -31,6 +31,8 @@ from .configuration_auto import ( model_type_to_module_name, replace_list_option_in_docstrings, ) +from .feature_extraction_auto import AutoFeatureExtractor +from .tokenization_auto import AutoTokenizer logger = logging.get_logger(__name__) @@ -250,10 +252,24 @@ class AutoProcessor: if type(config) in PROCESSOR_MAPPING: return PROCESSOR_MAPPING[type(config)].from_pretrained(pretrained_model_name_or_path, **kwargs) + # At this stage, there doesn't seem to be a `Processor` class available for this model, so let's try a + # tokenizer. + try: + return AutoTokenizer.from_pretrained( + pretrained_model_name_or_path, trust_remote_code=trust_remote_code, **kwargs + ) + except Exception: + try: + return AutoFeatureExtractor.from_pretrained( + pretrained_model_name_or_path, trust_remote_code=trust_remote_code, **kwargs + ) + except Exception: + pass + raise ValueError( - f"Unrecognized processor in {pretrained_model_name_or_path}. Should have a `processor_type` key in " - f"its {FEATURE_EXTRACTOR_NAME}, or one of the following `model_type` keys in its {CONFIG_NAME}: " - f"{', '.join(c for c in PROCESSOR_MAPPING_NAMES.keys())}" + f"Unrecognized processing class in {pretrained_model_name_or_path}. Can't instantiate a processor, a " + "tokenizer or a feature extractor for this model. Make sure the repository contains the files of at least " + "one of those processing classes." ) @staticmethod diff --git a/tests/models/auto/test_processor_auto.py b/tests/models/auto/test_processor_auto.py index 2f99d5c379..fe57078a61 100644 --- a/tests/models/auto/test_processor_auto.py +++ b/tests/models/auto/test_processor_auto.py @@ -202,6 +202,14 @@ class AutoFeatureExtractorTest(unittest.TestCase): if CustomConfig in PROCESSOR_MAPPING._extra_content: del PROCESSOR_MAPPING._extra_content[CustomConfig] + def test_auto_processor_creates_tokenizer(self): + processor = AutoProcessor.from_pretrained("hf-internal-testing/tiny-random-bert") + self.assertEqual(processor.__class__.__name__, "BertTokenizerFast") + + def test_auto_processor_creates_feature_extractor(self): + processor = AutoProcessor.from_pretrained("hf-internal-testing/tiny-random-convnext") + self.assertEqual(processor.__class__.__name__, "ConvNextFeatureExtractor") + @is_staging_test class ProcessorPushToHubTester(unittest.TestCase):