Save Processor (#27761)

* save processor * Update tests/models/auto/test_processor_auto.py Co-authored-by: Arthur <48595927+ArthurZucker@users.noreply.github.com> * Update tests/test_processing_common.py Co-authored-by: Arthur <48595927+ArthurZucker@users.noreply.github.com> * fix --------- Co-authored-by: ydshieh <ydshieh@users.noreply.github.com> Co-authored-by: Arthur <48595927+ArthurZucker@users.noreply.github.com>
2024-01-18 11:21:45 +01:00
parent 98dda8ed03
commit 3005f96552
6 changed files with 480 additions and 19 deletions
--- a/src/transformers/models/auto/processing_auto.py
+++ b/src/transformers/models/auto/processing_auto.py
@@ -25,8 +25,9 @@ from ...configuration_utils import PretrainedConfig
 from ...dynamic_module_utils import get_class_from_dynamic_module, resolve_trust_remote_code
 from ...feature_extraction_utils import FeatureExtractionMixin
 from ...image_processing_utils import ImageProcessingMixin
 from ...processing_utils import ProcessorMixin
 from ...tokenization_utils import TOKENIZER_CONFIG_FILE
-from ...utils import FEATURE_EXTRACTOR_NAME, get_file_from_repo, logging
+from ...utils import FEATURE_EXTRACTOR_NAME, PROCESSOR_NAME, get_file_from_repo, logging
 from .auto_factory import _LazyAutoMapping
 from .configuration_auto import (
    CONFIG_MAPPING_NAMES,
@@ -227,12 +228,24 @@ class AutoProcessor:
        processor_class = None
        processor_auto_map = None
-        # First, let's see if we have a preprocessor config.
+        # First, let's see if we have a processor or preprocessor config.
        # Filter the kwargs for `get_file_from_repo`.
        get_file_from_repo_kwargs = {
            key: kwargs[key] for key in inspect.signature(get_file_from_repo).parameters.keys() if key in kwargs
        }
-        # Let's start by checking whether the processor class is saved in an image processor
+
        # Let's start by checking whether the processor class is saved in a processor config
        processor_config_file = get_file_from_repo(
            pretrained_model_name_or_path, PROCESSOR_NAME, **get_file_from_repo_kwargs
        )
        if processor_config_file is not None:
            config_dict, _ = ProcessorMixin.get_processor_dict(pretrained_model_name_or_path, **kwargs)
            processor_class = config_dict.get("processor_class", None)
            if "AutoProcessor" in config_dict.get("auto_map", {}):
                processor_auto_map = config_dict["auto_map"]["AutoProcessor"]
        if processor_class is None:
            # If not found, let's check whether the processor class is saved in an image processor config
            preprocessor_config_file = get_file_from_repo(
                pretrained_model_name_or_path, FEATURE_EXTRACTOR_NAME, **get_file_from_repo_kwargs
            )
@@ -244,7 +257,9 @@ class AutoProcessor:
            # If not found, let's check whether the processor class is saved in a feature extractor config
            if preprocessor_config_file is not None and processor_class is None:
-            config_dict, _ = FeatureExtractionMixin.get_feature_extractor_dict(pretrained_model_name_or_path, **kwargs)
+                config_dict, _ = FeatureExtractionMixin.get_feature_extractor_dict(
                    pretrained_model_name_or_path, **kwargs
                )
                processor_class = config_dict.get("processor_class", None)
                if "AutoProcessor" in config_dict.get("auto_map", {}):
                    processor_auto_map = config_dict["auto_map"]["AutoProcessor"]
--- a/src/transformers/processing_utils.py
+++ b/src/transformers/processing_utils.py
@@ -16,14 +16,28 @@
 Processing saving/loading class for common processors.
 """
 import copy
 import inspect
 import json
 import os
 import warnings
 from pathlib import Path
-from typing import Optional, Union
+from typing import Any, Dict, Optional, Tuple, Union
 from .dynamic_module_utils import custom_object_save
 from .tokenization_utils_base import PreTrainedTokenizerBase
-from .utils import PushToHubMixin, copy_func, direct_transformers_import, logging
+from .utils import (
    PROCESSOR_NAME,
    PushToHubMixin,
    add_model_info_to_auto_map,
    cached_file,
    copy_func,
    direct_transformers_import,
    download_url,
    is_offline_mode,
    is_remote_url,
    logging,
 )
 logger = logging.get_logger(__name__)
@@ -85,10 +99,70 @@ class ProcessorMixin(PushToHubMixin):
            setattr(self, attribute_name, arg)
    def to_dict(self) -> Dict[str, Any]:
        """
        Serializes this instance to a Python dictionary.

        Only instance attributes that are arguments of `__init__` (plus `auto_map`) are kept. Sub-components listed
        in `self.__class__.attributes`, anything named `tokenizer`, `image_processor` or `feature_extractor`, and
        values that are `PushToHubMixin` instances or `BeamSearchDecoderCTC` objects are left out. The class name is
        stored under the `processor_class` key.

        Returns:
            `Dict[str, Any]`: Dictionary of all the attributes that make up this processor instance.
        """
        # Only save the attributes that are present in the arguments of `__init__`, without the sub-components
        # (`tokenizer`, `image processor`, etc.), which are saved on their own.
        init_params = inspect.signature(self.__init__).parameters
        attrs_to_save = {name for name in init_params if name not in self.__class__.attributes}
        # extra attributes to be kept
        attrs_to_save.add("auto_map")
        # These names are never serialized here, even when they are not declared in `attributes`.
        attrs_to_save -= {"tokenizer", "image_processor", "feature_extractor"}

        # Filter *before* copying: deep-copying the whole `__dict__` would also duplicate the tokenizer, image
        # processor or decoder objects, only to throw the copies away afterwards.
        # Some attributes have different names but contain objects that are not simple strings; skip them as well.
        selected = {
            key: value
            for key, value in self.__dict__.items()
            if key in attrs_to_save
            and not (isinstance(value, PushToHubMixin) or value.__class__.__name__ == "BeamSearchDecoderCTC")
        }
        # A single `deepcopy` call keeps objects shared between retained attributes shared in the result.
        output = copy.deepcopy(selected)
        output["processor_class"] = self.__class__.__name__
        return output
    def to_json_string(self) -> str:
        """
        Serializes this instance to a JSON string.

        Returns:
            `str`: The dictionary returned by `to_dict`, dumped as JSON with a 2-space indent, sorted keys and a
            trailing newline.
        """
        serialized = json.dumps(self.to_dict(), indent=2, sort_keys=True)
        return f"{serialized}\n"
    def to_json_file(self, json_file_path: Union[str, os.PathLike]):
        """
        Save this instance to a JSON file.

        The JSON string is produced before the file is opened, so a serialization failure does not truncate a file
        that already exists at `json_file_path`.

        Args:
            json_file_path (`str` or `os.PathLike`):
                Path to the JSON file in which this processor instance's parameters will be saved.
        """
        # Serialize first: opening with mode "w" empties the target immediately.
        json_string = self.to_json_string()
        with open(json_file_path, "w", encoding="utf-8") as writer:
            writer.write(json_string)
    def __repr__(self):
        attributes_repr = [f"- {name}: {repr(getattr(self, name))}" for name in self.attributes]
        attributes_repr = "\n".join(attributes_repr)
-        return f"{self.__class__.__name__}:\n{attributes_repr}"
+        return f"{self.__class__.__name__}:\n{attributes_repr}\n\n{self.to_json_string()}"
    def save_pretrained(self, save_directory, push_to_hub: bool = False, **kwargs):
        """
@@ -139,6 +213,7 @@ class ProcessorMixin(PushToHubMixin):
        if self._auto_class is not None:
            attrs = [getattr(self, attribute_name) for attribute_name in self.attributes]
            configs = [(a.init_kwargs if isinstance(a, PreTrainedTokenizerBase) else a) for a in attrs]
            configs.append(self)
            custom_object_save(self, save_directory, config=configs)
        for attribute_name in self.attributes:
@@ -156,6 +231,12 @@ class ProcessorMixin(PushToHubMixin):
                if isinstance(attribute, PreTrainedTokenizerBase):
                    del attribute.init_kwargs["auto_map"]
        # If we save using the predefined names, we can load using `from_pretrained`
        output_processor_file = os.path.join(save_directory, PROCESSOR_NAME)
        self.to_json_file(output_processor_file)
        logger.info(f"processor saved in {output_processor_file}")
        if push_to_hub:
            self._upload_modified_files(
                save_directory,
@@ -165,6 +246,150 @@ class ProcessorMixin(PushToHubMixin):
                token=kwargs.get("token"),
            )
        return [output_processor_file]
    @classmethod
    def get_processor_dict(
        cls, pretrained_model_name_or_path: Union[str, os.PathLike], **kwargs
    ) -> Tuple[Dict[str, Any], Dict[str, Any]]:
        """
        From a `pretrained_model_name_or_path`, resolve to a dictionary of parameters, to be used for instantiating a
        processor of type [`~processing_utils.ProcessorMixin`] using `from_args_and_dict`.

        Parameters:
            pretrained_model_name_or_path (`str` or `os.PathLike`):
                The identifier of the pre-trained checkpoint from which we want the dictionary of parameters. A path
                to an existing file is read directly, a remote URL is downloaded with `download_url`, and anything
                else is resolved through `cached_file` using the `PROCESSOR_NAME` file name.
            subfolder (`str`, *optional*, defaults to `""`):
                In case the relevant files are located inside a subfolder of the model repo on huggingface.co, you can
                specify the folder name here.
            kwargs (`Dict[str, Any]`, *optional*):
                `cache_dir`, `force_download`, `resume_download`, `proxies`, `token`, `local_files_only`, `revision`,
                `subfolder`, `_from_pipeline` and `_from_auto` are consumed here. Every other keyword argument is
                returned unchanged as the second element of the result.

        Returns:
            `Tuple[Dict, Dict]`: The processor dictionary loaded from the JSON file, and the keyword arguments that
            were not consumed by this method.

        Raises:
            `EnvironmentError`: If the processor file cannot be resolved, or if its content is not valid JSON.
        """
        # Pull out the loading options; whatever is left in `kwargs` is handed back to the caller.
        cache_dir = kwargs.pop("cache_dir", None)
        force_download = kwargs.pop("force_download", False)
        resume_download = kwargs.pop("resume_download", False)
        proxies = kwargs.pop("proxies", None)
        token = kwargs.pop("token", None)
        local_files_only = kwargs.pop("local_files_only", False)
        revision = kwargs.pop("revision", None)
        subfolder = kwargs.pop("subfolder", "")
        from_pipeline = kwargs.pop("_from_pipeline", None)
        from_auto_class = kwargs.pop("_from_auto", False)
        # Metadata forwarded to `cached_file` through its `user_agent` argument.
        user_agent = {"file_type": "processor", "from_auto_class": from_auto_class}
        if from_pipeline is not None:
            user_agent["using_pipeline"] = from_pipeline
        if is_offline_mode() and not local_files_only:
            logger.info("Offline mode: forcing local_files_only=True")
            local_files_only = True
        pretrained_model_name_or_path = str(pretrained_model_name_or_path)
        is_local = os.path.isdir(pretrained_model_name_or_path)
        # NOTE(review): for a directory this assignment appears to be overwritten by the final `else` branch below
        # (`processor_file = PROCESSOR_NAME`) before it is ever read - confirm whether it is still needed.
        if os.path.isdir(pretrained_model_name_or_path):
            processor_file = os.path.join(pretrained_model_name_or_path, PROCESSOR_NAME)
        if os.path.isfile(pretrained_model_name_or_path):
            # A direct path to a file: use it as is.
            resolved_processor_file = pretrained_model_name_or_path
            is_local = True
        elif is_remote_url(pretrained_model_name_or_path):
            processor_file = pretrained_model_name_or_path
            resolved_processor_file = download_url(pretrained_model_name_or_path)
        else:
            processor_file = PROCESSOR_NAME
            try:
                # Load from local folder or from cache or download from model Hub and cache
                resolved_processor_file = cached_file(
                    pretrained_model_name_or_path,
                    processor_file,
                    cache_dir=cache_dir,
                    force_download=force_download,
                    proxies=proxies,
                    resume_download=resume_download,
                    local_files_only=local_files_only,
                    token=token,
                    user_agent=user_agent,
                    revision=revision,
                    subfolder=subfolder,
                )
            except EnvironmentError:
                # Re-raise any environment error raised by `cached_file`. It will have a helpful error message
                # adapted to the original exception.
                raise
            except Exception:
                # For any other exception, we throw a generic error.
                raise EnvironmentError(
                    f"Can't load processor for '{pretrained_model_name_or_path}'. If you were trying to load"
                    " it from 'https://huggingface.co/models', make sure you don't have a local directory with the"
                    f" same name. Otherwise, make sure '{pretrained_model_name_or_path}' is the correct path to a"
                    f" directory containing a {PROCESSOR_NAME} file"
                )
        try:
            # Load processor dict
            with open(resolved_processor_file, "r", encoding="utf-8") as reader:
                text = reader.read()
            processor_dict = json.loads(text)
        except json.JSONDecodeError:
            # Surface invalid JSON as the same exception type used for resolution failures.
            raise EnvironmentError(
                f"It looks like the config file at '{resolved_processor_file}' is not a valid JSON file."
            )
        if is_local:
            logger.info(f"loading configuration file {resolved_processor_file}")
        else:
            logger.info(f"loading configuration file {processor_file} from cache at {resolved_processor_file}")
        # For non-local checkpoints, the `auto_map` entry is passed through `add_model_info_to_auto_map` together
        # with the checkpoint identifier.
        if "auto_map" in processor_dict and not is_local:
            processor_dict["auto_map"] = add_model_info_to_auto_map(
                processor_dict["auto_map"], pretrained_model_name_or_path
            )
        return processor_dict, kwargs
    @classmethod
    def from_args_and_dict(cls, args, processor_dict: Dict[str, Any], **kwargs):
        """
        Instantiates a type of [`~processing_utils.ProcessorMixin`] from a Python dictionary of parameters.

        Args:
            args:
                Positional arguments forwarded to the class constructor, ahead of the entries of `processor_dict`.
            processor_dict (`Dict[str, Any]`):
                Dictionary that will be used to instantiate the processor object. Such a dictionary can be
                retrieved from a pretrained checkpoint by leveraging the
                [`~processing_utils.ProcessorMixin.to_dict`] method. The `processor_class` and `auto_map` entries
                are ignored, and the dictionary passed in is not modified.
            kwargs (`Dict[str, Any]`):
                Additional parameters from which to initialize the processor object. A key is applied only if the
                freshly built processor already has an attribute of that name. `return_unused_kwargs` controls
                the return value.

        Returns:
            [`~processing_utils.ProcessorMixin`]: The processor object instantiated from those parameters, or a
            `(processor, unused_kwargs)` tuple when `return_unused_kwargs=True`.
        """
        return_unused_kwargs = kwargs.pop("return_unused_kwargs", False)

        # Unlike image processors or feature extractors, whose `__init__` accept `kwargs`, processors don't.
        # Work on a shallow copy and drop the bookkeeping entries that `__init__` would not understand.
        init_kwargs = processor_dict.copy()
        for bookkeeping_key in ("processor_class", "auto_map"):
            init_kwargs.pop(bookkeeping_key, None)

        processor = cls(*args, **init_kwargs)

        # Update processor with kwargs if needed; iterate over a snapshot because `kwargs` shrinks along the way.
        for name in set(kwargs.keys()):
            if not hasattr(processor, name):
                continue
            setattr(processor, name, kwargs.pop(name))

        logger.info(f"Processor {processor}")

        if not return_unused_kwargs:
            return processor
        return processor, kwargs
    @classmethod
    def from_pretrained(
        cls,
@@ -226,7 +451,19 @@ class ProcessorMixin(PushToHubMixin):
            kwargs["token"] = token
        args = cls._get_arguments_from_pretrained(pretrained_model_name_or_path, **kwargs)
-        return cls(*args)
+
        # Existing processors on the Hub created before #27761 was merged don't have `processor_config.json` (unless
        # updated afterward), and we need to keep `from_pretrained` working. So here it falls back to an empty dict.
        # However, for models added in the future, we won't get the expected error if this file is missing.
        try:
            processor_dict, kwargs = cls.get_processor_dict(pretrained_model_name_or_path, **kwargs)
        except EnvironmentError as e:
            if "does not appear to have a file named processor_config.json." in str(e):
                processor_dict, kwargs = {}, kwargs
            else:
                raise
        return cls.from_args_and_dict(args, processor_dict, **kwargs)
    @classmethod
    def register_for_auto_class(cls, auto_class="AutoProcessor"):
--- a/src/transformers/utils/init.py
+++ b/src/transformers/utils/init.py
@@ -217,6 +217,7 @@ SAFE_WEIGHTS_INDEX_NAME = "model.safetensors.index.json"
 CONFIG_NAME = "config.json"
 FEATURE_EXTRACTOR_NAME = "preprocessor_config.json"
 IMAGE_PROCESSOR_NAME = FEATURE_EXTRACTOR_NAME
 PROCESSOR_NAME = "processor_config.json"
 GENERATION_CONFIG_NAME = "generation_config.json"
 MODEL_CARD_NAME = "modelcard.json"
--- a/tests/models/auto/test_processor_auto.py
+++ b/tests/models/auto/test_processor_auto.py
@@ -42,7 +42,7 @@ from transformers import (
 )
 from transformers.testing_utils import TOKEN, USER, get_tests_dir, is_staging_test
 from transformers.tokenization_utils import TOKENIZER_CONFIG_FILE
-from transformers.utils import FEATURE_EXTRACTOR_NAME, is_tokenizers_available
+from transformers.utils import FEATURE_EXTRACTOR_NAME, PROCESSOR_NAME, is_tokenizers_available
 sys.path.append(str(Path(__file__).parent.parent.parent.parent / "utils"))
@@ -91,6 +91,28 @@ class AutoFeatureExtractorTest(unittest.TestCase):
        self.assertIsInstance(processor, Wav2Vec2Processor)
    def test_processor_from_processor_class(self):
        """`AutoProcessor` must still load a `Wav2Vec2Processor` after `processor_class` is removed from the tokenizer config."""
        with tempfile.TemporaryDirectory() as tmpdirname:
            processor = Wav2Vec2Processor(
                Wav2Vec2FeatureExtractor(),
                AutoTokenizer.from_pretrained("facebook/wav2vec2-base-960h"),
            )

            # save in new folder
            processor.save_pretrained(tmpdirname)

            # drop `processor_class` in tokenizer config
            tokenizer_config_path = os.path.join(tmpdirname, TOKENIZER_CONFIG_FILE)
            with open(tokenizer_config_path, "r") as reader:
                tokenizer_config = json.load(reader)
            tokenizer_config.pop("processor_class")
            with open(tokenizer_config_path, "w") as writer:
                writer.write(json.dumps(tokenizer_config))

            processor = AutoProcessor.from_pretrained(tmpdirname)

        self.assertIsInstance(processor, Wav2Vec2Processor)
    def test_processor_from_feat_extr_processor_class(self):
        with tempfile.TemporaryDirectory() as tmpdirname:
            feature_extractor = Wav2Vec2FeatureExtractor()
@@ -101,6 +123,14 @@ class AutoFeatureExtractorTest(unittest.TestCase):
            # save in new folder
            processor.save_pretrained(tmpdirname)
            # drop `processor_class` in processor
            with open(os.path.join(tmpdirname, PROCESSOR_NAME), "r") as f:
                config_dict = json.load(f)
                config_dict.pop("processor_class")
            with open(os.path.join(tmpdirname, PROCESSOR_NAME), "w") as f:
                f.write(json.dumps(config_dict))
            # drop `processor_class` in tokenizer
            with open(os.path.join(tmpdirname, TOKENIZER_CONFIG_FILE), "r") as f:
                config_dict = json.load(f)
@@ -123,6 +153,14 @@ class AutoFeatureExtractorTest(unittest.TestCase):
            # save in new folder
            processor.save_pretrained(tmpdirname)
            # drop `processor_class` in processor
            with open(os.path.join(tmpdirname, PROCESSOR_NAME), "r") as f:
                config_dict = json.load(f)
                config_dict.pop("processor_class")
            with open(os.path.join(tmpdirname, PROCESSOR_NAME), "w") as f:
                f.write(json.dumps(config_dict))
            # drop `processor_class` in feature extractor
            with open(os.path.join(tmpdirname, FEATURE_EXTRACTOR_NAME), "r") as f:
                config_dict = json.load(f)
@@ -270,6 +308,45 @@ class AutoFeatureExtractorTest(unittest.TestCase):
            if CustomConfig in PROCESSOR_MAPPING._extra_content:
                del PROCESSOR_MAPPING._extra_content[CustomConfig]
    def test_from_pretrained_dynamic_processor_with_extra_attributes(self):
        """Extra processor attributes keep their default unless overridden through `from_pretrained` kwargs."""

        class NewFeatureExtractor(Wav2Vec2FeatureExtractor):
            pass

        class NewTokenizer(BertTokenizer):
            pass

        class NewProcessor(ProcessorMixin):
            feature_extractor_class = "AutoFeatureExtractor"
            tokenizer_class = "AutoTokenizer"

            def __init__(self, feature_extractor, tokenizer, processor_attr_1=1, processor_attr_2=True):
                super().__init__(feature_extractor, tokenizer)
                self.processor_attr_1 = processor_attr_1
                self.processor_attr_2 = processor_attr_2

        try:
            AutoConfig.register("custom", CustomConfig)
            AutoFeatureExtractor.register(CustomConfig, NewFeatureExtractor)
            AutoTokenizer.register(CustomConfig, slow_tokenizer_class=NewTokenizer)
            AutoProcessor.register(CustomConfig, NewProcessor)

            # If remote code is not set, the default is to use local classes.
            processor = AutoProcessor.from_pretrained(
                "hf-internal-testing/test_dynamic_processor", processor_attr_2=False
            )

            self.assertEqual(processor.__class__.__name__, "NewProcessor")
            self.assertEqual(processor.processor_attr_1, 1)
            self.assertEqual(processor.processor_attr_2, False)
        finally:
            # Undo every registration made above, in the same order as they were checked originally.
            registered_entries = (
                (CONFIG_MAPPING, "custom"),
                (FEATURE_EXTRACTOR_MAPPING, CustomConfig),
                (TOKENIZER_MAPPING, CustomConfig),
                (PROCESSOR_MAPPING, CustomConfig),
            )
            for mapping, registered_key in registered_entries:
                if registered_key in mapping._extra_content:
                    del mapping._extra_content[registered_key]
    def test_auto_processor_creates_tokenizer(self):
        processor = AutoProcessor.from_pretrained("hf-internal-testing/tiny-random-bert")
        self.assertEqual(processor.__class__.__name__, "BertTokenizerFast")
--- a/tests/models/clip/test_processor_clip.py
+++ b/tests/models/clip/test_processor_clip.py
@@ -26,6 +26,8 @@ from transformers.models.clip.tokenization_clip import VOCAB_FILES_NAMES
 from transformers.testing_utils import require_vision
 from transformers.utils import IMAGE_PROCESSOR_NAME, is_vision_available
 from ...test_processing_common import ProcessorTesterMixin
 if is_vision_available():
    from PIL import Image
@@ -34,7 +36,9 @@ if is_vision_available():
@require_vision
-class CLIPProcessorTest(unittest.TestCase):
+class CLIPProcessorTest(ProcessorTesterMixin, unittest.TestCase):
    processor_class = CLIPProcessor
    def setUp(self):
        self.tmpdirname = tempfile.mkdtemp()
--- a/tests/test_processing_common.py
+++ b/tests/test_processing_common.py
@@ -0,0 +1,127 @@
 # coding=utf-8
 # Copyright 2024 The HuggingFace Inc. team. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import json
 import tempfile
 import unittest
 from transformers import CLIPTokenizerFast, ProcessorMixin
 from transformers.models.auto.processing_auto import processor_class_from_name
 from transformers.testing_utils import (
    check_json_file_has_correct_format,
    require_tokenizers,
    require_torch,
    require_vision,
 )
 from transformers.utils import is_vision_available
 if is_vision_available():
    from transformers import CLIPImageProcessor
@require_torch
class ProcessorTesterMixin:
    """
    Shared serialization tests for processor classes.

    Subclasses set `processor_class` and provide `self.tmpdirname`, a directory from which every component listed in
    `processor_class.attributes` can be loaded with `from_pretrained`.
    """

    processor_class = None

    def prepare_processor_dict(self):
        """Return the extra keyword arguments passed to the processor constructor (none by default)."""
        return {}

    def get_component(self, attribute, **kwargs):
        """Load the component named `attribute` from `self.tmpdirname`."""
        assert attribute in self.processor_class.attributes
        class_name = getattr(self.processor_class, f"{attribute}_class")
        # When several class names are declared as a tuple, the first one is used.
        if isinstance(class_name, tuple):
            class_name = class_name[0]

        loader = processor_class_from_name(class_name)
        return loader.from_pretrained(self.tmpdirname, **kwargs)  # noqa

    def prepare_components(self):
        """Load every component of `processor_class`, keyed by attribute name."""
        return {name: self.get_component(name) for name in self.processor_class.attributes}

    def get_processor(self):
        """Build a processor from the loaded components and `prepare_processor_dict()`."""
        return self.processor_class(**self.prepare_components(), **self.prepare_processor_dict())

    def test_processor_to_json_string(self):
        processor = self.get_processor()
        decoded = json.loads(processor.to_json_string())
        expected = self.prepare_processor_dict()
        for name in expected:
            self.assertEqual(decoded[name], expected[name])
            self.assertEqual(getattr(processor, name, None), expected[name])

    def test_processor_from_and_save_pretrained(self):
        original = self.get_processor()

        with tempfile.TemporaryDirectory() as tmpdirname:
            # `save_pretrained` returns the list of written files; the first one is checked for formatting.
            written_files = original.save_pretrained(tmpdirname)
            check_json_file_has_correct_format(written_files[0])
            reloaded = self.processor_class.from_pretrained(tmpdirname)

        self.assertEqual(reloaded.to_dict(), original.to_dict())
 class MyProcessor(ProcessorMixin):
    # Minimal processor with two extra constructor arguments, used to exercise processor (de)serialization.
    attributes = ["image_processor", "tokenizer"]
    image_processor_class = "CLIPImageProcessor"
    tokenizer_class = ("CLIPTokenizer", "CLIPTokenizerFast")
    def __init__(self, image_processor=None, tokenizer=None, processor_attr_1=1, processor_attr_2=True):
        # The components are handled by the parent class; the two extra values are stored on the instance.
        super().__init__(image_processor, tokenizer)
        self.processor_attr_2 = processor_attr_2
        self.processor_attr_1 = processor_attr_1
@require_tokenizers
@require_vision
class ProcessorTest(unittest.TestCase):
    """Serialization tests for `MyProcessor`, a processor with extra constructor arguments."""

    processor_class = MyProcessor

    def prepare_processor_dict(self):
        """Return the extra constructor arguments used to build the processor under test."""
        return {"processor_attr_1": 1, "processor_attr_2": False}

    def get_processor(self):
        """Build a `MyProcessor` from the `openai/clip-vit-large-patch14` components."""
        checkpoint = "openai/clip-vit-large-patch14"
        image_processor = CLIPImageProcessor.from_pretrained(checkpoint)
        tokenizer = CLIPTokenizerFast.from_pretrained(checkpoint)
        return MyProcessor(image_processor, tokenizer, **self.prepare_processor_dict())

    def test_processor_to_json_string(self):
        processor = self.get_processor()
        decoded = json.loads(processor.to_json_string())
        expected = self.prepare_processor_dict()
        for name in expected:
            self.assertEqual(decoded[name], expected[name])
            self.assertEqual(getattr(processor, name, None), expected[name])

    def test_processor_from_and_save_pretrained(self):
        original = self.get_processor()

        with tempfile.TemporaryDirectory() as tmpdirname:
            # `save_pretrained` returns the list of written files; the first one is checked for formatting.
            written_files = original.save_pretrained(tmpdirname)
            check_json_file_has_correct_format(written_files[0])
            reloaded = self.processor_class.from_pretrained(tmpdirname)

        self.assertEqual(reloaded.to_dict(), original.to_dict())