PoC for a ProcessorMixin class (#15549)

* PoC for a ProcessorMixin class * Documentation * Apply suggestions from code review Co-authored-by: NielsRogge <48327001+NielsRogge@users.noreply.github.com> Co-authored-by: Suraj Patil <surajp815@gmail.com> Co-authored-by: Patrick von Platen <patrick.v.platen@gmail.com> * Roll out to other processors * Add base feature extractor class in init * Use args and kwargs Co-authored-by: NielsRogge <48327001+NielsRogge@users.noreply.github.com> Co-authored-by: Suraj Patil <surajp815@gmail.com> Co-authored-by: Patrick von Platen <patrick.v.platen@gmail.com>
2022-02-09 09:24:49 -05:00
parent ba3f9a71a1
commit b5c6fdecf0
13 changed files with 240 additions and 726 deletions
--- a/docs/source/main_classes/processors.mdx
+++ b/docs/source/main_classes/processors.mdx
@@ -12,10 +12,22 @@ specific language governing permissions and limitations under the License.
 # Processors
-This library includes processors for several traditional tasks. These processors can be used to process a dataset into
+Processors can mean two different things in the Transformers library:
-examples that can be fed to a model.
+- the objects that pre-process inputs for multi-modal models such as [Wav2Vec2](../model_doc/wav2vec2) (speech and text)
  or [CLIP](../model_doc/clip) (text and vision)
 - deprecated objects that were used in older versions of the library to preprocess data for GLUE or SQUAD.
-## Processors
+## Multi-modal processors
 Any multi-modal model will require an object to encode or decode the data that groups several modalities (among text,
 vision and audio). This is handled by objects called processors, which group tokenizers (for the text modality) and
 feature extractors (for vision and audio).
 Those processors inherit from the following base class that implements the saving and loading functionality:
 [[autodoc]] ProcessorMixin
 ## Deprecated processors
 All processors follow the same architecture which is that of the
 [`~data.processors.utils.DataProcessor`]. The processor returns a list of
--- a/src/transformers/init.py
+++ b/src/transformers/init.py
@@ -95,7 +95,7 @@ _import_structure = {
    "dependency_versions_table": [],
    "dynamic_module_utils": [],
    "feature_extraction_sequence_utils": ["SequenceFeatureExtractor"],
-    "feature_extraction_utils": ["BatchFeature"],
+    "feature_extraction_utils": ["BatchFeature", "FeatureExtractionMixin"],
    "file_utils": [
        "CONFIG_NAME",
        "MODEL_CARD_NAME",
@@ -365,6 +365,7 @@ _import_structure = {
        "ZeroShotClassificationPipeline",
        "pipeline",
    ],
    "processing_utils": ["ProcessorMixin"],
    "testing_utils": [],
    "tokenization_utils": ["PreTrainedTokenizer"],
    "tokenization_utils_base": [
@@ -2307,7 +2308,7 @@ if TYPE_CHECKING:
    from .feature_extraction_sequence_utils import SequenceFeatureExtractor
    # Feature Extractor
-    from .feature_extraction_utils import BatchFeature
+    from .feature_extraction_utils import BatchFeature, FeatureExtractionMixin
    # Files and general utilities
    from .file_utils import (
@@ -2555,6 +2556,7 @@ if TYPE_CHECKING:
        ZeroShotClassificationPipeline,
        pipeline,
    )
    from .processing_utils import ProcessorMixin
    # Tokenization
    from .tokenization_utils import PreTrainedTokenizer
--- a/src/transformers/models/clip/processing_clip.py
+++ b/src/transformers/models/clip/processing_clip.py
@@ -15,12 +15,11 @@
 """
 Image/Text processor class for CLIP
 """
 from ...processing_utils import ProcessorMixin
 from ...tokenization_utils_base import BatchEncoding
 from .feature_extraction_clip import CLIPFeatureExtractor
 from .tokenization_clip import CLIPTokenizer
-class CLIPProcessor:
+class CLIPProcessor(ProcessorMixin):
    r"""
    Constructs a CLIP processor which wraps a CLIP feature extractor and a CLIP tokenizer into a single processor.
@@ -33,77 +32,13 @@ class CLIPProcessor:
        tokenizer ([`CLIPTokenizer`]):
            The tokenizer is a required input.
    """
    feature_extractor_class = "CLIPFeatureExtractor"
    tokenizer_class = "CLIPTokenizer"
    def __init__(self, feature_extractor, tokenizer):
-        if not isinstance(feature_extractor, CLIPFeatureExtractor):
+        super().__init__(feature_extractor, tokenizer)
            raise ValueError(
                f"`feature_extractor` has to be of type CLIPFeatureExtractor, but is {type(feature_extractor)}"
            )
        if not isinstance(tokenizer, CLIPTokenizer):
            raise ValueError(f"`tokenizer` has to be of type CLIPTokenizer, but is {type(tokenizer)}")
        self.feature_extractor = feature_extractor
        self.tokenizer = tokenizer
        self.current_processor = self.feature_extractor
    def save_pretrained(self, save_directory):
        """
        Save a CLIP feature extractor object and CLIP tokenizer object to the directory `save_directory`, so that it
        can be re-loaded using the [`~CLIPProcessor.from_pretrained`] class method.
        <Tip>
        This class method is simply calling [`~PreTrainedFeatureExtractor.save_pretrained`] and
        [`~tokenization_utils_base.PreTrainedTokenizer.save_pretrained`]. Please refer to the docstrings of the methods
        above for more information.
        </Tip>
        Args:
            save_directory (`str` or `os.PathLike`):
                Directory where the feature extractor JSON file and the tokenizer files will be saved (directory will
                be created if it does not exist).
        """
        self.feature_extractor._set_processor_class(self.__class__.__name__)
        self.feature_extractor.save_pretrained(save_directory)
        self.tokenizer._set_processor_class(self.__class__.__name__)
        self.tokenizer.save_pretrained(save_directory)
    @classmethod
    def from_pretrained(cls, pretrained_model_name_or_path, **kwargs):
        r"""
        Instantiate a [`CLIPProcessor`] from a pretrained CLIP processor.
        <Tip>
        This class method is simply calling CLIPFeatureExtractor's [`~PreTrainedFeatureExtractor.from_pretrained`] and
        CLIPTokenizer's [`~tokenization_utils_base.PreTrainedTokenizer.from_pretrained`]. Please refer to the
        docstrings of the methods above for more information.
        </Tip>
        Args:
            pretrained_model_name_or_path (`str` or `os.PathLike`):
                This can be either:
                - a string, the *model id* of a pretrained feature_extractor hosted inside a model repo on
                  huggingface.co. Valid model ids can be located at the root-level, like `clip-vit-base-patch32`, or
                  namespaced under a user or organization name, like `openai/clip-vit-base-patch32`.
                - a path to a *directory* containing a feature extractor file saved using the
                  [`~PreTrainedFeatureExtractor.save_pretrained`] method, e.g., `./my_model_directory/`.
                - a path or url to a saved feature extractor JSON *file*, e.g.,
                  `./my_model_directory/preprocessor_config.json`.
            **kwargs
                Additional keyword arguments passed along to both [`PreTrainedFeatureExtractor`] and
                [`PreTrainedTokenizer`]
        """
        feature_extractor = CLIPFeatureExtractor.from_pretrained(pretrained_model_name_or_path, **kwargs)
        tokenizer = CLIPTokenizer.from_pretrained(pretrained_model_name_or_path, **kwargs)
        return cls(feature_extractor=feature_extractor, tokenizer=tokenizer)
    def __call__(self, text=None, images=None, return_tensors=None, **kwargs):
        """
        Main method to prepare for the model one or several sequence(s) and image(s). This method forwards the `text`
--- a/src/transformers/models/layoutlmv2/processing_layoutlmv2.py
+++ b/src/transformers/models/layoutlmv2/processing_layoutlmv2.py
@@ -18,13 +18,11 @@ Processor class for LayoutLMv2.
 from typing import List, Optional, Union
 from ...file_utils import TensorType
 from ...processing_utils import ProcessorMixin
 from ...tokenization_utils_base import BatchEncoding, PaddingStrategy, PreTokenizedInput, TextInput, TruncationStrategy
 from .feature_extraction_layoutlmv2 import LayoutLMv2FeatureExtractor
 from .tokenization_layoutlmv2 import LayoutLMv2Tokenizer
 from .tokenization_layoutlmv2_fast import LayoutLMv2TokenizerFast
-class LayoutLMv2Processor:
+class LayoutLMv2Processor(ProcessorMixin):
    r"""
    Constructs a LayoutLMv2 processor which combines a LayoutLMv2 feature extractor and a LayoutLMv2 tokenizer into a
    single processor.
@@ -43,84 +41,8 @@ class LayoutLMv2Processor:
        tokenizer (`LayoutLMv2Tokenizer` or `LayoutLMv2TokenizerFast`):
            An instance of [`LayoutLMv2Tokenizer`] or [`LayoutLMv2TokenizerFast`]. The tokenizer is a required input.
    """
-
+    feature_extractor_class = "LayoutLMv2FeatureExtractor"
-    def __init__(self, feature_extractor, tokenizer):
+    tokenizer_class = ("LayoutLMv2Tokenizer", "LayoutLMv2TokenizerFast")
        if not isinstance(feature_extractor, LayoutLMv2FeatureExtractor):
            raise ValueError(
                f"`feature_extractor` has to be of type {LayoutLMv2FeatureExtractor.__class__}, but is {type(feature_extractor)}"
            )
        if not isinstance(tokenizer, (LayoutLMv2Tokenizer, LayoutLMv2TokenizerFast)):
            raise ValueError(
                f"`tokenizer` has to be of type {LayoutLMv2Tokenizer.__class__} or {LayoutLMv2TokenizerFast.__class__}, but is {type(tokenizer)}"
            )
        self.feature_extractor = feature_extractor
        self.tokenizer = tokenizer
    def save_pretrained(self, save_directory):
        """
        Save a LayoutLMv2 feature_extractor object and LayoutLMv2 tokenizer object to the directory `save_directory`,
        so that it can be re-loaded using the [`~LayoutLMv2Processor.from_pretrained`] class method.
        <Tip>
        This class method is simply calling [`~feature_extraction_utils.FeatureExtractionMixin.save_pretrained`] and
        [`~tokenization_utils_base.PreTrainedTokenizer.save_pretrained`]. Please refer to the docstrings of the methods
        above for more information.
        </Tip>
        Args:
            save_directory (`str` or `os.PathLike`):
                Directory where the feature extractor JSON file and the tokenizer files will be saved (directory will
                be created if it does not exist).
        """
        self.feature_extractor._set_processor_class(self.__class__.__name__)
        self.feature_extractor.save_pretrained(save_directory)
        self.tokenizer._set_processor_class(self.__class__.__name__)
        self.tokenizer.save_pretrained(save_directory)
    @classmethod
    def from_pretrained(cls, pretrained_model_name_or_path, use_fast=True, **kwargs):
        r"""
        Instantiate a [`LayoutLMv2Processor`] from a pretrained LayoutLMv2 processor.
        <Tip>
        This class method is simply calling LayoutLMv2FeatureExtractor's
        [`~feature_extraction_utils.FeatureExtractionMixin.from_pretrained`] and LayoutLMv2TokenizerFast's
        [`~tokenization_utils_base.PreTrainedTokenizer.from_pretrained`]. Please refer to the docstrings of the methods
        above for more information.
        </Tip>
        Args:
            pretrained_model_name_or_path (`str` or `os.PathLike`):
                This can be either:
                - a string, the *model id* of a pretrained feature_extractor hosted inside a model repo on
                  huggingface.co. Valid model ids can be located at the root-level, like `bert-base-uncased`, or
                  namespaced under a user or organization name, like `dbmdz/bert-base-german-cased`.
                - a path to a *directory* containing a feature extractor file saved using the
                  [`~SequenceFeatureExtractor.save_pretrained`] method, e.g., `./my_model_directory/`.
                - a path or url to a saved feature extractor JSON *file*, e.g.,
                  `./my_model_directory/preprocessor_config.json`.
            use_fast (`bool`, *optional*, defaults to `True`):
                Whether or not to instantiate a fast tokenizer.
            **kwargs
                Additional keyword arguments passed along to both [`SequenceFeatureExtractor`] and
                [`PreTrainedTokenizer`]
        """
        feature_extractor = LayoutLMv2FeatureExtractor.from_pretrained(pretrained_model_name_or_path, **kwargs)
        if use_fast:
            tokenizer = LayoutLMv2TokenizerFast.from_pretrained(pretrained_model_name_or_path, **kwargs)
        else:
            tokenizer = LayoutLMv2Tokenizer.from_pretrained(pretrained_model_name_or_path, **kwargs)
        return cls(feature_extractor=feature_extractor, tokenizer=tokenizer)
    def __call__(
        self,
--- a/src/transformers/models/layoutxlm/processing_layoutxlm.py
+++ b/src/transformers/models/layoutxlm/processing_layoutxlm.py
@@ -17,15 +17,12 @@ Processor class for LayoutXLM.
 """
 from typing import List, Optional, Union
 from transformers.models.layoutlmv2.feature_extraction_layoutlmv2 import LayoutLMv2FeatureExtractor
 from ...file_utils import TensorType
 from ...processing_utils import ProcessorMixin
 from ...tokenization_utils_base import BatchEncoding, PaddingStrategy, PreTokenizedInput, TextInput, TruncationStrategy
 from .tokenization_layoutxlm import LayoutXLMTokenizer
 from .tokenization_layoutxlm_fast import LayoutXLMTokenizerFast
-class LayoutXLMProcessor:
+class LayoutXLMProcessor(ProcessorMixin):
    r"""
    Constructs a LayoutXLM processor which combines a LayoutXLM feature extractor and a LayoutXLM tokenizer into a
    single processor.
@@ -44,84 +41,8 @@ class LayoutXLMProcessor:
        tokenizer (`LayoutXLMTokenizer` or `LayoutXLMTokenizerFast`):
            An instance of [`LayoutXLMTokenizer`] or [`LayoutXLMTokenizerFast`]. The tokenizer is a required input.
    """
-
+    feature_extractor_class = "LayoutLMv2FeatureExtractor"
-    def __init__(self, feature_extractor, tokenizer):
+    tokenizer_class = ("LayoutXLMTokenizer", "LayoutXLMTokenizerFast")
        if not isinstance(feature_extractor, LayoutLMv2FeatureExtractor):
            raise ValueError(
                f"`feature_extractor` has to be of type {LayoutLMv2FeatureExtractor.__class__}, but is {type(feature_extractor)}"
            )
        if not isinstance(tokenizer, (LayoutXLMTokenizer, LayoutXLMTokenizerFast)):
            raise ValueError(
                f"`tokenizer` has to be of type {LayoutXLMTokenizer.__class__} or {LayoutXLMTokenizerFast.__class__}, but is {type(tokenizer)}"
            )
        self.feature_extractor = feature_extractor
        self.tokenizer = tokenizer
    def save_pretrained(self, save_directory):
        """
        Save a LayoutXLM feature_extractor object and LayoutXLM tokenizer object to the directory `save_directory`, so
        that it can be re-loaded using the [`~LayoutXLMProcessor.from_pretrained`] class method.
        <Tip>
        This class method is simply calling [`~feature_extraction_utils.FeatureExtractionMixin.save_pretrained`] and
        [`~tokenization_utils_base.PreTrainedTokenizer.save_pretrained`]. Please refer to the docstrings of the methods
        above for more information.
        </Tip>
        Args:
            save_directory (`str` or `os.PathLike`):
                Directory where the feature extractor JSON file and the tokenizer files will be saved (directory will
                be created if it does not exist).
        """
        self.feature_extractor._set_processor_class(self.__class__.__name__)
        self.feature_extractor.save_pretrained(save_directory)
        self.tokenizer._set_processor_class(self.__class__.__name__)
        self.tokenizer.save_pretrained(save_directory)
    @classmethod
    def from_pretrained(cls, pretrained_model_name_or_path, use_fast=True, **kwargs):
        r"""
        Instantiate a [`LayoutXLMProcessor`] from a pretrained LayoutXLM processor.
        <Tip>
        This class method is simply calling LayoutLMv2FeatureExtractor's
        [`~feature_extraction_utils.FeatureExtractionMixin.from_pretrained`] and LayoutXLMTokenizerFast's
        [`~tokenization_utils_base.PreTrainedTokenizer.from_pretrained`]. Please refer to the docstrings of the methods
        above for more information.
        </Tip>
        Args:
            pretrained_model_name_or_path (`str` or `os.PathLike`):
                This can be either:
                - a string, the *model id* of a pretrained feature_extractor hosted inside a model repo on
                  huggingface.co. Valid model ids can be located at the root-level, like `bert-base-uncased`, or
                  namespaced under a user or organization name, like `dbmdz/bert-base-german-cased`.
                - a path to a *directory* containing a feature extractor file saved using the
                  [`~SequenceFeatureExtractor.save_pretrained`] method, e.g., `./my_model_directory/`.
                - a path or url to a saved feature extractor JSON *file*, e.g.,
                  `./my_model_directory/preprocessor_config.json`.
            use_fast (`bool`, *optional*, defaults to `True`):
                Whether or not to instantiate a fast tokenizer.
            **kwargs
                Additional keyword arguments passed along to both [`SequenceFeatureExtractor`] and
                [`PreTrainedTokenizer`]
        """
        feature_extractor = LayoutLMv2FeatureExtractor.from_pretrained(pretrained_model_name_or_path, **kwargs)
        if use_fast:
            tokenizer = LayoutXLMTokenizerFast.from_pretrained(pretrained_model_name_or_path, **kwargs)
        else:
            tokenizer = LayoutXLMTokenizer.from_pretrained(pretrained_model_name_or_path, **kwargs)
        return cls(feature_extractor=feature_extractor, tokenizer=tokenizer)
    def __call__(
        self,
--- a/src/transformers/models/speech_to_text/processing_speech_to_text.py
+++ b/src/transformers/models/speech_to_text/processing_speech_to_text.py
@@ -17,11 +17,10 @@ Speech processor class for Speech2Text
 """
 from contextlib import contextmanager
-from .feature_extraction_speech_to_text import Speech2TextFeatureExtractor
+from ...processing_utils import ProcessorMixin
 from .tokenization_speech_to_text import Speech2TextTokenizer
-class Speech2TextProcessor:
+class Speech2TextProcessor(ProcessorMixin):
    r"""
    Constructs a Speech2Text processor which wraps a Speech2Text feature extractor and a Speech2Text tokenizer into a
    single processor.
@@ -36,79 +35,13 @@ class Speech2TextProcessor:
        tokenizer (`Speech2TextTokenizer`):
            An instance of [`Speech2TextTokenizer`]. The tokenizer is a required input.
    """
    feature_extractor_class = "Speech2TextFeatureExtractor"
    tokenizer_class = "Speech2TextTokenizer"
    def __init__(self, feature_extractor, tokenizer):
-        if not isinstance(feature_extractor, Speech2TextFeatureExtractor):
+        super().__init__(feature_extractor, tokenizer)
            raise ValueError(
                f"`feature_extractor` has to be of type {Speech2TextFeatureExtractor.__class__}, but is {type(feature_extractor)}"
            )
        if not isinstance(tokenizer, Speech2TextTokenizer):
            raise ValueError(
                f"`tokenizer` has to be of type {Speech2TextTokenizer.__class__}, but is {type(tokenizer)}"
            )
        self.feature_extractor = feature_extractor
        self.tokenizer = tokenizer
        self.current_processor = self.feature_extractor
    def save_pretrained(self, save_directory):
        """
        Save a Speech2Text feature extractor object and Speech2Text tokenizer object to the directory `save_directory`,
        so that it can be re-loaded using the [`~Speech2TextProcessor.from_pretrained`] class method.
        <Tip>
        This class method is simply calling [`~PreTrainedFeatureExtractor.save_pretrained`] and
        [`~tokenization_utils_base.PreTrainedTokenizer.save_pretrained`]. Please refer to the docstrings of the methods
        above for more information.
        </Tip>
        Args:
            save_directory (`str` or `os.PathLike`):
                Directory where the feature extractor JSON file and the tokenizer files will be saved (directory will
                be created if it does not exist).
        """
        self.feature_extractor._set_processor_class(self.__class__.__name__)
        self.feature_extractor.save_pretrained(save_directory)
        self.tokenizer._set_processor_class(self.__class__.__name__)
        self.tokenizer.save_pretrained(save_directory)
    @classmethod
    def from_pretrained(cls, pretrained_model_name_or_path, **kwargs):
        r"""
        Instantiate a [`Speech2TextProcessor`] from a pretrained Speech2Text processor.
        <Tip>
        This class method is simply calling Speech2TextFeatureExtractor's
        [`~PreTrainedFeatureExtractor.from_pretrained`] and Speech2TextTokenizer's
        [`~tokenization_utils_base.PreTrainedTokenizer.from_pretrained`]. Please refer to the docstrings of the methods
        above for more information.
        </Tip>
        Args:
            pretrained_model_name_or_path (`str` or `os.PathLike`):
                This can be either:
                - a string, the *model id* of a pretrained feature_extractor hosted inside a model repo on
                  huggingface.co. Valid model ids can be located at the root-level, like `bert-base-uncased`, or
                  namespaced under a user or organization name, like `dbmdz/bert-base-german-cased`.
                - a path to a *directory* containing a feature extractor file saved using the
                  [`~PreTrainedFeatureExtractor.save_pretrained`] method, e.g., `./my_model_directory/`.
                - a path or url to a saved feature extractor JSON *file*, e.g.,
                  `./my_model_directory/preprocessor_config.json`.
            **kwargs
                Additional keyword arguments passed along to both [`PreTrainedFeatureExtractor`] and
                [`PreTrainedTokenizer`]
        """
        feature_extractor = Speech2TextFeatureExtractor.from_pretrained(pretrained_model_name_or_path, **kwargs)
        tokenizer = Speech2TextTokenizer.from_pretrained(pretrained_model_name_or_path, **kwargs)
        return cls(feature_extractor=feature_extractor, tokenizer=tokenizer)
    def __call__(self, *args, **kwargs):
        """
        When used in normal mode, this method forwards all its arguments to Speech2TextFeatureExtractor's
--- a/src/transformers/models/speech_to_text_2/processing_speech_to_text_2.py
+++ b/src/transformers/models/speech_to_text_2/processing_speech_to_text_2.py
@@ -17,12 +17,10 @@ Speech processor class for Speech2Text2
 """
 from contextlib import contextmanager
-from ...feature_extraction_sequence_utils import SequenceFeatureExtractor
+from ...processing_utils import ProcessorMixin
 from ..auto.feature_extraction_auto import AutoFeatureExtractor
 from .tokenization_speech_to_text_2 import Speech2Text2Tokenizer
-class Speech2Text2Processor:
+class Speech2Text2Processor(ProcessorMixin):
    r"""
    Constructs a Speech2Text2 processor which wraps a Speech2Text2 feature extractor and a Speech2Text2 tokenizer into
    a single processor.
@@ -36,77 +34,13 @@ class Speech2Text2Processor:
        tokenizer (`Speech2Text2Tokenizer`):
            An instance of [`Speech2Text2Tokenizer`]. The tokenizer is a required input.
    """
    feature_extractor_class = "AutoFeatureExtractor"
    tokenizer_class = "Speech2Text2Tokenizer"
    def __init__(self, feature_extractor, tokenizer):
-        if not isinstance(feature_extractor, SequenceFeatureExtractor):
+        super().__init__(feature_extractor, tokenizer)
            raise ValueError(
                f"`feature_extractor` has to be of type {SequenceFeatureExtractor.__class__}, but is {type(feature_extractor)}"
            )
        if not isinstance(tokenizer, Speech2Text2Tokenizer):
            raise ValueError(
                f"`tokenizer` has to be of type {Speech2Text2Tokenizer.__class__}, but is {type(tokenizer)}"
            )
        self.feature_extractor = feature_extractor
        self.tokenizer = tokenizer
        self.current_processor = self.feature_extractor
    def save_pretrained(self, save_directory):
        """
        Save a Speech2Text2 feature extractor object and Speech2Text2 tokenizer object to the directory
        `save_directory`, so that it can be re-loaded using the [`~Speech2Text2Processor.from_pretrained`] class
        method.
        <Tip>
        This class method is simply calling [`~PreTrainedFeatureExtractor.save_pretrained`] and
        [`~tokenization_utils_base.PreTrainedTokenizer.save_pretrained`]. Please refer to the docstrings of the methods
        above for more information.
        </Tip>
        Args:
            save_directory (`str` or `os.PathLike`):
                Directory where the feature extractor JSON file and the tokenizer files will be saved (directory will
                be created if it does not exist).
        """
        self.feature_extractor.save_pretrained(save_directory)
        self.tokenizer.save_pretrained(save_directory)
    @classmethod
    def from_pretrained(cls, pretrained_model_name_or_path, **kwargs):
        r"""
        Instantiate a [`Speech2Text2Processor`] from a pretrained Speech2Text2 processor.
        <Tip>
        This class method is simply calling AutoFeatureExtractor's [`~PreTrainedFeatureExtractor.from_pretrained`] and
        Speech2Text2Tokenizer's [`~tokenization_utils_base.PreTrainedTokenizer.from_pretrained`]. Please refer to the
        docstrings of the methods above for more information.
        </Tip>
        Args:
            pretrained_model_name_or_path (`str` or `os.PathLike`):
                This can be either:
                - a string, the *model id* of a pretrained feature_extractor hosted inside a model repo on
                  huggingface.co. Valid model ids can be located at the root-level, like `bert-base-uncased`, or
                  namespaced under a user or organization name, like `dbmdz/bert-base-german-cased`.
                - a path to a *directory* containing a feature extractor file saved using the
                  [`~PreTrainedFeatureExtractor.save_pretrained`] method, e.g., `./my_model_directory/`.
                - a path or url to a saved feature extractor JSON *file*, e.g.,
                  `./my_model_directory/preprocessor_config.json`.
            **kwargs
                Additional keyword arguments passed along to both [`PreTrainedFeatureExtractor`] and
                [`PreTrainedTokenizer`]
        """
        feature_extractor = AutoFeatureExtractor.from_pretrained(pretrained_model_name_or_path, **kwargs)
        tokenizer = Speech2Text2Tokenizer.from_pretrained(pretrained_model_name_or_path, **kwargs)
        return cls(feature_extractor=feature_extractor, tokenizer=tokenizer)
    def __call__(self, *args, **kwargs):
        """
        When used in normal mode, this method forwards all its arguments to AutoFeatureExtractor's
--- a/src/transformers/models/trocr/processing_trocr.py
+++ b/src/transformers/models/trocr/processing_trocr.py
@@ -17,15 +17,10 @@ Processor class for TrOCR.
 """
 from contextlib import contextmanager
-from transformers import AutoFeatureExtractor, AutoTokenizer
+from ...processing_utils import ProcessorMixin
 from transformers.feature_extraction_utils import FeatureExtractionMixin
 from transformers.models.roberta.tokenization_roberta import RobertaTokenizer
 from transformers.models.roberta.tokenization_roberta_fast import RobertaTokenizerFast
 from transformers.models.xlm_roberta.tokenization_xlm_roberta import XLMRobertaTokenizer
 from transformers.models.xlm_roberta.tokenization_xlm_roberta_fast import XLMRobertaTokenizerFast
-class TrOCRProcessor:
+class TrOCRProcessor(ProcessorMixin):
    r"""
    Constructs a TrOCR processor which wraps a vision feature extractor and a TrOCR tokenizer into a single processor.
@@ -39,78 +34,13 @@ class TrOCRProcessor:
        tokenizer ([`RobertaTokenizer`] or [`XLMRobertaTokenizer`]):
            An instance of [`RobertaTokenizer`] or [`XLMRobertaTokenizer`]. The tokenizer is a required input.
    """
    feature_extractor_class = "AutoFeatureExtractor"
    tokenizer_class = "AutoTokenizer"
    def __init__(self, feature_extractor, tokenizer):
-        if not isinstance(feature_extractor, FeatureExtractionMixin):
+        super().__init__(feature_extractor, tokenizer)
            raise ValueError(
                f"`feature_extractor` has to be of type {FeatureExtractionMixin.__class__}, but is {type(feature_extractor)}"
            )
        if not isinstance(
            tokenizer, (RobertaTokenizer, RobertaTokenizerFast, XLMRobertaTokenizer, XLMRobertaTokenizerFast)
        ):
            raise ValueError(
                f"`tokenizer` has to be of type {RobertaTokenizer.__class__} or {RobertaTokenizerFast.__class__} or {XLMRobertaTokenizer.__class__} or {XLMRobertaTokenizerFast.__class__}, but is {type(tokenizer)}"
            )
        self.feature_extractor = feature_extractor
        self.tokenizer = tokenizer
        self.current_processor = self.feature_extractor
    def save_pretrained(self, save_directory):
        """
        Save a TrOCR feature extractor object and TrOCR tokenizer object to the directory `save_directory`, so that it
        can be re-loaded using the [`~TrOCRProcessor.from_pretrained`] class method.
        <Tip>
        This class method is simply calling [`~PreTrainedFeatureExtractor.save_pretrained`] and
        [`~tokenization_utils_base.PreTrainedTokenizer.save_pretrained`]. Please refer to the docstrings of the methods
        above for more information.
        </Tip>
        Args:
            save_directory (`str` or `os.PathLike`):
                Directory where the feature extractor JSON file and the tokenizer files will be saved (directory will
                be created if it does not exist).
        """
        self.feature_extractor.save_pretrained(save_directory)
        self.tokenizer.save_pretrained(save_directory)
    @classmethod
    def from_pretrained(cls, pretrained_model_name_or_path, **kwargs):
        r"""
        Instantiate a [`TrOCRProcessor`] from a pretrained TrOCR processor.
        <Tip>
        This class method is simply calling AutoFeatureExtractor's [`~PreTrainedFeatureExtractor.from_pretrained`] and
        AutoTokenizer's [`~tokenization_utils_base.PreTrainedTokenizer.from_pretrained`]. Please refer to the
        docstrings of the methods above for more information.
        </Tip>
        Args:
            pretrained_model_name_or_path (`str` or `os.PathLike`):
                This can be either:
                - a string, the *model id* of a pretrained feature_extractor hosted inside a model repo on
                  huggingface.co. Valid model ids can be located at the root-level, like `bert-base-uncased`, or
                  namespaced under a user or organization name, like `dbmdz/bert-base-german-cased`.
                - a path to a *directory* containing a feature extractor file saved using the
                  [`~PreTrainedFeatureExtractor.save_pretrained`] method, e.g., `./my_model_directory/`.
                - a path or url to a saved feature extractor JSON *file*, e.g.,
                  `./my_model_directory/preprocessor_config.json`.
            **kwargs
                Additional keyword arguments passed along to both [`PreTrainedFeatureExtractor`] and
                [`PreTrainedTokenizer`]
        """
        feature_extractor = AutoFeatureExtractor.from_pretrained(pretrained_model_name_or_path, **kwargs)
        tokenizer = AutoTokenizer.from_pretrained(pretrained_model_name_or_path, **kwargs)
        return cls(feature_extractor=feature_extractor, tokenizer=tokenizer)
    def __call__(self, *args, **kwargs):
        """
        When used in normal mode, this method forwards all its arguments to AutoFeatureExtractor's
--- a/src/transformers/models/vilt/processing_vilt.py
+++ b/src/transformers/models/vilt/processing_vilt.py
@@ -18,14 +18,12 @@ Processor class for ViLT.
 from typing import List, Optional, Union
 from transformers import BertTokenizerFast
 from ...file_utils import TensorType
 from ...processing_utils import ProcessorMixin
 from ...tokenization_utils_base import BatchEncoding, PaddingStrategy, PreTokenizedInput, TextInput, TruncationStrategy
 from .feature_extraction_vilt import ViltFeatureExtractor
-class ViltProcessor:
+class ViltProcessor(ProcessorMixin):
    r"""
    Constructs a ViLT processor which wraps a BERT tokenizer and ViLT feature extractor into a single processor.
@@ -38,75 +36,13 @@ class ViltProcessor:
        tokenizer (`BertTokenizerFast`):
            An instance of [`BertTokenizerFast`]. The tokenizer is a required input.
    """
    feature_extractor_class = "ViltFeatureExtractor"
    tokenizer_class = ("BertTokenizer", "BertTokenizerFast")
    def __init__(self, feature_extractor, tokenizer):
-        if not isinstance(feature_extractor, ViltFeatureExtractor):
+        super().__init__(feature_extractor, tokenizer)
            raise ValueError(
                f"`feature_extractor` has to be of type {ViltFeatureExtractor.__class__}, but is {type(feature_extractor)}"
            )
        if not isinstance(tokenizer, BertTokenizerFast):
            raise ValueError(f"`tokenizer` has to be of type {BertTokenizerFast.__class__}, but is {type(tokenizer)}")
        self.feature_extractor = feature_extractor
        self.tokenizer = tokenizer
        self.current_processor = self.feature_extractor
    def save_pretrained(self, save_directory):
        """
        Save a ViLT feature_extractor object and BERT tokenizer object to the directory `save_directory`, so that it
        can be re-loaded using the [`~ViltProcessor.from_pretrained`] class method.
        <Tip>
        This class method is simply calling [`~feature_extraction_utils.FeatureExtractionMixin.save_pretrained`] and
        [`~tokenization_utils_base.PreTrainedTokenizer.save_pretrained`]. Please refer to the docstrings of the methods
        above for more information.
        </Tip>
        Args:
            save_directory (`str` or `os.PathLike`):
                Directory where the feature extractor JSON file and the tokenizer files will be saved (directory will
                be created if it does not exist).
        """
        self.feature_extractor.save_pretrained(save_directory)
        self.tokenizer.save_pretrained(save_directory)
    @classmethod
    def from_pretrained(cls, pretrained_model_name_or_path, **kwargs):
        r"""
        Instantiate a [`ViltProcessor`] from a pretrained ViLT processor.
        <Tip>
        This class method is simply calling ViltFeatureExtractor's
        [`~feature_extraction_utils.FeatureExtractionMixin.from_pretrained`] and BertTokenizerFast's
        [`~tokenization_utils_base.PreTrainedTokenizer.from_pretrained`]. Please refer to the docstrings of the methods
        above for more information.
        </Tip>
        Args:
            pretrained_model_name_or_path (`str` or `os.PathLike`):
                This can be either:
                - a string, the *model id* of a pretrained feature_extractor hosted inside a model repo on
                  huggingface.co. Valid model ids can be located at the root-level, like `bert-base-uncased`, or
                  namespaced under a user or organization name, like `dbmdz/bert-base-german-cased`.
                - a path to a *directory* containing a feature extractor file saved using the
                  [`~SequenceFeatureExtractor.save_pretrained`] method, e.g., `./my_model_directory/`.
                - a path or url to a saved feature extractor JSON *file*, e.g.,
                  `./my_model_directory/preprocessor_config.json`.
            **kwargs
                Additional keyword arguments passed along to both [`SequenceFeatureExtractor`] and
                [`PreTrainedTokenizer`]
        """
        feature_extractor = ViltFeatureExtractor.from_pretrained(pretrained_model_name_or_path, **kwargs)
        tokenizer = BertTokenizerFast.from_pretrained(pretrained_model_name_or_path, **kwargs)
        return cls(feature_extractor=feature_extractor, tokenizer=tokenizer)
    def __call__(
        self,
        images,
--- a/src/transformers/models/vision_text_dual_encoder/processing_vision_text_dual_encoder.py
+++ b/src/transformers/models/vision_text_dual_encoder/processing_vision_text_dual_encoder.py
@@ -15,17 +15,12 @@
 """
 Processor class for VisionTextDualEncoder
 """
 from typing import Union
 from transformers import PreTrainedTokenizer, PreTrainedTokenizerFast
 from transformers.feature_extraction_utils import FeatureExtractionMixin
 from ...processing_utils import ProcessorMixin
 from ...tokenization_utils_base import BatchEncoding
 from ..auto.feature_extraction_auto import AutoFeatureExtractor
 from ..auto.tokenization_auto import AutoTokenizer
-class VisionTextDualEncoderProcessor:
+class VisionTextDualEncoderProcessor(ProcessorMixin):
    r"""
    Constructs a VisionTextDualEncoder processor which wraps a vision feature extractor and a tokenizer into a single
    processor.
@@ -40,82 +35,13 @@ class VisionTextDualEncoderProcessor:
        tokenizer ([`PreTrainedTokenizer`]):
            The tokenizer is a required input.
    """
    feature_extractor_class = "AutoFeatureExtractor"
    tokenizer_class = "AutoTokenizer"
-    def __init__(
+    def __init__(self, feature_extractor, tokenizer):
-        self, feature_extractor: FeatureExtractionMixin, tokenizer: Union[PreTrainedTokenizer, PreTrainedTokenizerFast]
+        super().__init__(feature_extractor, tokenizer)
    ):
        if not isinstance(feature_extractor, FeatureExtractionMixin):
            raise ValueError(
                f"`feature_extractor` has to be of type {FeatureExtractionMixin.__class__}, but is {type(feature_extractor)}"
            )
        if not isinstance(tokenizer, (PreTrainedTokenizer, PreTrainedTokenizerFast)):
            raise ValueError(
                f"`tokenizer` has to be of type `PreTrainedTokenizer` or `PreTrainedTokenizerFast`, but is {type(tokenizer)}"
            )
        self.feature_extractor = feature_extractor
        self.tokenizer = tokenizer
        self.current_processor = self.feature_extractor
    def save_pretrained(self, save_directory):
        """
        Save a VisionTextDualEncoder feature extractor object and VisionTextDualEncoder tokenizer object to the
        directory `save_directory`, so that it can be re-loaded using the
        [`~VisionTextDualEncoderProcessor.from_pretrained`] class method.
        <Tip>
        This class method is simply calling [`~PreTrainedFeatureExtractor.save_pretrained`] and
        [`~tokenization_utils_base.PreTrainedTokenizer.save_pretrained`]. Please refer to the docstrings of the methods
        above for more information.
        </Tip>
        Args:
            save_directory (`str` or `os.PathLike`):
                Directory where the feature extractor JSON file and the tokenizer files will be saved (directory will
                be created if it does not exist).
        """
        self.feature_extractor._set_processor_class(self.__class__.__name__)
        self.feature_extractor.save_pretrained(save_directory)
        self.tokenizer._set_processor_class(self.__class__.__name__)
        self.tokenizer.save_pretrained(save_directory)
    @classmethod
    def from_pretrained(cls, pretrained_model_name_or_path, **kwargs):
        r"""
        Instantiate a [`VisionTextDualEncoderProcessor`] from a pretrained VisionTextDualEncoder processor.
        <Tip>
        This class method is simply calling AutoFeatureExtractor's [`~PreTrainedFeatureExtractor.from_pretrained`] and
        AutoTokenizer's [`~tokenization_utils_base.PreTrainedTokenizer.from_pretrained`]. Please refer to the
        docstrings of the methods above for more information.
        </Tip>
        Args:
            pretrained_model_name_or_path (`str` or `os.PathLike`):
                This can be either:
                - a string, the *model id* of a pretrained feature_extractor hosted inside a model repo on
                  huggingface.co. Valid model ids can be located at the root-level, like `bert-base-uncased`, or
                  namespaced under a user or organization name, like `dbmdz/bert-base-german-cased`.
                - a path to a *directory* containing a feature extractor file saved using the
                  [`~PreTrainedFeatureExtractor.save_pretrained`] method, e.g., `./my_model_directory/`.
                - a path or url to a saved feature extractor JSON *file*, e.g.,
                  `./my_model_directory/preprocessor_config.json`.
            **kwargs
                Additional keyword arguments passed along to both [`PreTrainedFeatureExtractor`] and
                [`PreTrainedTokenizer`]
        """
        feature_extractor = AutoFeatureExtractor.from_pretrained(pretrained_model_name_or_path, **kwargs)
        tokenizer = AutoTokenizer.from_pretrained(pretrained_model_name_or_path, **kwargs)
        return cls(feature_extractor=feature_extractor, tokenizer=tokenizer)
    def __call__(self, text=None, images=None, return_tensors=None, **kwargs):
        """
        Main method to prepare for the model one or several sequences(s) and image(s). This method forwards the `text`
--- a/src/transformers/models/wav2vec2/processing_wav2vec2.py
+++ b/src/transformers/models/wav2vec2/processing_wav2vec2.py
@@ -18,14 +18,12 @@ Speech processor class for Wav2Vec2
 import warnings
 from contextlib import contextmanager
-from ...tokenization_utils import PreTrainedTokenizer
+from ...processing_utils import ProcessorMixin
 from ...tokenization_utils_fast import PreTrainedTokenizerFast
 from ..auto.tokenization_auto import AutoTokenizer
 from .feature_extraction_wav2vec2 import Wav2Vec2FeatureExtractor
 from .tokenization_wav2vec2 import Wav2Vec2CTCTokenizer
-class Wav2Vec2Processor:
+class Wav2Vec2Processor(ProcessorMixin):
    r"""
    Constructs a Wav2Vec2 processor which wraps a Wav2Vec2 feature extractor and a Wav2Vec2 CTC tokenizer into a single
    processor.
@@ -39,82 +37,17 @@ class Wav2Vec2Processor:
        tokenizer ([`PreTrainedTokenizer`]):
            An instance of [`PreTrainedTokenizer`]. The tokenizer is a required input.
    """
    feature_extractor_class = "Wav2Vec2FeatureExtractor"
    tokenizer_class = "AutoTokenizer"
    def __init__(self, feature_extractor, tokenizer):
-        if not isinstance(feature_extractor, Wav2Vec2FeatureExtractor):
+        super().__init__(feature_extractor, tokenizer)
            raise ValueError(
                f"`feature_extractor` has to be of type {Wav2Vec2FeatureExtractor.__class__}, but is {type(feature_extractor)}"
            )
        if not isinstance(tokenizer, (PreTrainedTokenizer, PreTrainedTokenizerFast)):
            raise ValueError(
                f"`tokenizer` has to be of type {PreTrainedTokenizer.__class__}, but is {type(tokenizer)}"
            )
        self.feature_extractor = feature_extractor
        self.tokenizer = tokenizer
        self.current_processor = self.feature_extractor
    def save_pretrained(self, save_directory):
        """
        Save a Wav2Vec2 feature_extractor object and Wav2Vec2 tokenizer object to the directory `save_directory`, so
        that it can be re-loaded using the [`~Wav2Vec2Processor.from_pretrained`] class method.
        <Tip>
        This class method is simply calling [`~feature_extraction_utils.FeatureExtractionMixin.save_pretrained`] and
        [`~tokenization_utils_base.PreTrainedTokenizer.save_pretrained`]. Please refer to the docstrings of the methods
        above for more information.
        </Tip>
        Args:
            save_directory (`str` or `os.PathLike`):
                Directory where the feature extractor JSON file and the tokenizer files will be saved (directory will
                be created if it does not exist).
        """
        self.feature_extractor._set_processor_class(self.__class__.__name__)
        self.feature_extractor.save_pretrained(save_directory)
        self.tokenizer._set_processor_class(self.__class__.__name__)
        self.tokenizer.save_pretrained(save_directory)
    @classmethod
    def from_pretrained(cls, pretrained_model_name_or_path, **kwargs):
        r"""
        Instantiate a [`Wav2Vec2Processor`] from a pretrained Wav2Vec2 processor.
        <Tip>
        This class method is simply calling Wav2Vec2FeatureExtractor's
        [`~feature_extraction_utils.FeatureExtractionMixin.from_pretrained`] and PreTrainedTokenizer's
        [`~tokenization_utils_base.PreTrainedTokenizer.from_pretrained`]. Please refer to the docstrings of the methods
        above for more information.
        </Tip>
        Args:
            pretrained_model_name_or_path (`str` or `os.PathLike`):
                This can be either:
                - a string, the *model id* of a pretrained feature_extractor hosted inside a model repo on
                  huggingface.co. Valid model ids can be located at the root-level, like `bert-base-uncased`, or
                  namespaced under a user or organization name, like `dbmdz/bert-base-german-cased`.
                - a path to a *directory* containing a feature extractor file saved using the
                  [`~SequenceFeatureExtractor.save_pretrained`] method, e.g., `./my_model_directory/`.
                - a path or url to a saved feature extractor JSON *file*, e.g.,
                  `./my_model_directory/preprocessor_config.json`.
            **kwargs
                Additional keyword arguments passed along to both [`SequenceFeatureExtractor`] and
                [`PreTrainedTokenizer`]
        """
        feature_extractor = Wav2Vec2FeatureExtractor.from_pretrained(pretrained_model_name_or_path, **kwargs)
        # load generic `AutoTokenizer`
        # need fallback here for backward compatibility in case processor is
        # loaded from just a tokenizer file that does not have a `tokenizer_class` attribute
        # behavior should be deprecated in major future release
        try:
-            tokenizer = AutoTokenizer.from_pretrained(pretrained_model_name_or_path, **kwargs)
+            return super().from_pretrained(pretrained_model_name_or_path, **kwargs)
        except OSError:
            warnings.warn(
                f"Loading a tokenizer inside {cls.__name__} from a config that does not"
@@ -124,6 +57,8 @@ class Wav2Vec2Processor:
                "file to suppress this warning: ",
                FutureWarning,
            )
            feature_extractor = Wav2Vec2FeatureExtractor.from_pretrained(pretrained_model_name_or_path, **kwargs)
            tokenizer = Wav2Vec2CTCTokenizer.from_pretrained(pretrained_model_name_or_path, **kwargs)
            return cls(feature_extractor=feature_extractor, tokenizer=tokenizer)
--- a/src/transformers/models/wav2vec2_with_lm/processing_wav2vec2_with_lm.py
+++ b/src/transformers/models/wav2vec2_with_lm/processing_wav2vec2_with_lm.py
@@ -23,16 +23,16 @@ from typing import TYPE_CHECKING, Iterable, List, Optional, Union
 import numpy as np
 from ...feature_extraction_utils import FeatureExtractionMixin
 from ...file_utils import ModelOutput, requires_backends
-from ...tokenization_utils import PreTrainedTokenizer
+from ...processing_utils import ProcessorMixin
 from ..wav2vec2.feature_extraction_wav2vec2 import Wav2Vec2FeatureExtractor
 from ..wav2vec2.tokenization_wav2vec2 import Wav2Vec2CTCTokenizer
 if TYPE_CHECKING:
    from pyctcdecode import BeamSearchDecoderCTC
    from ...feature_extraction_utils import FeatureExtractionMixin
    from ...tokenization_utils import PreTrainedTokenizerBase
@dataclass
 class Wav2Vec2DecoderWithLMOutput(ModelOutput):
@@ -47,7 +47,7 @@ class Wav2Vec2DecoderWithLMOutput(ModelOutput):
    text: Union[List[str], str]
-class Wav2Vec2ProcessorWithLM:
+class Wav2Vec2ProcessorWithLM(ProcessorMixin):
    r"""
    Constructs a Wav2Vec2 processor which wraps a Wav2Vec2 feature extractor, a Wav2Vec2 CTC tokenizer and a decoder
    with language model support into a single processor for language model boosted speech recognition decoding.
@@ -60,24 +60,18 @@ class Wav2Vec2ProcessorWithLM:
        decoder (`pyctcdecode.BeamSearchDecoderCTC`):
            An instance of [`pyctcdecode.BeamSearchDecoderCTC`]. The decoder is a required input.
    """
    feature_extractor_class = "Wav2Vec2FeatureExtractor"
    tokenizer_class = "Wav2Vec2CTCTokenizer"
    def __init__(
        self,
-        feature_extractor: FeatureExtractionMixin,
+        feature_extractor: "FeatureExtractionMixin",
-        tokenizer: PreTrainedTokenizer,
+        tokenizer: "PreTrainedTokenizerBase",
        decoder: "BeamSearchDecoderCTC",
    ):
        from pyctcdecode import BeamSearchDecoderCTC
-        if not isinstance(feature_extractor, Wav2Vec2FeatureExtractor):
+        super().__init__(feature_extractor, tokenizer)
            raise ValueError(
                f"`feature_extractor` has to be of type {Wav2Vec2FeatureExtractor.__class__}, but is {type(feature_extractor)}"
            )
        if not isinstance(tokenizer, Wav2Vec2CTCTokenizer):
            # TODO(PVP) - this can be relaxed in the future to allow other kinds of tokenizers
            raise ValueError(
                f"`tokenizer` has to be of type {Wav2Vec2CTCTokenizer.__class__}, but is {type(tokenizer)}"
            )
        if not isinstance(decoder, BeamSearchDecoderCTC):
            raise ValueError(f"`decoder` has to be of type {BeamSearchDecoderCTC.__class__}, but is {type(decoder)}")
@@ -90,37 +84,11 @@ class Wav2Vec2ProcessorWithLM:
                f"Make sure to include {missing_decoder_tokens} in the decoder's alphabet."
            )
        self.feature_extractor = feature_extractor
        self.tokenizer = tokenizer
        self.decoder = decoder
        self.current_processor = self.feature_extractor
    def save_pretrained(self, save_directory):
-        """
+        super().save_pretrained(save_directory)
        Save the Wav2Vec2 feature_extractor, a tokenizer object and a pyctcdecode decoder to the directory
        `save_directory`, so that they can be re-loaded using the [`~Wav2Vec2ProcessorWithLM.from_pretrained`] class
        method.
        <Tip>
        This class method is simply calling [`~feature_extraction_utils.FeatureExtractionMixin.save_pretrained,`]
        [`~tokenization_utils_base.PreTrainedTokenizer.save_pretrained`] and pyctcdecode's
        [`pyctcdecode.BeamSearchDecoderCTC.save_to_dir`].
        Please refer to the docstrings of the methods above for more information.
        </Tip>
        Args:
            save_directory (`str` or `os.PathLike`):
                Directory where the feature extractor JSON file and the tokenizer files will be saved (directory will
                be created if it does not exist).
        """
        self.feature_extractor._set_processor_class(self.__class__.__name__)
        self.feature_extractor.save_pretrained(save_directory)
        self.tokenizer._set_processor_class(self.__class__.__name__)
        self.tokenizer.save_pretrained(save_directory)
        self.decoder.save_to_dir(save_directory)
    @classmethod
@@ -157,8 +125,7 @@ class Wav2Vec2ProcessorWithLM:
        requires_backends(cls, "pyctcdecode")
        from pyctcdecode import BeamSearchDecoderCTC
-        feature_extractor = Wav2Vec2FeatureExtractor.from_pretrained(pretrained_model_name_or_path, **kwargs)
+        feature_extractor, tokenizer = super()._get_arguments_from_pretrained(pretrained_model_name_or_path, **kwargs)
        tokenizer = Wav2Vec2CTCTokenizer.from_pretrained(pretrained_model_name_or_path, **kwargs)
        if os.path.isdir(pretrained_model_name_or_path):
            decoder = BeamSearchDecoderCTC.load_from_dir(pretrained_model_name_or_path)
--- a/src/transformers/processing_utils.py
+++ b/src/transformers/processing_utils.py
@@ -0,0 +1,161 @@
 # coding=utf-8
 # Copyright 2022 The HuggingFace Inc. team.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 """
 Processing saving/loading class for common processors.
 """
 import importlib.util
 from pathlib import Path
 # Load the top-level `transformers` package from the `__init__.py` that sits next to this file, so that the
 # classes named in the `*_class` attributes of a processor can be resolved with `getattr(transformers_module, name)`.
 # NOTE(review): presumably this is done dynamically (instead of a plain import) to avoid a circular import while
 # the package is still being initialized -- confirm.
 # NOTE(review): `Loader.load_module()` is deprecated in the importlib documentation in favour of `exec_module()`.
 spec = importlib.util.spec_from_file_location(
    "transformers", Path(__file__).parent / "__init__.py", submodule_search_locations=[Path(__file__).parent]
 )
 transformers_module = spec.loader.load_module()
 # Maps the name of an "Auto" class to the name of the base class that is used instead for `isinstance` checks
 # (no object is ever an instance of an Auto class itself).
 AUTO_TO_BASE_CLASS_MAPPING = {
    "AutoTokenizer": "PreTrainedTokenizerBase",
    "AutoFeatureExtractor": "FeatureExtractionMixin",
 }
 class ProcessorMixin:
    """
    This is a mixin used to provide saving/loading functionality for all processor classes.

    A subclass lists the names of the objects it wraps in `attributes` and, for every name `attr` in that list,
    defines a class attribute `attr_class` holding the name of the expected class (or a tuple of names). Those names
    are resolved on `transformers_module`, both when validating the constructor arguments and when loading with
    [`~ProcessorMixin.from_pretrained`].
    """

    attributes = ["feature_extractor", "tokenizer"]
    # Names need to be attr_class for attr in attributes
    feature_extractor_class = None
    tokenizer_class = None

    # args have to match the attributes class attribute
    def __init__(self, *args, **kwargs):
        """
        Validate the wrapped objects and store each of them as an attribute of the processor.

        Args:
            *args:
                The wrapped objects, given positionally in the order of `attributes`.
            **kwargs:
                The wrapped objects, given by name. Every key has to be one of `attributes`.

        Raises:
            TypeError: If a keyword is not listed in `attributes`, or if an attribute is passed both positionally
                and by keyword.
            ValueError: If the number of objects received differs from the number of `attributes`, or if an object
                is not an instance of the class declared in the matching `attr_class`.
        """
        # Sanitize args and kwargs
        for key in kwargs:
            if key not in self.attributes:
                raise TypeError(f"Unexpected keyword argument {key}.")

        # Count before merging: `zip` below stops at the shortest input, so surplus positional arguments would
        # otherwise be dropped silently instead of being reported.
        num_received = len(args) + len(kwargs)
        for arg, attribute_name in zip(args, self.attributes):
            if attribute_name in kwargs:
                raise TypeError(f"Got multiple values for argument {attribute_name}.")
            kwargs[attribute_name] = arg

        if num_received != len(self.attributes):
            raise ValueError(
                f"This processor requires {len(self.attributes)} arguments: {', '.join(self.attributes)}. Got "
                f"{num_received} arguments instead."
            )

        # Check each arg is of the proper class (this will also catch a user initializing in the wrong order)
        for attribute_name, arg in kwargs.items():
            class_name = getattr(self, f"{attribute_name}_class")
            # Nothing is ever going to be an instance of "AutoXxx", in that case we check the base class.
            class_name = AUTO_TO_BASE_CLASS_MAPPING.get(class_name, class_name)
            if isinstance(class_name, tuple):
                # A tuple lists several acceptable classes; `None` entries are placeholders and are skipped.
                proper_class = tuple(getattr(transformers_module, n) for n in class_name if n is not None)
            else:
                proper_class = getattr(transformers_module, class_name)

            if not isinstance(arg, proper_class):
                raise ValueError(
                    f"Received a {type(arg).__name__} for argument {attribute_name}, but a {class_name} was expected."
                )

            setattr(self, attribute_name, arg)

    def __repr__(self):
        """Return the class name followed by one `- name: repr(value)` line per entry of `attributes`."""
        attributes_repr = "\n".join(f"- {name}: {repr(getattr(self, name))}" for name in self.attributes)
        return f"{self.__class__.__name__}:\n{attributes_repr}"

    def save_pretrained(self, save_directory):
        """
        Saves the attributes of this processor (feature extractor, tokenizer...) in the specified directory so that it
        can be reloaded using the [`~ProcessorMixin.from_pretrained`] method.

        <Tip>

        This method is simply calling [`~feature_extraction_utils.FeatureExtractionMixin.save_pretrained`] and
        [`~tokenization_utils_base.PreTrainedTokenizer.save_pretrained`]. Please refer to the docstrings of the methods
        above for more information.

        </Tip>

        Args:
            save_directory (`str` or `os.PathLike`):
                Directory where the feature extractor JSON file and the tokenizer files will be saved (directory will
                be created if it does not exist).
        """
        for attribute_name in self.attributes:
            attribute = getattr(self, attribute_name)
            # Include the processor class in the attribute config so this processor can then be reloaded with the
            # `AutoProcessor` API.
            if hasattr(attribute, "_set_processor_class"):
                attribute._set_processor_class(self.__class__.__name__)
            attribute.save_pretrained(save_directory)

    @classmethod
    def from_pretrained(cls, pretrained_model_name_or_path, **kwargs):
        r"""
        Instantiate a processor associated with a pretrained model.

        <Tip>

        This class method is simply calling the feature extractor
        [`~feature_extraction_utils.FeatureExtractionMixin.from_pretrained`] and the tokenizer
        [`~tokenization_utils_base.PreTrainedTokenizer.from_pretrained`] methods. Please refer to the docstrings of the
        methods above for more information.

        </Tip>

        Args:
            pretrained_model_name_or_path (`str` or `os.PathLike`):
                This can be either:

                - a string, the *model id* of a pretrained feature_extractor hosted inside a model repo on
                  huggingface.co. Valid model ids can be located at the root-level, like `bert-base-uncased`, or
                  namespaced under a user or organization name, like `dbmdz/bert-base-german-cased`.
                - a path to a *directory* containing a feature extractor file saved using the
                  [`~SequenceFeatureExtractor.save_pretrained`] method, e.g., `./my_model_directory/`.
                - a path or url to a saved feature extractor JSON *file*, e.g.,
                  `./my_model_directory/preprocessor_config.json`.
            **kwargs
                Additional keyword arguments passed along to both
                [`~feature_extraction_utils.FeatureExtractionMixin.from_pretrained`] and
                [`~tokenization_utils_base.PreTrainedTokenizer.from_pretrained`].

        Returns:
            An instance of `cls` built from the objects loaded for each entry of `attributes`.
        """
        args = cls._get_arguments_from_pretrained(pretrained_model_name_or_path, **kwargs)
        return cls(*args)

    @classmethod
    def _get_arguments_from_pretrained(cls, pretrained_model_name_or_path, **kwargs):
        """
        Load one object per entry of `attributes`, in that order, by calling `from_pretrained` on its class.

        When `attr_class` is a tuple, its second entry is used if `use_fast` (read from `kwargs`, `True` when absent)
        is truthy and that entry is not `None`; otherwise the first entry is used. All `kwargs` are forwarded
        unchanged to every `from_pretrained` call.

        Returns:
            `list`: The loaded objects, in the order of `attributes`.
        """
        use_fast = kwargs.get("use_fast", True)
        args = []
        for attribute_name in cls.attributes:
            class_name = getattr(cls, f"{attribute_name}_class")
            if isinstance(class_name, tuple):
                classes = tuple(getattr(transformers_module, n) if n is not None else None for n in class_name)
                if use_fast and classes[1] is not None:
                    attribute_class = classes[1]
                else:
                    attribute_class = classes[0]
            else:
                attribute_class = getattr(transformers_module, class_name)

            args.append(attribute_class.from_pretrained(pretrained_model_name_or_path, **kwargs))
        return args