[breaking|pipelines|tokenizers] Adding slow-fast tokenizers equivalence tests pipelines - Removing sentencepiece as a required dependency (#8073)

* Fixing roberta for slow-fast tests * WIP getting equivalence on pipelines * slow-to-fast equivalence - working on question-answering pipeline * optional FAISS tests * Pipeline Q&A * Move pipeline tests to their own test job again * update tokenizer to add sequence id methods * update to tokenizers 0.9.4 * set sentencepiecce as optional * clean up squad * clean up pipelines to use sequence_ids * style/quality * wording * Switch to use_fast = True by default * update tests for use_fast at True by default * fix rag tokenizer test * removing protobuf from required dependencies * fix NER test for use_fast = True by default * fixing example tests (Q&A examples use slow tokenizers for now) * protobuf in main deps extras["sentencepiece"] and example deps * fix protobug install test * try to fix seq2seq by switching to slow tokenizers for now * Update src/transformers/tokenization_utils_base.py Co-authored-by: Lysandre Debut <lysandre@huggingface.co> * Update src/transformers/tokenization_utils_base.py Co-authored-by: Lysandre Debut <lysandre@huggingface.co> Co-authored-by: Lysandre Debut <lysandre@huggingface.co>
2020-11-15 22:50:59 +01:00
parent 24184e73c4
commit f4e04cd2c6
23 changed files with 689 additions and 262 deletions
--- a/examples/question-answering/run_squad.py
+++ b/examples/question-answering/run_squad.py
@@ -736,6 +736,7 @@ def main():
        args.tokenizer_name if args.tokenizer_name else args.model_name_or_path,
        do_lower_case=args.do_lower_case,
        cache_dir=args.cache_dir if args.cache_dir else None,
        use_fast=False,  # SquadDataset is not compatible with Fast tokenizers which have a smarter overflow handling
    )
    model = AutoModelForQuestionAnswering.from_pretrained(
        args.model_name_or_path,
@@ -784,7 +785,10 @@ def main():
        # Load a trained model and vocabulary that you have fine-tuned
        model = AutoModelForQuestionAnswering.from_pretrained(args.output_dir)  # , force_download=True)
-        tokenizer = AutoTokenizer.from_pretrained(args.output_dir, do_lower_case=args.do_lower_case)
+
        # SquadDataset is not compatible with Fast tokenizers which have a smarter overflow handling
        # So we use use_fast=False here for now until Fast-tokenizer-compatible-examples are out
        tokenizer = AutoTokenizer.from_pretrained(args.output_dir, do_lower_case=args.do_lower_case, use_fast=False)
        model.to(args.device)
    # Evaluation - we can ask to evaluate all the checkpoints (sub-directories) in a directory
--- a/examples/question-answering/run_squad_trainer.py
+++ b/examples/question-answering/run_squad_trainer.py
@@ -114,6 +114,7 @@ def main():
    tokenizer = AutoTokenizer.from_pretrained(
        model_args.tokenizer_name if model_args.tokenizer_name else model_args.model_name_or_path,
        cache_dir=model_args.cache_dir,
        use_fast=False,  # SquadDataset is not compatible with Fast tokenizers which have a smarter overflow handling
    )
    model = AutoModelForQuestionAnswering.from_pretrained(
        model_args.model_name_or_path,
--- a/examples/requirements.txt
+++ b/examples/requirements.txt
@@ -18,3 +18,4 @@ fire
 pytest
 conllu
 sentencepiece != 0.1.92
 protobuf
--- a/examples/seq2seq/test_datasets.py
+++ b/examples/seq2seq/test_datasets.py
@@ -197,7 +197,7 @@ class TestAll(TestCasePlus):
    )
    @require_torch_non_multi_gpu_but_fix_me
    def test_dataset_kwargs(self, tok_name):
-        tokenizer = AutoTokenizer.from_pretrained(tok_name)
+        tokenizer = AutoTokenizer.from_pretrained(tok_name, use_fast=False)
        if tok_name == MBART_TINY:
            train_dataset = Seq2SeqDataset(
                tokenizer,
--- a/setup.py
+++ b/setup.py
@@ -96,13 +96,13 @@ else:
    extras["retrieval"] = ["faiss-cpu", "datasets"]
    extras["flax"] = ["jaxlib==0.1.55", "jax>=0.2.0", "flax==0.2.2"]
-extras["tokenizers"] = ["tokenizers==0.9.2"]
+extras["tokenizers"] = ["tokenizers==0.9.4"]
 extras["onnxruntime"] = ["onnxruntime>=1.4.0", "onnxruntime-tools>=1.4.2"]
 extras["modelcreation"] = ["cookiecutter==1.7.2"]
 extras["serving"] = ["pydantic", "uvicorn", "fastapi", "starlette"]
-extras["sentencepiece"] = ["sentencepiece==0.1.91"]
+extras["sentencepiece"] = ["sentencepiece==0.1.91", "protobuf"]
 extras["retrieval"] = ["faiss-cpu", "datasets"]
 extras["testing"] = ["pytest", "pytest-xdist", "timeout-decorator", "parameterized", "psutil"] + extras["retrieval"] + extras["modelcreation"]
 # sphinx-rtd-theme==0.5.0 introduced big changes in the style.
@@ -130,7 +130,7 @@ setup(
    packages=find_packages("src"),
    install_requires=[
        "numpy",
-        "tokenizers == 0.9.3",
+        "tokenizers == 0.9.4",
        # dataclasses for Python versions that don't have it
        "dataclasses;python_version<'3.7'",
        # utilities from PyPA to e.g. compare versions
@@ -143,9 +143,6 @@ setup(
        "tqdm >= 4.27",
        # for OpenAI GPT
        "regex != 2019.12.17",
        # for SentencePiece models
        "sentencepiece == 0.1.91",
        "protobuf",
        # for XLM
        "sacremoses",
    ],
--- a/src/transformers/convert_slow_tokenizer.py
+++ b/src/transformers/convert_slow_tokenizer.py
@@ -24,10 +24,7 @@ from typing import Dict, List, Tuple
 from tokenizers import Tokenizer, decoders, normalizers, pre_tokenizers, processors
 from tokenizers.models import BPE, Unigram, WordPiece
-# from transformers.tokenization_openai import OpenAIGPTTokenizer
+from .file_utils import requires_protobuf, requires_sentencepiece
 from transformers.utils import sentencepiece_model_pb2 as model
 from .file_utils import requires_sentencepiece
 class SentencePieceExtractor:
@@ -64,12 +61,6 @@ def check_number_comma(piece: str) -> bool:
    return len(piece) < 2 or piece[-1] != "," or not piece[-2].isdigit()
 def get_proto(filename: str):
    """
    Read a serialized SentencePiece model from disk and parse it into a ``ModelProto`` message.

    Args:
        filename (:obj:`str`): Path of the binary SentencePiece model file to read.

    Returns:
        The ``ModelProto`` instance populated from the content of ``filename``.
    """
    proto = model.ModelProto()
    # Use a context manager so the file handle is closed deterministically instead of being left to the
    # garbage collector.
    with open(filename, "rb") as handle:
        proto.ParseFromString(handle.read())
    return proto
 class Converter:
    def __init__(self, original_tokenizer):
        self.original_tokenizer = original_tokenizer
@@ -292,8 +283,15 @@ class RobertaConverter(Converter):
 class SpmConverter(Converter):
    def __init__(self, *args):
        requires_protobuf(self)
        super().__init__(*args)
-        self.proto = get_proto(self.original_tokenizer.vocab_file)
+
        from .utils import sentencepiece_model_pb2 as model_pb2
        m = model_pb2.ModelProto()
        m.ParseFromString(open(self.original_tokenizer.vocab_file, "rb").read())
        self.proto = m
    def vocab(self, proto):
        return [(piece.piece, piece.score) for piece in proto.pieces]
--- a/src/transformers/data/processors/squad.py
+++ b/src/transformers/data/processors/squad.py
@@ -8,7 +8,7 @@ from tqdm import tqdm
 from ...file_utils import is_tf_available, is_torch_available
 from ...tokenization_bert import whitespace_tokenize
-from ...tokenization_utils_base import PreTrainedTokenizerBase, TruncationStrategy
+from ...tokenization_utils_base import BatchEncoding, PreTrainedTokenizerBase, TruncationStrategy
 from ...utils import logging
 from .utils import DataProcessor
@@ -765,6 +765,7 @@ class SquadFeatures:
        token_to_orig_map: mapping between the tokens and the original text, needed in order to identify the answer.
        start_position: start of the answer token index
        end_position: end of the answer token index
        encoding: optionally store the BatchEncoding with the fast-tokenizer alignment methods.
    """
    def __init__(
@@ -784,6 +785,7 @@ class SquadFeatures:
        end_position,
        is_impossible,
        qas_id: str = None,
        encoding: BatchEncoding = None,
    ):
        self.input_ids = input_ids
        self.attention_mask = attention_mask
@@ -803,6 +805,8 @@ class SquadFeatures:
        self.is_impossible = is_impossible
        self.qas_id = qas_id
        self.encoding = encoding
 class SquadResult:
    """
--- a/src/transformers/file_utils.py
+++ b/src/transformers/file_utils.py
@@ -185,6 +185,15 @@ except ImportError:
    _sentencepiece_available = False
 try:
    import google.protobuf  # noqa: F401
    _protobuf_available = True
 except ImportError:
    _protobuf_available = False
 try:
    import tokenizers  # noqa: F401
@@ -270,6 +279,10 @@ def is_sentencepiece_available():
    return _sentencepiece_available
 def is_protobuf_available():
    """Return the module-level flag recording whether ``import google.protobuf`` succeeded."""
    return _protobuf_available
 def is_tokenizers_available():
    return _tokenizers_available
@@ -330,6 +343,14 @@ that match your environment.
 """
 # docstyle-ignore
 PROTOBUF_IMPORT_ERROR = """
 {0} requires the protobuf library but it was not found in your environment. Checkout the instructions on the
 installation page of its repo: https://github.com/protocolbuffers/protobuf/tree/master/python#installation and follow the ones
 that match your environment.
 """
 # docstyle-ignore
 FAISS_IMPORT_ERROR = """
 {0} requires the faiss library but it was not found in your environment. Checkout the instructions on the
@@ -420,6 +441,12 @@ def requires_sentencepiece(obj):
        raise ImportError(SENTENCEPIECE_IMPORT_ERROR.format(name))
 def requires_protobuf(obj):
    """
    Raise an :obj:`ImportError` mentioning ``obj`` when the protobuf library is not available.

    Args:
        obj: The object (class, function or instance) that needs protobuf. Its ``__name__`` is used in the error
            message when present, otherwise the name of its class.

    Raises:
        :obj:`ImportError`: If :func:`is_protobuf_available` returns a falsy value.
    """
    # Instances usually carry no ``__name__``, so fall back to the name of their class.
    name = getattr(obj, "__name__", obj.__class__.__name__)
    if is_protobuf_available():
        return
    raise ImportError(PROTOBUF_IMPORT_ERROR.format(name))
 def add_start_docstrings(*docstr):
    def docstring_decorator(fn):
        fn.__doc__ = "".join(docstr) + (fn.__doc__ if fn.__doc__ is not None else "")
--- a/src/transformers/pipelines.py
+++ b/src/transformers/pipelines.py
@@ -32,7 +32,7 @@ import numpy as np
 from .configuration_auto import AutoConfig
 from .configuration_utils import PretrainedConfig
-from .data import SquadExample, squad_convert_examples_to_features
+from .data import SquadExample, SquadFeatures, squad_convert_examples_to_features
 from .file_utils import add_end_docstrings, is_tf_available, is_torch_available
 from .modelcard import ModelCard
 from .tokenization_auto import AutoTokenizer
@@ -1758,6 +1758,7 @@ class QuestionAnsweringPipeline(Pipeline):
            - **answer** (:obj:`str`) -- The answer to the question.
        """
        # Set defaults values
        kwargs.setdefault("padding", "longest")
        kwargs.setdefault("topk", 1)
        kwargs.setdefault("doc_stride", 128)
        kwargs.setdefault("max_answer_len", 15)
@@ -1773,19 +1774,87 @@ class QuestionAnsweringPipeline(Pipeline):
        # Convert inputs to features
        examples = self._args_parser(*args, **kwargs)
-        features_list = [
+        if not self.tokenizer.is_fast:
-            squad_convert_examples_to_features(
+            features_list = [
-                examples=[example],
+                squad_convert_examples_to_features(
-                tokenizer=self.tokenizer,
+                    examples=[example],
-                max_seq_length=kwargs["max_seq_len"],
+                    tokenizer=self.tokenizer,
-                doc_stride=kwargs["doc_stride"],
+                    max_seq_length=kwargs["max_seq_len"],
-                max_query_length=kwargs["max_question_len"],
+                    doc_stride=kwargs["doc_stride"],
-                padding_strategy=PaddingStrategy.MAX_LENGTH.value,
+                    max_query_length=kwargs["max_question_len"],
-                is_training=False,
+                    padding_strategy=PaddingStrategy.MAX_LENGTH.value,
-                tqdm_enabled=False,
+                    is_training=False,
-            )
+                    tqdm_enabled=False,
-            for example in examples
+                )
-        ]
+                for example in examples
            ]
        else:
            features_list = []
            for example in examples:
                # Define the side we want to truncate / pad and the text/pair sorting
                question_first = bool(self.tokenizer.padding_side == "right")
                encoded_inputs = self.tokenizer(
                    text=example.question_text if question_first else example.context_text,
                    text_pair=example.context_text if question_first else example.question_text,
                    padding=kwargs["padding"],
                    truncation="only_second" if question_first else "only_first",
                    max_length=kwargs["max_seq_len"],
                    stride=kwargs["doc_stride"],
                    return_tensors="np",
                    return_token_type_ids=True,
                    return_overflowing_tokens=True,
                    return_offsets_mapping=True,
                    return_special_tokens_mask=True,
                )
                # When the input is too long, it's converted in a batch of inputs with overflowing tokens
                # and a stride of overlap between the inputs. If a batch of inputs is given, a special output
                # "overflow_to_sample_mapping" indicate which member of the encoded batch belong to which original batch sample.
                # Here we tokenize examples one-by-one so we don't need to use "overflow_to_sample_mapping".
                # "num_span" is the number of output samples generated from the overflowing tokens.
                num_spans = len(encoded_inputs["input_ids"])
                # p_mask: mask with 1 for token than cannot be in the answer (0 for token which can be in an answer)
                # We put 0 on the tokens from the context and 1 everywhere else (question and special tokens)
                p_mask = np.asarray(
                    [
                        [tok != 1 if question_first else 0 for tok in encoded_inputs.sequence_ids(span_id)]
                        for span_id in range(num_spans)
                    ]
                )
                # keep the cls_token unmasked (some models use it to indicate unanswerable questions)
                if self.tokenizer.cls_token_id:
                    cls_index = np.nonzero(encoded_inputs["input_ids"] == self.tokenizer.cls_token_id)
                    p_mask[cls_index] = 0
                features = []
                for span_idx in range(num_spans):
                    features.append(
                        SquadFeatures(
                            input_ids=encoded_inputs["input_ids"][span_idx],
                            attention_mask=encoded_inputs["attention_mask"][span_idx],
                            token_type_ids=encoded_inputs["token_type_ids"][span_idx],
                            p_mask=p_mask[span_idx].tolist(),
                            encoding=encoded_inputs[span_idx],
                            # We don't use the rest of the values - and actually
                            # for Fast tokenizer we could totally avoid using SquadFeatures and SquadExample
                            cls_index=None,
                            token_to_orig_map={},
                            example_index=0,
                            unique_id=0,
                            paragraph_len=0,
                            token_is_max_context=0,
                            tokens=[],
                            start_position=0,
                            end_position=0,
                            is_impossible=False,
                            qas_id=None,
                        )
                    )
                features_list.append(features)
        all_answers = []
        for features, example in zip(features_list, examples):
            model_input_names = self.tokenizer.model_input_names + ["input_ids"]
@@ -1828,20 +1897,56 @@ class QuestionAnsweringPipeline(Pipeline):
                start_[0] = end_[0] = 0.0
                starts, ends, scores = self.decode(start_, end_, kwargs["topk"], kwargs["max_answer_len"])
-                char_to_word = np.array(example.char_to_word_offset)
+                if not self.tokenizer.is_fast:
                    char_to_word = np.array(example.char_to_word_offset)
-                # Convert the answer (tokens) back to the original text
+                    # Convert the answer (tokens) back to the original text
-                answers += [
+                    # Score: score from the model
-                    {
+                    # Start: Index of the first character of the answer in the context string
-                        "score": score.item(),
+                    # End: Index of the character following the last character of the answer in the context string
-                        "start": np.where(char_to_word == feature.token_to_orig_map[s])[0][0].item(),
+                    # Answer: Plain text of the answer
-                        "end": np.where(char_to_word == feature.token_to_orig_map[e])[0][-1].item(),
+                    answers += [
-                        "answer": " ".join(
+                        {
-                            example.doc_tokens[feature.token_to_orig_map[s] : feature.token_to_orig_map[e] + 1]
+                            "score": score.item(),
-                        ),
+                            "start": np.where(char_to_word == feature.token_to_orig_map[s])[0][0].item(),
-                    }
+                            "end": np.where(char_to_word == feature.token_to_orig_map[e])[0][-1].item(),
-                    for s, e, score in zip(starts, ends, scores)
+                            "answer": " ".join(
-                ]
+                                example.doc_tokens[feature.token_to_orig_map[s] : feature.token_to_orig_map[e] + 1]
                            ),
                        }
                        for s, e, score in zip(starts, ends, scores)
                    ]
                else:
                    # Convert the answer (tokens) back to the original text
                    # Score: score from the model
                    # Start: Index of the first character of the answer in the context string
                    # End: Index of the character following the last character of the answer in the context string
                    # Answer: Plain text of the answer
                    question_first = bool(self.tokenizer.padding_side == "right")
                    enc = feature.encoding
                    # Sometimes the max probability token is in the middle of a word so:
                    # - we start by finding the right word containing the token with `token_to_word`
                    # - then we convert this word in a character span with `word_to_chars`
                    answers += [
                        {
                            "score": score.item(),
                            "start": enc.word_to_chars(
                                enc.token_to_word(s), sequence_index=1 if question_first else 0
                            )[0],
                            "end": enc.word_to_chars(enc.token_to_word(e), sequence_index=1 if question_first else 0)[
                                1
                            ],
                            "answer": example.context_text[
                                enc.word_to_chars(enc.token_to_word(s), sequence_index=1 if question_first else 0)[
                                    0
                                ] : enc.word_to_chars(enc.token_to_word(e), sequence_index=1 if question_first else 0)[
                                    1
                                ]
                            ],
                        }
                        for s, e, score in zip(starts, ends, scores)
                    ]
            if kwargs["handle_impossible_answer"]:
                answers.append({"score": min_null_score, "start": 0, "end": 0, "answer": ""})
@@ -2735,7 +2840,7 @@ def pipeline(
    tokenizer: Optional[Union[str, PreTrainedTokenizer]] = None,
    framework: Optional[str] = None,
    revision: Optional[str] = None,
-    use_fast: bool = False,
+    use_fast: bool = True,
    **kwargs
 ) -> Pipeline:
    """
@@ -2795,7 +2900,7 @@ def pipeline(
            When passing a task name or a string model identifier: The specific model version to use. It can be a
            branch name, a tag name, or a commit id, since we use a git-based system for storing models and other
            artifacts on huggingface.co, so ``revision`` can be any identifier allowed by git.
-        use_fast (:obj:`bool`, `optional`, defaults to :obj:`False`):
+        use_fast (:obj:`bool`, `optional`, defaults to :obj:`True`):
            Whether or not to use a Fast tokenizer if possible (a :class:`~transformers.PreTrainedTokenizerFast`).
        kwargs:
            Additional keyword arguments passed along to the specific pipeline init (see the documentation for the
--- a/src/transformers/tokenization_auto.py
+++ b/src/transformers/tokenization_auto.py
@@ -280,7 +280,7 @@ class AutoTokenizer:
                The specific model version to use. It can be a branch name, a tag name, or a commit id, since we use a
                git-based system for storing models and other artifacts on huggingface.co, so ``revision`` can be any
                identifier allowed by git.
-            use_fast (:obj:`bool`, `optional`, defaults to :obj:`False`):
+            use_fast (:obj:`bool`, `optional`, defaults to :obj:`True`):
                Whether or not to try to load the fast version of the tokenizer.
            kwargs (additional keyword arguments, `optional`):
                Will be passed to the Tokenizer ``__init__()`` method. Can be used to set special tokens like
@@ -308,7 +308,7 @@ class AutoTokenizer:
        if "bert-base-japanese" in str(pretrained_model_name_or_path):
            return BertJapaneseTokenizer.from_pretrained(pretrained_model_name_or_path, *inputs, **kwargs)
-        use_fast = kwargs.pop("use_fast", False)
+        use_fast = kwargs.pop("use_fast", True)
        if config.tokenizer_class is not None:
            if use_fast and not config.tokenizer_class.endswith("Fast"):
--- a/src/transformers/tokenization_roberta_fast.py
+++ b/src/transformers/tokenization_roberta_fast.py
@@ -18,6 +18,7 @@ from typing import List, Optional
 from .tokenization_gpt2_fast import GPT2TokenizerFast
 from .tokenization_roberta import RobertaTokenizer
 from .tokenization_utils_base import AddedToken
 from .utils import logging
@@ -172,6 +173,32 @@ class RobertaTokenizerFast(GPT2TokenizerFast):
            **kwargs,
        )
    @property
    def mask_token(self) -> str:
        """
        :obj:`str`: Mask token, to use when training a model with masked-language modeling. Log an error if used while
        not having been set.

        Roberta tokenizer has a special mask token to be usable in the fill-mask pipeline. The mask token will
        greedily comprise the space before the `<mask>`.
        """
        token = self._mask_token
        if token is None and self.verbose:
            logger.error("Using mask_token, but it is not set yet.")
            return None
        # NOTE: an unset token with ``verbose`` disabled falls through to here and is stringified as well.
        return str(token)

    @mask_token.setter
    def mask_token(self, value):
        """
        Overriding the default behavior of the mask token to have it eat the space before it.

        This is needed to preserve backward compatibility with all the previously used models based on Roberta.
        """
        if isinstance(value, str):
            # The mask token behaves like a normal word, i.e. it includes the space before it,
            # hence ``lstrip=True``.
            value = AddedToken(value, lstrip=True, rstrip=False)
        self._mask_token = value
    def build_inputs_with_special_tokens(self, token_ids_0, token_ids_1=None):
        output = [self.bos_token_id] + token_ids_0 + [self.eos_token_id]
        if token_ids_1 is None:
--- a/src/transformers/tokenization_utils_base.py
+++ b/src/transformers/tokenization_utils_base.py
@@ -182,7 +182,9 @@ def to_py_obj(obj):
    """
    Convert a TensorFlow tensor, PyTorch tensor, Numpy array or python list to a python list.
    """
-    if isinstance(obj, (list, tuple)):
+    if isinstance(obj, (dict, BatchEncoding)):
        return {k: to_py_obj(v) for k, v in obj.items()}
    elif isinstance(obj, (list, tuple)):
        return [to_py_obj(o) for o in obj]
    elif is_tf_available() and isinstance(obj, tf.Tensor):
        return obj.numpy().tolist()
@@ -216,6 +218,9 @@ class BatchEncoding(UserDict):
            initialization.
        prepend_batch_axis (:obj:`bool`, `optional`, defaults to :obj:`False`):
            Whether or not to add a batch axis when converting to tensors (see :obj:`tensor_type` above).
        n_sequences (:obj:`Optional[int]`, `optional`):
            The number of sequences used to generate each sample from the batch encoded in this
            :class:`~transformers.BatchEncoding`. Can be :obj:`None` (unknown), :obj:`1` (a single sentence) or
            :obj:`2` (a pair of sentences).
    """
    def __init__(
@@ -224,6 +229,7 @@ class BatchEncoding(UserDict):
        encoding: Optional[Union[EncodingFast, Sequence[EncodingFast]]] = None,
        tensor_type: Union[None, str, TensorType] = None,
        prepend_batch_axis: bool = False,
        n_sequences: Optional[int] = None,
    ):
        super().__init__(data)
@@ -232,8 +238,22 @@ class BatchEncoding(UserDict):
        self._encodings = encoding
        if n_sequences is None and encoding is not None and len(encoding):
            n_sequences = encoding[0].n_sequences
        self._n_sequences = n_sequences
        self.convert_to_tensors(tensor_type=tensor_type, prepend_batch_axis=prepend_batch_axis)
    @property
    def n_sequences(self) -> Optional[int]:
        """
        :obj:`Optional[int]`: The number of sequences used to generate each sample from the batch encoded in this
        :class:`~transformers.BatchEncoding`. Currently can be one of :obj:`None` (unknown), :obj:`1` (a single
        sentence) or :obj:`2` (a pair of sentences)
        """
        # Return the private backing attribute assigned in ``__init__``. Returning ``self.n_sequences`` here
        # would re-enter this property and recurse until a ``RecursionError`` is raised.
        return self._n_sequences
    @property
    def is_fast(self) -> bool:
        """
@@ -311,6 +331,27 @@ class BatchEncoding(UserDict):
            raise ValueError("tokens() is not available when using Python-based tokenizers")
        return self._encodings[batch_index].tokens
    def sequence_ids(self, batch_index: int = 0) -> List[Optional[int]]:
        """
        Return, for every token of one sample, the id of the input sequence the token comes from:

            - :obj:`None` for special tokens added around or between sequences,
            - :obj:`0` for tokens corresponding to words in the first sequence,
            - :obj:`1` for tokens corresponding to words in the second sequence when a pair of sequences was jointly
              encoded.

        Args:
            batch_index (:obj:`int`, `optional`, defaults to 0): The index to access in the batch.

        Returns:
            :obj:`List[Optional[int]]`: A list indicating the sequence id corresponding to each token. Special tokens
            added by the tokenizer are mapped to :obj:`None` and other tokens are mapped to the index of their
            corresponding sequence.

        Raises:
            :obj:`ValueError`: If this object holds no backend encodings.
        """
        backend_encodings = self._encodings
        if not backend_encodings:
            raise ValueError("sequence_ids() is not available when using Python-based tokenizers")
        selected = backend_encodings[batch_index]
        return selected.sequence_ids
    def words(self, batch_index: int = 0) -> List[Optional[int]]:
        """
        Return a list mapping the tokens to their actual word in the initial sentence for a fast tokenizer.
@@ -325,7 +366,67 @@ class BatchEncoding(UserDict):
        """
        if not self._encodings:
            raise ValueError("words() is not available when using Python-based tokenizers")
-        return self._encodings[batch_index].words
+        warnings.warn(
            "`BatchEncoding.words()` property is deprecated and should be replaced with the identical, "
            "but more self-explanatory `BatchEncoding.word_ids()` property.",
            FutureWarning,
        )
        return self.word_ids(batch_index)
    def word_ids(self, batch_index: int = 0) -> List[Optional[int]]:
        """
        Return, for every token of one sample, the index of the word it belongs to in the initial sentence (fast
        tokenizers only).

        Args:
            batch_index (:obj:`int`, `optional`, defaults to 0): The index to access in the batch.

        Returns:
            :obj:`List[Optional[int]]`: A list indicating the word corresponding to each token. Special tokens added by
            the tokenizer are mapped to :obj:`None` and other tokens are mapped to the index of their corresponding
            word (several tokens will be mapped to the same word index if they are parts of that word).

        Raises:
            :obj:`ValueError`: If this object holds no backend encodings.
        """
        backend_encodings = self._encodings
        if not backend_encodings:
            raise ValueError("word_ids() is not available when using Python-based tokenizers")
        selected = backend_encodings[batch_index]
        return selected.word_ids
    def token_to_sequence(self, batch_or_token_index: int, token_index: Optional[int] = None) -> int:
        """
        Get the index of the sequence represented by the given token. In the general use case, this method returns
        :obj:`0` for a single sequence or the first sequence of a pair, and :obj:`1` for the second sequence of a pair.

        Can be called as:

        - ``self.token_to_sequence(token_index)`` if batch size is 1
        - ``self.token_to_sequence(batch_index, token_index)`` if batch size is greater than 1

        Args:
            batch_or_token_index (:obj:`int`):
                Index of the sequence in the batch. If the batch only comprises one sequence, this can be the index of
                the token in the sequence.
            token_index (:obj:`int`, `optional`):
                If a batch index is provided in `batch_or_token_index`, this can be the index of the token in the
                sequence.

        Returns:
            :obj:`int`: Index of the input sequence the token belongs to, as reported by the backend encoding.

        Raises:
            :obj:`ValueError`: If this object holds no backend encodings.
        """
        if not self._encodings:
            raise ValueError("token_to_sequence() is not available when using Python based tokenizers")

        # A single positional argument is a token index inside the first (only) sample of the batch.
        if token_index is None:
            batch_index, token_index = 0, batch_or_token_index
        else:
            batch_index = batch_or_token_index

        # Negative indices are shifted by the batch size / sequence length before being handed to the backend.
        # NOTE(review): ``_batch_size`` and ``_seq_len`` are not assigned in the visible part of this class;
        # confirm they are defined before relying on negative indices.
        if batch_index < 0:
            batch_index += self._batch_size
        if token_index < 0:
            token_index += self._seq_len

        return self._encodings[batch_index].token_to_sequence(token_index)
    def token_to_word(self, batch_or_token_index: int, token_index: Optional[int] = None) -> int:
        """
@@ -365,9 +466,11 @@ class BatchEncoding(UserDict):
            token_index = self._seq_len + token_index
        return self._encodings[batch_index].token_to_word(token_index)
-    def word_to_tokens(self, batch_or_word_index: int, word_index: Optional[int] = None) -> Optional[TokenSpan]:
+    def word_to_tokens(
        self, batch_or_word_index: int, word_index: Optional[int] = None, sequence_index: int = 0
    ) -> Optional[TokenSpan]:
        """
-        Get the encoded token span corresponding to a word in the sequence of the batch.
+        Get the encoded token span corresponding to a word in a sequence of the batch.
        Token spans are returned as a :class:`~transformers.tokenization_utils_base.TokenSpan` with:
@@ -376,8 +479,9 @@ class BatchEncoding(UserDict):
        Can be called as:
-        - ``self.word_to_tokens(word_index)`` if batch size is 1
+        - ``self.word_to_tokens(word_index, sequence_index: int = 0)`` if batch size is 1
-        - ``self.word_to_tokens(batch_index, word_index)`` if batch size is greater or equal to 1
+        - ``self.word_to_tokens(batch_index, word_index, sequence_index: int = 0)`` if batch size is greater or equal
          to 1
        This method is particularly suited when the input sequences are provided as pre-tokenized sequences (i.e. words
        are defined by the user). In this case it allows to easily associate encoded tokens with provided tokenized
@@ -390,6 +494,9 @@ class BatchEncoding(UserDict):
            word_index (:obj:`int`, `optional`):
                If a batch index is provided in `batch_or_word_index`, this can be the index of the word in the
                sequence.
            sequence_index (:obj:`int`, `optional`, defaults to 0):
                If a pair of sequences is encoded in the batch, this can be used to specify which sequence in the
                pair (0 or 1) the provided word index belongs to.
        Returns:
            Optional :class:`~transformers.tokenization_utils_base.TokenSpan` Span of tokens in the encoded sequence.
@@ -407,7 +514,7 @@ class BatchEncoding(UserDict):
            batch_index = self._batch_size + batch_index
        if word_index < 0:
            word_index = self._seq_len + word_index
-        span = self._encodings[batch_index].word_to_tokens(word_index)
+        span = self._encodings[batch_index].word_to_tokens(word_index, sequence_index)
        return TokenSpan(*span) if span is not None else None
    def token_to_chars(self, batch_or_token_index: int, token_index: Optional[int] = None) -> CharSpan:
@@ -446,7 +553,9 @@ class BatchEncoding(UserDict):
            token_index = batch_or_token_index
        return CharSpan(*(self._encodings[batch_index].token_to_chars(token_index)))
-    def char_to_token(self, batch_or_char_index: int, char_index: Optional[int] = None) -> int:
+    def char_to_token(
        self, batch_or_char_index: int, char_index: Optional[int] = None, sequence_index: int = 0
    ) -> int:
        """
        Get the index of the token in the encoded output comprising a character in the original string for a sequence
        of the batch.
@@ -467,6 +576,9 @@ class BatchEncoding(UserDict):
            char_index (:obj:`int`, `optional`):
                If a batch index is provided in `batch_or_char_index`, this can be the index of the character in the
                original string.
            sequence_index (:obj:`int`, `optional`, defaults to 0):
                If a pair of sequences is encoded in the batch, this can be used to specify which sequence in the
                pair (0 or 1) the provided character index belongs to.
        Returns:
@@ -480,9 +592,11 @@ class BatchEncoding(UserDict):
        else:
            batch_index = 0
            char_index = batch_or_char_index
-        return self._encodings[batch_index].char_to_token(char_index)
+        return self._encodings[batch_index].char_to_token(char_index, sequence_index)
-    def word_to_chars(self, batch_or_word_index: int, word_index: Optional[int] = None) -> CharSpan:
+    def word_to_chars(
        self, batch_or_word_index: int, word_index: Optional[int] = None, sequence_index: int = 0
    ) -> CharSpan:
        """
        Get the character span in the original string corresponding to given word in a sequence of the batch.
@@ -503,6 +617,9 @@ class BatchEncoding(UserDict):
            word_index (:obj:`int`, `optional`):
                If a batch index is provided in `batch_or_word_index`, this can be the index of the word in the
                sequence.
            sequence_index (:obj:`int`, `optional`, defaults to 0):
                If a pair of sequences is encoded in the batch, this can be used to specify which sequence in the
                pair (0 or 1) the provided word index belongs to.
        Returns:
            :obj:`CharSpan` or :obj:`List[CharSpan]`: Span(s) of the associated character or characters in the string.
@@ -520,9 +637,9 @@ class BatchEncoding(UserDict):
        else:
            batch_index = 0
            word_index = batch_or_word_index
-        return CharSpan(*(self._encodings[batch_index].word_to_chars(word_index)))
+        return CharSpan(*(self._encodings[batch_index].word_to_chars(word_index, sequence_index)))
-    def char_to_word(self, batch_or_char_index: int, char_index: Optional[int] = None) -> int:
+    def char_to_word(self, batch_or_char_index: int, char_index: Optional[int] = None, sequence_index: int = 0) -> int:
        """
        Get the word in the original string corresponding to a character in the original string of a sequence of the
        batch.
@@ -543,6 +660,9 @@ class BatchEncoding(UserDict):
            char_index (:obj:`int`, `optional`):
                If a batch index is provided in `batch_or_char_index`, this can be the index of the character in the
                original string.
            sequence_index (:obj:`int`, `optional`, defaults to 0):
                If a pair of sequences is encoded in the batch, this can be used to specify which sequence in the
                pair (0 or 1) the provided character index belongs to.
        Returns:
@@ -556,7 +676,7 @@ class BatchEncoding(UserDict):
        else:
            batch_index = 0
            char_index = batch_or_char_index
-        return self._encodings[batch_index].char_to_word(char_index)
+        return self._encodings[batch_index].char_to_word(char_index, sequence_index)
    def convert_to_tensors(
        self, tensor_type: Optional[Union[str, TensorType]] = None, prepend_batch_axis: bool = False
@@ -1872,6 +1992,8 @@ class PreTrainedTokenizerBase(SpecialTokensMixin):
                "Only fast tokenizers (instances of PretrainedTokenizerFast) can be saved in non legacy format."
            )
        save_directory = str(save_directory)
        added_tokens_file = os.path.join(
            save_directory, (filename_prefix + "-" if filename_prefix else "") + ADDED_TOKENS_FILE
        )
--- a/src/transformers/tokenization_utils_fast.py
+++ b/src/transformers/tokenization_utils_fast.py
@@ -169,9 +169,10 @@ class PreTrainedTokenizerFast(PreTrainedTokenizerBase):
        return_offsets_mapping: bool = False,
        return_length: bool = False,
        verbose: bool = True,
-    ) -> Dict[str, Any]:
+    ) -> Tuple[Dict[str, Any], List[EncodingFast]]:
        """
-        Convert the encoding representation (from low-level HuggingFace tokenizer output) to a python Dict.
+        Convert the encoding representation (from low-level HuggingFace tokenizer output) to a python Dict and a list
        of encodings, take care of building a batch from overflowing tokens.
        Overflowing tokens are converted to additional examples (like batches) so the output values of the dict are
        lists (overflows) of lists (tokens).
@@ -203,7 +204,7 @@ class PreTrainedTokenizerFast(PreTrainedTokenizerBase):
            if return_length:
                encoding_dict["length"].append(len(e.ids))
-        return encoding_dict
+        return encoding_dict, encodings
    def convert_tokens_to_ids(self, tokens: Union[str, List[str]]) -> Union[int, List[int]]:
        """
@@ -390,9 +391,12 @@ class PreTrainedTokenizerFast(PreTrainedTokenizerBase):
        )
        # Convert encoding to dict
-        # `Tokens` has type: List[Dict[str, List[List[int]]]] or List[Dict[str, 2D-Tensor]]
+        # `Tokens` has type: Tuple[
        #                       List[Dict[str, List[List[int]]]] or List[Dict[str, 2D-Tensor]],
        #                       List[EncodingFast]
        #                    ]
        # with nested dimensions corresponding to batch, overflows, sequence length
-        tokens = [
+        tokens_and_encodings = [
            self._convert_encoding(
                encoding=encoding,
                return_token_type_ids=return_token_type_ids,
@@ -406,22 +410,27 @@ class PreTrainedTokenizerFast(PreTrainedTokenizerBase):
            for encoding in encodings
        ]
-        # Convert the output to have dict[list] from list[dict]
+        # Convert the output to have dict[list] from list[dict] and remove the additional overflows dimension
-        sanitized = {}
+        # From (variable) shape (batch, overflows, sequence length) to ~ (batch * overflows, sequence length)
-        for key in tokens[0].keys():
+        # (we say ~ because the number of overflow varies with the example in the batch)
-            # To List[List[List[int]]] of shape (batch, overflows, sequence length)
+        #
-            stack = [e for item in tokens for e in item[key]]
+        # To match each overflowing sample with the original sample in the batch
-            sanitized[key] = stack
+        # we add an overflow_to_sample_mapping array (see below)
        sanitized_tokens = {}
        for key in tokens_and_encodings[0][0].keys():
            stack = [e for item, _ in tokens_and_encodings for e in item[key]]
            sanitized_tokens[key] = stack
        sanitized_encodings = [e for _, item in tokens_and_encodings for e in item]
        # If returning overflowing tokens, we need to return a mapping
        # from the batch idx to the original sample
        if return_overflowing_tokens:
            overflow_to_sample_mapping = []
-            for i, enc in enumerate(tokens):
+            for i, (toks, _) in enumerate(tokens_and_encodings):
-                overflow_to_sample_mapping += [i] * len(enc["input_ids"])
+                overflow_to_sample_mapping += [i] * len(toks["input_ids"])
-            sanitized["overflow_to_sample_mapping"] = overflow_to_sample_mapping
+            sanitized_tokens["overflow_to_sample_mapping"] = overflow_to_sample_mapping
-        return BatchEncoding(sanitized, encodings, tensor_type=return_tensors)
+        return BatchEncoding(sanitized_tokens, sanitized_encodings, tensor_type=return_tensors)
    def _encode_plus(
        self,
@@ -518,6 +527,8 @@ class PreTrainedTokenizerFast(PreTrainedTokenizerBase):
        Fast tokenizers can also be saved in a unique JSON file containing {config + vocab + added-tokens} using the
        specific :meth:`~transformers.PreTrainedTokenizerFast._save_pretrained`
        """
        save_directory = str(save_directory)
        if legacy_format:
            added_tokens_file = os.path.join(
                save_directory, (filename_prefix + "-" if filename_prefix else "") + ADDED_TOKENS_FILE
--- a/tests/test_pipelines_common.py
+++ b/tests/test_pipelines_common.py
@@ -1,10 +1,10 @@
 from typing import List, Optional
 from unittest import mock
 from transformers import is_tf_available, is_torch_available, pipeline
 # from transformers.pipelines import DefaultArgumentHandler, Pipeline
 from transformers.pipelines import Pipeline
 from transformers.testing_utils import _run_slow_tests, is_pipeline_test, require_tf, require_torch, slow
 from transformers.tokenization_utils_base import to_py_obj
 VALID_INPUTS = ["A simple string", ["list of strings"]]
@@ -13,9 +13,11 @@ VALID_INPUTS = ["A simple string", ["list of strings"]]
@is_pipeline_test
 class CustomInputPipelineCommonMixin:
    pipeline_task = None
-    pipeline_loading_kwargs = {}
+    pipeline_loading_kwargs = {}  # Additional kwargs to load the pipeline with
-    small_models = None  # Models tested without the @slow decorator
+    pipeline_running_kwargs = {}  # Additional kwargs to run the pipeline with
-    large_models = None  # Models tested with the @slow decorator
+    small_models = []  # Models tested without the @slow decorator
    large_models = []  # Models tested with the @slow decorator
    valid_inputs = VALID_INPUTS  # Some inputs which are valid to compare fast and slow tokenizers
    def setUp(self) -> None:
        if not is_tf_available() and not is_torch_available():
@@ -47,73 +49,11 @@ class CustomInputPipelineCommonMixin:
    @require_torch
    @slow
    def test_pt_defaults(self):
        """Load the default PyTorch pipeline for ``self.pipeline_task`` (slow test)."""
        # Forward the class-level loading kwargs so the default pipeline is built with the same options as the
        # other pipelines created by this mixin.
        pipeline(self.pipeline_task, framework="pt", **self.pipeline_loading_kwargs)
    @require_tf
    @slow
    def test_tf_defaults(self):
        """Load the default TensorFlow pipeline for ``self.pipeline_task`` (slow test)."""
        # Forward the class-level loading kwargs so the default pipeline is built with the same options as the
        # other pipelines created by this mixin.
        pipeline(self.pipeline_task, framework="tf", **self.pipeline_loading_kwargs)
    @require_torch
    def test_torch_small(self):
        """Run ``_test_pipeline`` on every entry of ``small_models`` with the PyTorch backend."""
        for model_name in self.small_models:
            nlp = pipeline(
                task=self.pipeline_task,
                model=model_name,
                tokenizer=model_name,
                framework="pt",
                # Honor the class-level loading kwargs instead of silently ignoring them.
                **self.pipeline_loading_kwargs,
            )
            self._test_pipeline(nlp)
    @require_tf
    def test_tf_small(self):
        """Run ``_test_pipeline`` on every entry of ``small_models`` with the TensorFlow backend."""
        for model_name in self.small_models:
            nlp = pipeline(
                task=self.pipeline_task,
                model=model_name,
                tokenizer=model_name,
                framework="tf",
                # Honor the class-level loading kwargs instead of silently ignoring them.
                **self.pipeline_loading_kwargs,
            )
            self._test_pipeline(nlp)
    @require_torch
    @slow
    def test_torch_large(self):
        """Run ``_test_pipeline`` on every entry of ``large_models`` with the PyTorch backend (slow test)."""
        for model_name in self.large_models:
            nlp = pipeline(
                task=self.pipeline_task,
                model=model_name,
                tokenizer=model_name,
                framework="pt",
                # Honor the class-level loading kwargs instead of silently ignoring them.
                **self.pipeline_loading_kwargs,
            )
            self._test_pipeline(nlp)
    @require_tf
    @slow
    def test_tf_large(self):
        """Run ``_test_pipeline`` on every entry of ``large_models`` with the TensorFlow backend (slow test)."""
        for model_name in self.large_models:
            nlp = pipeline(
                task=self.pipeline_task,
                model=model_name,
                tokenizer=model_name,
                framework="tf",
                # Honor the class-level loading kwargs instead of silently ignoring them.
                **self.pipeline_loading_kwargs,
            )
            self._test_pipeline(nlp)
    def _test_pipeline(self, nlp: Pipeline):
        """Task-specific checks to run on the pipeline ``nlp``.

        This base implementation always raises :obj:`NotImplementedError`; test classes using the mixin are
        expected to provide their own version.
        """
        raise NotImplementedError
@is_pipeline_test
 class MonoInputPipelineCommonMixin:
    pipeline_task = None
    pipeline_loading_kwargs = {}  # Additional kwargs to load the pipeline with
    pipeline_running_kwargs = {}  # Additional kwargs to run the pipeline with
    small_models = []  # Models tested without the @slow decorator
    large_models = []  # Models tested with the @slow decorator
    mandatory_keys = {}  # Keys which should be in the output
    valid_inputs = VALID_INPUTS  # inputs which are valid
    invalid_inputs = [None]  # inputs which are not allowed
    expected_multi_result: Optional[List] = None
    expected_check_keys: Optional[List[str]] = None
    def setUp(self) -> None:
        if not is_tf_available() and not is_torch_available():
            return  # Currently no JAX pipelines
        for model_name in self.small_models:
            pipeline(self.pipeline_task, model=model_name, tokenizer=model_name, **self.pipeline_loading_kwargs)
        for model_name in self.large_models:
            pipeline(self.pipeline_task, model=model_name, tokenizer=model_name, **self.pipeline_loading_kwargs)
    @require_torch
    @slow
    def test_pt_defaults_loads(self):
        pipeline(self.pipeline_task, framework="pt", **self.pipeline_loading_kwargs)
    @require_tf
    @slow
-    def test_tf_defaults_loads(self):
+    def test_tf_defaults(self):
        pipeline(self.pipeline_task, framework="tf", **self.pipeline_loading_kwargs)
    @require_torch
@@ -166,6 +106,95 @@ class MonoInputPipelineCommonMixin:
            )
            self._test_pipeline(nlp)
    def _test_pipeline(self, nlp: Pipeline):
        """Task-specific checks to run on the pipeline ``nlp``.

        This base implementation always raises :obj:`NotImplementedError`; test classes using the mixin are
        expected to provide their own version.
        """
        raise NotImplementedError
    @require_torch
    def test_compare_slow_fast_torch(self):
        """Compare slow and fast tokenizers on every small model with the PyTorch backend.

        Each model is loaded twice, first with ``use_fast=False`` and then with ``use_fast=True``; both pipelines
        are handed to ``_compare_slow_fast_pipelines`` with the model method name ``"forward"``.
        """
        for checkpoint in self.small_models:
            common_kwargs = dict(task=self.pipeline_task, model=checkpoint, tokenizer=checkpoint, framework="pt")
            # The slow pipeline is built first, then the fast one.
            nlp_slow, nlp_fast = [
                pipeline(**common_kwargs, use_fast=use_fast, **self.pipeline_loading_kwargs)
                for use_fast in (False, True)
            ]
            self._compare_slow_fast_pipelines(nlp_slow, nlp_fast, method="forward")
    @require_tf
    def test_compare_slow_fast_tf(self):
        """Compare slow and fast tokenizers on every small model with the TensorFlow backend.

        Each model is loaded twice, first with ``use_fast=False`` and then with ``use_fast=True``; both pipelines
        are handed to ``_compare_slow_fast_pipelines`` with the model method name ``"call"``.
        """
        for checkpoint in self.small_models:
            common_kwargs = dict(task=self.pipeline_task, model=checkpoint, tokenizer=checkpoint, framework="tf")
            # The slow pipeline is built first, then the fast one.
            nlp_slow, nlp_fast = [
                pipeline(**common_kwargs, use_fast=use_fast, **self.pipeline_loading_kwargs)
                for use_fast in (False, True)
            ]
            self._compare_slow_fast_pipelines(nlp_slow, nlp_fast, method="call")
    def _compare_slow_fast_pipelines(self, nlp_slow: Pipeline, nlp_fast: Pipeline, method: str):
        """Check that the inputs to the models' forward passes are identical for slow and fast tokenizers.

        The ``method`` attribute of each pipeline's model is wrapped with a mock spy (the real method still runs
        through ``wraps``). Every entry of ``self.valid_inputs`` is run through both pipelines, and the positional
        and keyword arguments recorded for the model calls are compared pairwise after conversion with
        ``to_py_obj``.

        Args:
            nlp_slow: Pipeline whose recorded model calls serve as the reference (callers in this file build it
                with ``use_fast=False``).
            nlp_fast: Pipeline whose recorded model calls are compared against the reference (callers in this
                file build it with ``use_fast=True``).
            method: Name of the model method to spy on (callers in this file pass ``"forward"`` or ``"call"``).
        """
        with mock.patch.object(
            nlp_slow.model, method, wraps=getattr(nlp_slow.model, method)
        ) as mock_slow, mock.patch.object(nlp_fast.model, method, wraps=getattr(nlp_fast.model, method)) as mock_fast:
            for inputs in self.valid_inputs:
                if isinstance(inputs, dict):
                    # Merge into a fresh dict: `valid_inputs` is a class attribute, so updating its entries in
                    # place would leak the running kwargs into every other test that reads them.
                    call_kwargs = {**inputs, **self.pipeline_running_kwargs}
                    _ = nlp_slow(**call_kwargs)
                    _ = nlp_fast(**call_kwargs)
                else:
                    _ = nlp_slow(inputs, **self.pipeline_running_kwargs)
                    _ = nlp_fast(inputs, **self.pipeline_running_kwargs)

                mock_slow.assert_called()
                mock_fast.assert_called()

                self.assertEqual(len(mock_slow.call_args_list), len(mock_fast.call_args_list))
                # Pair each slow call with the matching *fast* call; zipping the slow list with itself would make
                # the assertions below trivially true.
                for mock_slow_call_args, mock_fast_call_args in zip(
                    mock_slow.call_args_list, mock_fast.call_args_list
                ):
                    slow_call_args, slow_call_kwargs = mock_slow_call_args
                    fast_call_args, fast_call_kwargs = mock_fast_call_args

                    slow_call_args, slow_call_kwargs = to_py_obj(slow_call_args), to_py_obj(slow_call_kwargs)
                    fast_call_args, fast_call_kwargs = to_py_obj(fast_call_args), to_py_obj(fast_call_kwargs)

                    self.assertEqual(slow_call_args, fast_call_args)
                    self.assertDictEqual(slow_call_kwargs, fast_call_kwargs)
@is_pipeline_test
 class MonoInputPipelineCommonMixin(CustomInputPipelineCommonMixin):
    """A version of the CustomInputPipelineCommonMixin
    with a predefined `_test_pipeline` method.
    """
    mandatory_keys = {}  # Keys which should be in the output
    invalid_inputs = [None]  # inputs which are not allowed
    expected_multi_result: Optional[List] = None
    expected_check_keys: Optional[List[str]] = None
    def _test_pipeline(self, nlp: Pipeline):
        self.assertIsNotNone(nlp)
@@ -199,76 +228,3 @@ class MonoInputPipelineCommonMixin:
                self.assertIn(key, result)
        self.assertRaises(Exception, nlp, self.invalid_inputs)
 # @is_pipeline_test
 # class DefaultArgumentHandlerTestCase(unittest.TestCase):
 #     def setUp(self) -> None:
 #         self.handler = DefaultArgumentHandler()
 #
 #     def test_kwargs_x(self):
 #         mono_data = {"X": "This is a sample input"}
 #         mono_args = self.handler(**mono_data)
 #
 #         self.assertTrue(isinstance(mono_args, list))
 #         self.assertEqual(len(mono_args), 1)
 #
 #         multi_data = {"x": ["This is a sample input", "This is a second sample input"]}
 #         multi_args = self.handler(**multi_data)
 #
 #         self.assertTrue(isinstance(multi_args, list))
 #         self.assertEqual(len(multi_args), 2)
 #
 #     def test_kwargs_data(self):
 #         mono_data = {"data": "This is a sample input"}
 #         mono_args = self.handler(**mono_data)
 #
 #         self.assertTrue(isinstance(mono_args, list))
 #         self.assertEqual(len(mono_args), 1)
 #
 #         multi_data = {"data": ["This is a sample input", "This is a second sample input"]}
 #         multi_args = self.handler(**multi_data)
 #
 #         self.assertTrue(isinstance(multi_args, list))
 #         self.assertEqual(len(multi_args), 2)
 #
 #     def test_multi_kwargs(self):
 #         mono_data = {"data": "This is a sample input", "X": "This is a sample input 2"}
 #         mono_args = self.handler(**mono_data)
 #
 #         self.assertTrue(isinstance(mono_args, list))
 #         self.assertEqual(len(mono_args), 2)
 #
 #         multi_data = {
 #             "data": ["This is a sample input", "This is a second sample input"],
 #             "test": ["This is a sample input 2", "This is a second sample input 2"],
 #         }
 #         multi_args = self.handler(**multi_data)
 #
 #         self.assertTrue(isinstance(multi_args, list))
 #         self.assertEqual(len(multi_args), 4)
 #
 #     def test_args(self):
 #         mono_data = "This is a sample input"
 #         mono_args = self.handler(mono_data)
 #
 #         self.assertTrue(isinstance(mono_args, list))
 #         self.assertEqual(len(mono_args), 1)
 #
 #         mono_data = ["This is a sample input"]
 #         mono_args = self.handler(mono_data)
 #
 #         self.assertTrue(isinstance(mono_args, list))
 #         self.assertEqual(len(mono_args), 1)
 #
 #         multi_data = ["This is a sample input", "This is a second sample input"]
 #         multi_args = self.handler(multi_data)
 #
 #         self.assertTrue(isinstance(multi_args, list))
 #         self.assertEqual(len(multi_args), 2)
 #
 #         multi_data = ["This is a sample input", "This is a second sample input"]
 #         multi_args = self.handler(*multi_data)
 #
 #         self.assertTrue(isinstance(multi_args, list))
 #         self.assertEqual(len(multi_args), 2)
--- a/tests/test_pipelines_dialog.py
+++ b/tests/test_pipelines_dialog.py
@@ -1,29 +0,0 @@
 import unittest
 from transformers.pipelines import Conversation, Pipeline
 from .test_pipelines_common import CustomInputPipelineCommonMixin
 class DialoguePipelineTests(CustomInputPipelineCommonMixin, unittest.TestCase):
    """Tests for the ``"conversational"`` pipeline task, driven by ``CustomInputPipelineCommonMixin``."""

    pipeline_task = "conversational"
    small_models = []  # Default model - Models tested without the @slow decorator
    large_models = ["microsoft/DialoGPT-medium"]  # Models tested with the @slow decorator
    def _test_pipeline(self, nlp: Pipeline):
        """Check the output types of ``nlp`` on valid conversations and that bad inputs raise."""
        # One single conversation, and one batch (list) of conversations
        valid_inputs = [Conversation("Hi there!"), [Conversation("Hi there!"), Conversation("How are you?")]]
        invalid_inputs = ["Hi there!", Conversation()]
        self.assertIsNotNone(nlp)
        # A single conversation in gives a single conversation out
        mono_result = nlp(valid_inputs[0])
        self.assertIsInstance(mono_result, Conversation)
        # A list of conversations in gives a list of conversations out
        multi_result = nlp(valid_inputs[1])
        self.assertIsInstance(multi_result, list)
        self.assertIsInstance(multi_result[0], Conversation)
        # Inactive conversations passed to the pipeline raise a ValueError
        self.assertRaises(ValueError, nlp, valid_inputs[1])
        # Each invalid input must be rejected on its own ...
        for bad_input in invalid_inputs:
            self.assertRaises(Exception, nlp, bad_input)
        # ... and so must the whole list of invalid inputs passed at once
        self.assertRaises(Exception, nlp, invalid_inputs)
--- a/tests/test_pipelines_ner.py
+++ b/tests/test_pipelines_ner.py
@@ -146,10 +146,10 @@ class NerPipelineTests(CustomInputPipelineCommonMixin, unittest.TestCase):
    @require_torch
    def test_pt_ignore_subwords_slow_tokenizer_raises(self):
        for model_name in self.small_models:
-            tokenizer = AutoTokenizer.from_pretrained(model_name)
+            tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=False)
            with self.assertRaises(ValueError):
-                pipeline(task="ner", model=model_name, tokenizer=tokenizer, ignore_subwords=True)
+                pipeline(task="ner", model=model_name, tokenizer=tokenizer, ignore_subwords=True, use_fast=False)
    @require_torch
    def test_pt_defaults_slow_tokenizer(self):
--- a/tests/test_pipelines_question_answering.py
+++ b/tests/test_pipelines_question_answering.py
@@ -8,10 +8,22 @@ from .test_pipelines_common import CustomInputPipelineCommonMixin
 class QAPipelineTests(CustomInputPipelineCommonMixin, unittest.TestCase):
    pipeline_task = "question-answering"
    pipeline_running_kwargs = {
        "padding": "max_length",
        "max_seq_len": 25,
        "doc_stride": 5,
    }  # Default is 'longest' but we use 'max_length' to test equivalence between slow/fast tokenizers
    small_models = [
        "sshleifer/tiny-distilbert-base-cased-distilled-squad"
    ]  # Models tested without the @slow decorator
    large_models = []  # Models tested with the @slow decorator
    valid_inputs = [
        {"question": "Where was HuggingFace founded ?", "context": "HuggingFace was founded in Paris."},
        {
            "question": "In what field is HuggingFace working ?",
            "context": "HuggingFace is a startup based in New-York founded in Paris which is trying to solve NLP.",
        },
    ]
    def _test_pipeline(self, nlp: Pipeline):
        output_keys = {"score", "answer", "start", "end"}
--- a/tests/test_pipelines_zero_shot.py
+++ b/tests/test_pipelines_zero_shot.py
@@ -12,6 +12,18 @@ class ZeroShotClassificationPipelineTests(CustomInputPipelineCommonMixin, unitte
        "sshleifer/tiny-distilbert-base-uncased-finetuned-sst-2-english"
    ]  # Models tested without the @slow decorator
    large_models = ["roberta-large-mnli"]  # Models tested with the @slow decorator
    valid_inputs = [
        {"sequences": "Who are you voting for in 2020?", "candidate_labels": "politics"},
        {"sequences": "Who are you voting for in 2020?", "candidate_labels": ["politics"]},
        {"sequences": "Who are you voting for in 2020?", "candidate_labels": "politics, public health"},
        {"sequences": "Who are you voting for in 2020?", "candidate_labels": ["politics", "public health"]},
        {"sequences": ["Who are you voting for in 2020?"], "candidate_labels": "politics"},
        {
            "sequences": "Who are you voting for in 2020?",
            "candidate_labels": "politics",
            "hypothesis_template": "This text is about {}",
        },
    ]
    def _test_scores_sum_to_one(self, result):
        sum = 0.0
--- a/tests/test_retrieval_rag.py
+++ b/tests/test_retrieval_rag.py
@@ -9,7 +9,7 @@ from unittest.mock import patch
 import numpy as np
 from datasets import Dataset
-import faiss
+from transformers import is_faiss_available
 from transformers.configuration_bart import BartConfig
 from transformers.configuration_dpr import DPRConfig
 from transformers.configuration_rag import RagConfig
@@ -27,6 +27,10 @@ from transformers.tokenization_dpr import DPRQuestionEncoderTokenizer
 from transformers.tokenization_roberta import VOCAB_FILES_NAMES as BART_VOCAB_FILES_NAMES
 if is_faiss_available():
    import faiss
@require_faiss
@require_datasets
 class RagRetrieverTest(TestCase):
--- a/tests/test_tokenization_auto.py
+++ b/tests/test_tokenization_auto.py
@@ -116,5 +116,5 @@ class AutoTokenizerTest(unittest.TestCase):
    @require_tokenizers
    def test_from_pretrained_use_fast_toggle(self):
-        self.assertIsInstance(AutoTokenizer.from_pretrained("bert-base-cased"), BertTokenizer)
+        self.assertIsInstance(AutoTokenizer.from_pretrained("bert-base-cased", use_fast=False), BertTokenizer)
-        self.assertIsInstance(AutoTokenizer.from_pretrained("bert-base-cased", use_fast=True), BertTokenizerFast)
+        self.assertIsInstance(AutoTokenizer.from_pretrained("bert-base-cased"), BertTokenizerFast)
--- a/tests/test_tokenization_common.py
+++ b/tests/test_tokenization_common.py
@@ -576,6 +576,42 @@ class TokenizerTesterMixin:
                    sequences, mask = information["input_ids"], information["token_type_ids"]
                    self.assertEqual(len(sequences), len(mask))
    def test_token_type_ids(self):
        """Check that encoding a single sequence yields the token type id 0."""
        for tokenizer in self.get_tokenizers():
            with self.subTest(f"{tokenizer.__class__.__name__}"):
                # Sequence 0 and sequence 1 are expected to be tagged with token type ids 0 and 1 respectively
                # (regardless of whether the model uses token type ids).
                # This assumption is relied upon in the QA pipeline, among other places.
                encoded = tokenizer("Test this method.", return_token_type_ids=True)
                self.assertIn(0, encoded["token_type_ids"])
    def test_sequence_ids(self):
        """Check ``sequence_ids()`` on single and paired inputs; tokenizers that are not fast are skipped."""
        for tokenizer in self.get_tokenizers():
            if tokenizer.is_fast:
                with self.subTest(f"{tokenizer.__class__.__name__}"):
                    first_text, second_text = "Test this method.", "With these inputs."

                    # Sequence 0 and sequence 1 are expected to be tagged with sequence ids 0 and 1 respectively
                    # (regardless of whether the model uses token type ids).
                    # This assumption is relied upon in the QA pipeline, among other places.
                    single = tokenizer(first_text)
                    self.assertIn(0, single.sequence_ids())

                    paired = tokenizer(first_text, second_text)
                    for expected_id in (0, 1):
                        self.assertIn(expected_id, paired.sequence_ids())
                    # When special tokens are added to a pair, they are expected to carry the id `None`.
                    if tokenizer.num_special_tokens_to_add(pair=True):
                        self.assertIn(None, paired.sequence_ids())
    def test_number_of_added_tokens(self):
        tokenizers = self.get_tokenizers(do_lower_case=False)
        for tokenizer in tokenizers:
@@ -1878,6 +1914,144 @@ class TokenizerTesterMixin:
                    batch_encoding.word_to_chars(last_batch_index, last_word_index).end, last_char_index + 1
                )
                # Assert token_to_sequence
                self.assertEqual(encoding.token_to_sequence(num_tokens // 2), 0)
                self.assertEqual(encoding.token_to_sequence(0, num_tokens // 2), 0)
                self.assertEqual(batch_encoding.token_to_sequence(1, num_tokens // 2), 0)
                self.assertEqual(batch_encoding.token_to_sequence(0, num_tokens // 2), 0)
                self.assertEqual(batch_encoding.token_to_sequence(last_batch_index, num_tokens // 2), 0)
                # Pair of input sequences
                words = ["Wonderful", "no", "inspiration", "example", "with", "subtoken"]
                text = " ".join(words)
                pair_words = ["Amazing", "example", "full", "of", "inspiration"]
                pair_text = " ".join(pair_words)
                batch_size = 3
                index_word_in_first_seq = words.index("inspiration")
                index_word_in_pair_seq = pair_words.index("inspiration")
                index_char_in_first_seq = text.find("inspiration")
                index_char_in_pair_seq = pair_text.find("inspiration")
                pair_encoding = tokenizer_r.encode_plus(text, pair_text, add_special_tokens=False)
                pair_batch_encoding = tokenizer_r.batch_encode_plus(
                    [(text, pair_text)] * batch_size, add_special_tokens=False
                )
                num_tokens = len(encoding["input_ids"])
                last_word_index = len(words) - 1
                last_token_index = num_tokens - 1
                last_batch_index = batch_size - 1
                last_char_index = len(text) - 1
                # Assert word_to_tokens
                self.assertNotEqual(
                    pair_encoding.word_to_tokens(index_word_in_first_seq, sequence_index=0).start,
                    pair_encoding.word_to_tokens(index_word_in_pair_seq, sequence_index=1).start,
                )
                self.assertEqual(
                    pair_encoding["input_ids"][
                        pair_encoding.word_to_tokens(index_word_in_first_seq, sequence_index=0).start
                    ],
                    pair_encoding["input_ids"][
                        pair_encoding.word_to_tokens(index_word_in_pair_seq, sequence_index=1).start
                    ],
                )
                self.assertNotEqual(
                    pair_batch_encoding.word_to_tokens(1, index_word_in_first_seq, sequence_index=0).start,
                    pair_batch_encoding.word_to_tokens(1, index_word_in_pair_seq, sequence_index=1).start,
                )
                self.assertEqual(
                    pair_batch_encoding["input_ids"][1][
                        pair_batch_encoding.word_to_tokens(1, index_word_in_first_seq, sequence_index=0).start
                    ],
                    pair_batch_encoding["input_ids"][1][
                        pair_batch_encoding.word_to_tokens(1, index_word_in_pair_seq, sequence_index=1).start
                    ],
                )
                # Assert char_to_token
                self.assertNotEqual(
                    pair_encoding.char_to_token(index_char_in_first_seq, sequence_index=0),
                    pair_encoding.char_to_token(index_char_in_pair_seq, sequence_index=1),
                )
                self.assertEqual(
                    pair_encoding["input_ids"][pair_encoding.char_to_token(index_char_in_first_seq, sequence_index=0)],
                    pair_encoding["input_ids"][pair_encoding.char_to_token(index_char_in_pair_seq, sequence_index=1)],
                )
                self.assertNotEqual(
                    pair_batch_encoding.char_to_token(1, index_char_in_first_seq, sequence_index=0),
                    pair_batch_encoding.char_to_token(1, index_char_in_pair_seq, sequence_index=1),
                )
                self.assertEqual(
                    pair_batch_encoding["input_ids"][1][
                        pair_batch_encoding.char_to_token(1, index_char_in_first_seq, sequence_index=0)
                    ],
                    pair_batch_encoding["input_ids"][1][
                        pair_batch_encoding.char_to_token(1, index_char_in_pair_seq, sequence_index=1)
                    ],
                )
                # Assert char_to_word
                self.assertNotEqual(
                    pair_encoding.char_to_word(index_char_in_first_seq, sequence_index=0),
                    pair_encoding.char_to_word(index_char_in_pair_seq, sequence_index=1),
                )
                self.assertEqual(
                    words[pair_encoding.char_to_word(index_char_in_first_seq, sequence_index=0)],
                    pair_words[pair_encoding.char_to_word(index_char_in_pair_seq, sequence_index=1)],
                )
                self.assertNotEqual(
                    pair_batch_encoding.char_to_word(1, index_char_in_first_seq, sequence_index=0),
                    pair_batch_encoding.char_to_word(1, index_char_in_pair_seq, sequence_index=1),
                )
                self.assertEqual(
                    words[pair_batch_encoding.char_to_word(1, index_char_in_first_seq, sequence_index=0)],
                    pair_words[pair_batch_encoding.char_to_word(1, index_char_in_pair_seq, sequence_index=1)],
                )
                # Assert word_to_chars
                self.assertNotEqual(
                    pair_encoding.word_to_chars(index_word_in_first_seq, sequence_index=0).start,
                    pair_encoding.word_to_chars(index_word_in_pair_seq, sequence_index=1).start,
                )
                self.assertEqual(
                    text[pair_encoding.word_to_chars(index_word_in_first_seq, sequence_index=0).start],
                    pair_text[pair_encoding.word_to_chars(index_word_in_pair_seq, sequence_index=1).start],
                )
                self.assertNotEqual(
                    pair_batch_encoding.word_to_chars(1, index_word_in_first_seq, sequence_index=0).start,
                    pair_batch_encoding.word_to_chars(1, index_word_in_pair_seq, sequence_index=1).start,
                )
                self.assertEqual(
                    text[pair_batch_encoding.word_to_chars(1, index_word_in_first_seq, sequence_index=0).start],
                    pair_text[pair_batch_encoding.word_to_chars(1, index_word_in_pair_seq, sequence_index=1).start],
                )
                # Assert token_to_sequence
                pair_encoding = tokenizer_r.encode_plus(text, pair_text, add_special_tokens=True)
                pair_sequence_ids = [
                    pair_encoding.token_to_sequence(i) for i in range(len(pair_encoding["input_ids"]))
                ]
                self.assertIn(0, pair_sequence_ids)
                self.assertIn(1, pair_sequence_ids)
                if tokenizer_r.num_special_tokens_to_add(pair=True):
                    self.assertIn(None, pair_sequence_ids)
                pair_batch_encoding = tokenizer_r.batch_encode_plus(
                    [(text, pair_text)] * batch_size, add_special_tokens=True
                )
                pair_batch_sequence_ids = [
                    pair_batch_encoding.token_to_sequence(1, i)
                    for i in range(len(pair_batch_encoding["input_ids"][0]))
                ]
                self.assertIn(0, pair_batch_sequence_ids)
                self.assertIn(1, pair_batch_sequence_ids)
                if tokenizer_r.num_special_tokens_to_add(pair=True):
                    self.assertIn(None, pair_batch_sequence_ids)
    def test_tokenization_python_rust_equals(self):
        for tokenizer, pretrained_name, kwargs in self.tokenizers_list:
            with self.subTest("{} ({})".format(tokenizer.__class__.__name__, pretrained_name)):
--- a/tests/test_tokenization_rag.py
+++ b/tests/test_tokenization_rag.py
@@ -4,13 +4,12 @@ import shutil
 import tempfile
 from unittest import TestCase
 from transformers import BartTokenizer, BartTokenizerFast, DPRQuestionEncoderTokenizer, DPRQuestionEncoderTokenizerFast
 from transformers.configuration_bart import BartConfig
 from transformers.configuration_dpr import DPRConfig
 from transformers.file_utils import is_datasets_available, is_faiss_available, is_torch_available
-from transformers.testing_utils import require_datasets, require_faiss, require_torch, slow
+from transformers.testing_utils import require_datasets, require_faiss, require_tokenizers, require_torch, slow
 from transformers.tokenization_bart import BartTokenizer
 from transformers.tokenization_bert import VOCAB_FILES_NAMES as DPR_VOCAB_FILES_NAMES
 from transformers.tokenization_dpr import DPRQuestionEncoderTokenizer
 from transformers.tokenization_roberta import VOCAB_FILES_NAMES as BART_VOCAB_FILES_NAMES
@@ -96,6 +95,7 @@ class RagTokenizerTest(TestCase):
    def tearDown(self):
        shutil.rmtree(self.tmpdirname)
    @require_tokenizers
    def test_save_load_pretrained_with_saved_config(self):
        save_dir = os.path.join(self.tmpdirname, "rag_tokenizer")
@@ -104,10 +104,10 @@ class RagTokenizerTest(TestCase):
        rag_config.save_pretrained(save_dir)
        rag_tokenizer.save_pretrained(save_dir)
        new_rag_tokenizer = RagTokenizer.from_pretrained(save_dir, config=rag_config)
-        self.assertIsInstance(new_rag_tokenizer.question_encoder, DPRQuestionEncoderTokenizer)
+        self.assertIsInstance(new_rag_tokenizer.question_encoder, DPRQuestionEncoderTokenizerFast)
-        self.assertEqual(new_rag_tokenizer.question_encoder.vocab, rag_tokenizer.question_encoder.vocab)
+        self.assertEqual(new_rag_tokenizer.question_encoder.get_vocab(), rag_tokenizer.question_encoder.get_vocab())
-        self.assertIsInstance(new_rag_tokenizer.generator, BartTokenizer)
+        self.assertIsInstance(new_rag_tokenizer.generator, BartTokenizerFast)
-        self.assertEqual(new_rag_tokenizer.generator.encoder, rag_tokenizer.generator.encoder)
+        self.assertEqual(new_rag_tokenizer.generator.get_vocab(), rag_tokenizer.generator.get_vocab())
    @slow
    def test_pretrained_token_nq_tokenizer(self):
--- a/tests/test_tokenization_xlm_prophetnet.py
+++ b/tests/test_tokenization_xlm_prophetnet.py
@@ -18,7 +18,7 @@ import os
 import unittest
 from transformers.file_utils import cached_property
-from transformers.testing_utils import slow
+from transformers.testing_utils import require_sentencepiece, slow
 from transformers.tokenization_xlm_prophetnet import SPIECE_UNDERLINE, XLMProphetNetTokenizer
 from .test_tokenization_common import TokenizerTesterMixin
@@ -27,6 +27,7 @@ from .test_tokenization_common import TokenizerTesterMixin
 SAMPLE_VOCAB = os.path.join(os.path.dirname(os.path.abspath(__file__)), "fixtures/test_sentencepiece.model")
@require_sentencepiece
 class XLMProphetNetTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
    tokenizer_class = XLMProphetNetTokenizer