[breaking|pipelines|tokenizers] Adding slow-fast tokenizers equivalence tests pipelines - Removing sentencepiece as a required dependency (#8073)
* Fixing roberta for slow-fast tests * WIP getting equivalence on pipelines * slow-to-fast equivalence - working on question-answering pipeline * optional FAISS tests * Pipeline Q&A * Move pipeline tests to their own test job again * update tokenizer to add sequence id methods * update to tokenizers 0.9.4 * set sentencepiece as optional * clean up squad * clean up pipelines to use sequence_ids * style/quality * wording * Switch to use_fast = True by default * update tests for use_fast at True by default * fix rag tokenizer test * removing protobuf from required dependencies * fix NER test for use_fast = True by default * fixing example tests (Q&A examples use slow tokenizers for now) * protobuf in main deps extras["sentencepiece"] and example deps * fix protobuf install test * try to fix seq2seq by switching to slow tokenizers for now * Update src/transformers/tokenization_utils_base.py Co-authored-by: Lysandre Debut <lysandre@huggingface.co> * Update src/transformers/tokenization_utils_base.py Co-authored-by: Lysandre Debut <lysandre@huggingface.co> Co-authored-by: Lysandre Debut <lysandre@huggingface.co>
This commit is contained in:
@@ -8,10 +8,22 @@ from .test_pipelines_common import CustomInputPipelineCommonMixin
|
||||
|
||||
class QAPipelineTests(CustomInputPipelineCommonMixin, unittest.TestCase):
|
||||
pipeline_task = "question-answering"
|
||||
pipeline_running_kwargs = {
|
||||
"padding": "max_length",
|
||||
"max_seq_len": 25,
|
||||
"doc_stride": 5,
|
||||
} # Default is 'longest' but we use 'max_length' to test equivalence between slow/fast tokenizers
|
||||
small_models = [
|
||||
"sshleifer/tiny-distilbert-base-cased-distilled-squad"
|
||||
] # Models tested without the @slow decorator
|
||||
large_models = [] # Models tested with the @slow decorator
|
||||
valid_inputs = [
|
||||
{"question": "Where was HuggingFace founded ?", "context": "HuggingFace was founded in Paris."},
|
||||
{
|
||||
"question": "In what field is HuggingFace working ?",
|
||||
"context": "HuggingFace is a startup based in New-York founded in Paris which is trying to solve NLP.",
|
||||
},
|
||||
]
|
||||
|
||||
def _test_pipeline(self, nlp: Pipeline):
|
||||
output_keys = {"score", "answer", "start", "end"}
|
||||
|
||||
Reference in New Issue
Block a user