[breaking|pipelines|tokenizers] Adding slow-fast tokenizers equivalence tests pipelines - Removing sentencepiece as a required dependency (#8073)

* Fixing roberta for slow-fast tests

* WIP getting equivalence on pipelines

* slow-to-fast equivalence - working on question-answering pipeline

* optional FAISS tests

* Pipeline Q&A

* Move pipeline tests to their own test job again

* update tokenizer to add sequence id methods

* update to tokenizers 0.9.4

* set sentencepiecce as optional

* clean up squad

* clean up pipelines to use sequence_ids

* style/quality

* wording

* Switch to use_fast = True by default

* update tests for use_fast at True by default

* fix rag tokenizer test

* removing protobuf from required dependencies

* fix NER test for use_fast = True by default

* fixing example tests (Q&A examples use slow tokenizers for now)

* protobuf in main deps extras["sentencepiece"] and example deps

* fix protobug install test

* try to fix seq2seq by switching to slow tokenizers for now

* Update src/transformers/tokenization_utils_base.py

Co-authored-by: Lysandre Debut <lysandre@huggingface.co>

* Update src/transformers/tokenization_utils_base.py

Co-authored-by: Lysandre Debut <lysandre@huggingface.co>

Co-authored-by: Lysandre Debut <lysandre@huggingface.co>
This commit is contained in:
Thomas Wolf
2020-11-15 22:50:59 +01:00
committed by GitHub
parent 24184e73c4
commit f4e04cd2c6
23 changed files with 689 additions and 262 deletions

View File

@@ -96,13 +96,13 @@ else:
extras["retrieval"] = ["faiss-cpu", "datasets"]
extras["flax"] = ["jaxlib==0.1.55", "jax>=0.2.0", "flax==0.2.2"]
extras["tokenizers"] = ["tokenizers==0.9.2"]
extras["tokenizers"] = ["tokenizers==0.9.4"]
extras["onnxruntime"] = ["onnxruntime>=1.4.0", "onnxruntime-tools>=1.4.2"]
extras["modelcreation"] = ["cookiecutter==1.7.2"]
extras["serving"] = ["pydantic", "uvicorn", "fastapi", "starlette"]
extras["sentencepiece"] = ["sentencepiece==0.1.91"]
extras["sentencepiece"] = ["sentencepiece==0.1.91", "protobuf"]
extras["retrieval"] = ["faiss-cpu", "datasets"]
extras["testing"] = ["pytest", "pytest-xdist", "timeout-decorator", "parameterized", "psutil"] + extras["retrieval"] + extras["modelcreation"]
# sphinx-rtd-theme==0.5.0 introduced big changes in the style.
@@ -130,7 +130,7 @@ setup(
packages=find_packages("src"),
install_requires=[
"numpy",
"tokenizers == 0.9.3",
"tokenizers == 0.9.4",
# dataclasses for Python versions that don't have it
"dataclasses;python_version<'3.7'",
# utilities from PyPA to e.g. compare versions
@@ -143,9 +143,6 @@ setup(
"tqdm >= 4.27",
# for OpenAI GPT
"regex != 2019.12.17",
# for SentencePiece models
"sentencepiece == 0.1.91",
"protobuf",
# for XLM
"sacremoses",
],