Support additional dictionaries for BERT Japanese tokenizers (#6515)

* Update BERT Japanese tokenizers

* Update CircleCI config to download unidic

* Specify to use the latest dictionary packages
This commit is contained in:
Masatoshi Suzuki
2020-08-17 13:00:23 +09:00
committed by GitHub
parent 423eb5b1d7
commit 48c6c6139f
4 changed files with 97 additions and 15 deletions

View File

@@ -65,7 +65,7 @@ if stale_egg_info.exists():
extras = {}
extras["ja"] = ["fugashi>=1.0", "ipadic>=1.0,<2.0"]
extras["ja"] = ["fugashi>=1.0", "ipadic>=1.0.0,<2.0", "unidic_lite>=1.0.7", "unidic>=1.0.2"]
extras["sklearn"] = ["scikit-learn"]
# keras2onnx and onnxconverter-common versions are pinned to a specific commit until 1.7.0 lands on PyPI