Add sudachi and jumanpp tokenizers for bert_japanese (#19043)
* add sudachipy and jumanpp tokenizers for bert_japanese
* use ImportError instead of ModuleNotFoundError in SudachiTokenizer and JumanppTokenizer
* put test cases of test_tokenization_bert_japanese in one line
* add require_sudachi and require_jumanpp decorator for testing
* add sudachi and pyknp(jumanpp) to dependencies
* remove sudachi_dict_small and sudachi_dict_full from dependencies
* empty commit for ci
This commit is contained in:
5
setup.py
5
setup.py
@@ -170,6 +170,9 @@ _deps = [
|
||||
"unidic_lite>=1.0.7",
|
||||
"uvicorn",
|
||||
"beautifulsoup4",
|
||||
"sudachipy>=0.6.6",
|
||||
"sudachidict_core>=20220729",
|
||||
"pyknp>=0.6.1",
|
||||
]
|
||||
|
||||
|
||||
@@ -239,7 +242,7 @@ class DepsTableUpdateCommand(Command):
|
||||
|
||||
extras = {}
|
||||
|
||||
-extras["ja"] = deps_list("fugashi", "ipadic", "unidic_lite", "unidic")
+extras["ja"] = deps_list("fugashi", "ipadic", "unidic_lite", "unidic", "sudachipy", "sudachidict_core", "pyknp")
|
||||
extras["sklearn"] = deps_list("scikit-learn")
|
||||
|
||||
extras["tf"] = deps_list("tensorflow", "onnxconverter-common", "tf2onnx", "tensorflow-text")
|
||||
|
||||
Reference in New Issue
Block a user