Add custom tokenizer for zh and ja

This commit is contained in:
Shijie Wu
2019-08-23 20:27:52 -04:00
parent 436ce07218
commit e85123d398
3 changed files with 61 additions and 22 deletions

View File

@@ -56,7 +56,11 @@ setup(
'tqdm',
'regex',
'sentencepiece',
'sacremoses'],
'sacremoses',
'pythainlp',
'kytea',
'nltk',
'jieba'],
entry_points={
'console_scripts': [
"pytorch_transformers=pytorch_transformers.__main__:main",