update pyknp to rhoknp (#20890)

* update pyknp to rhoknp

* fix linter

* fix linter

* fix linter

* fix linter

* fix linter

* support rhoknp==1.1.0, fix testcase
This commit is contained in:
Hao Wang
2022-12-31 15:22:26 +09:00
committed by GitHub
parent 092d4d49dd
commit 375801d5e6
5 changed files with 21 additions and 10 deletions

View File

@@ -318,6 +318,15 @@ class BertJapaneseTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
["アップル", "ストア", "", "iPhone", "8", "", "発売", "", "れた", ""],
)
@require_jumanpp
def test_jumanpp_tokenizer_ext(self):
    """Check JumanppTokenizer on a sentence that embeds the emoticon ``m(_ _)m``.

    The emoticon (which itself contains a space) is expected to come back as
    one token, and the surrounding Japanese text is expected to be split
    into words, particles and the final full stop.
    """
    tokenizer = JumanppTokenizer()

    text = "ありがとうございますm(_ _)m見つけるのが大変です。"
    # The expected tokens concatenate back to ``text`` exactly: the
    # single-character tokens "の", "が" and "。" must be present, otherwise
    # the list would not cover the whole input.
    expected_tokens = ["ありがとう", "ございます", "m(_ _)m", "見つける", "の", "が", "大変です", "。"]

    self.assertListEqual(tokenizer.tokenize(text), expected_tokens)
def test_wordpiece_tokenizer(self):
vocab_tokens = ["[UNK]", "[CLS]", "[SEP]", "こんにちは", "こん", "にちは", "ばんは", "##こん", "##にちは", "##ばんは"]