[cleanup] test_tokenization_common.py (#4390)

This commit is contained in:
Sam Shleifer
2020-05-19 10:46:55 -04:00
committed by GitHub
parent 8f1d047148
commit 07dd7c2fd8
13 changed files with 62 additions and 98 deletions

View File

@@ -60,9 +60,6 @@ class BertJapaneseTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
with open(self.vocab_file, "w", encoding="utf-8") as vocab_writer:
vocab_writer.write("".join([x + "\n" for x in vocab_tokens]))
def get_tokenizer(self, **kwargs):
    """Return a tokenizer loaded from this test's ``tmpdirname``.

    Calls ``BertJapaneseTokenizer.from_pretrained`` with ``self.tmpdirname``
    as the only positional argument; every keyword argument given here is
    forwarded to that call unchanged.
    """
    load_pretrained = BertJapaneseTokenizer.from_pretrained
    return load_pretrained(self.tmpdirname, **kwargs)
def get_input_output_texts(self):
input_text = "こんにちは、世界。 \nこんばんは、世界。"
output_text = "こんにちは 、 世界 。 こんばんは 、 世界 。"