Fix + Test (#8049)

This commit is contained in:
Lysandre Debut
2020-10-26 16:32:27 +00:00
committed by GitHub
parent 664c7ec453
commit cbad90d86d
2 changed files with 12 additions and 0 deletions

View File

@@ -75,6 +75,15 @@ class BlenderbotSmallTokenizerTest(TokenizerTesterMixin, unittest.TestCase):
assert src_text != decoded # I wish it did!
assert decoded == "i am a small frog ."
def test_empty_word_small_tok(self):
    """Check that a lone "." gets the same id as the final "." of a sentence.

    Encodes "I am a small frog ." and "." with the
    ``facebook/blenderbot-90M`` tokenizer and compares the last id of the
    sentence against the first id of the standalone dot.
    """
    tokenizer = BlenderbotSmallTokenizer.from_pretrained("facebook/blenderbot-90M")
    # Encode the full sentence first, then the dot on its own.
    sentence_ids = tokenizer("I am a small frog .")["input_ids"]
    dot_ids = tokenizer(".")["input_ids"]
    assert sentence_ids[-1] == dot_ids[0]
class Blenderbot3BTokenizerTests(unittest.TestCase):
@cached_property