XLM tokenizer should encode with bos token (#3791)
* XLM tokenizer should encode with bos token
* Update tests
This commit is contained in:
@@ -96,5 +96,5 @@ class XLMTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
  encoded_sentence = tokenizer.build_inputs_with_special_tokens(text)
  encoded_pair = tokenizer.build_inputs_with_special_tokens(text, text_2)

- assert encoded_sentence == [1] + text + [1]
- assert encoded_pair == [1] + text + [1] + text_2 + [1]
+ assert encoded_sentence == [0] + text + [1]
+ assert encoded_pair == [0] + text + [1] + text_2 + [1]
|
||||
|
||||
Reference in New Issue
Block a user