Updated DistilBERT test to reflect the sequence encoding
This commit is contained in:
@@ -39,8 +39,8 @@ class DistilBertTokenizationTest(BertTokenizationTest):
|
|||||||
encoded_sentence = tokenizer.add_special_tokens_single_sentence(text)
|
encoded_sentence = tokenizer.add_special_tokens_single_sentence(text)
|
||||||
encoded_pair = tokenizer.add_special_tokens_sentences_pair(text, text_2)
|
encoded_pair = tokenizer.add_special_tokens_sentences_pair(text, text_2)
|
||||||
|
|
||||||
assert encoded_sentence == [101] + text + [102]
|
assert encoded_sentence == text
|
||||||
assert encoded_pair == [101] + text + [102] + text_2 + [102]
|
assert encoded_pair == text + [102] + text_2
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
unittest.main()
|
unittest.main()
|
||||||
Reference in New Issue
Block a user