Override get_clean_sequence, as the inherited implementation was causing a bottleneck (#13183)
This commit is contained in:
@@ -15,6 +15,7 @@
|
||||
|
||||
|
||||
import unittest
|
||||
from typing import Tuple
|
||||
|
||||
from transformers import AddedToken, LukeTokenizer
|
||||
from transformers.testing_utils import require_torch, slow
|
||||
@@ -81,6 +82,11 @@ class Luke(TokenizerTesterMixin, unittest.TestCase):
|
||||
assert encoded_sentence == encoded_text_from_decode
|
||||
assert encoded_pair == encoded_pair_from_decode
|
||||
|
||||
def get_clean_sequence(self, tokenizer, max_length=20) -> Tuple[str, list]:
    """Return a fixed sample sentence together with its token ids.

    The sentence is a constant, so the only work done here is a single
    ``tokenizer.encode`` call.
    NOTE(review): presumably this replaces a slower helper inherited from
    the tester mixin -- confirm against the base class.

    Args:
        tokenizer: Object exposing ``encode(text, add_special_tokens=...)``.
        max_length: Accepted for signature compatibility; not used here.

    Returns:
        A ``(text, ids)`` tuple where ``ids`` is whatever
        ``tokenizer.encode`` returns for ``text`` with
        ``add_special_tokens=False``.
    """
    txt = "Beyonce lives in Los Angeles"
    # Special tokens are explicitly disabled so the ids map to the raw text only.
    ids = tokenizer.encode(txt, add_special_tokens=False)
    return txt, ids
|
||||
|
||||
def test_space_encoding(self):
|
||||
tokenizer = self.get_tokenizer()
|
||||
|
||||
|
||||
Reference in New Issue
Block a user