clean_up_tokenization_spaces=False if unset (#31938)
* clean_up_tokenization_spaces=False if unset * deprecate warning * updating param for old models * update models * make fix-copies * fix-copies and update bert models * warning msg * update prophet and clvp * updating test since space before is arbitrarily removed * remove warning for 4.45
This commit is contained in:
@@ -249,7 +249,7 @@ class Wav2Vec2PhonemeCTCTokenizerTest(TokenizerTesterMixin, unittest.TestCase):
|
||||
# fmt: on
|
||||
|
||||
batch_tokens = tokenizer.batch_decode(sample_ids)
|
||||
self.assertEqual(batch_tokens, ["k s ɾ ɾ l ɭʲ!?!? $$$", "j ð s j ð s oːɹ $$$"])
|
||||
self.assertEqual(batch_tokens, ["k s ɾ ɾ l ɭʲ ! ? ! ? $$$", "j ð s j ð s oːɹ $$$"])
|
||||
|
||||
@staticmethod
|
||||
def get_from_offsets(offsets, key):
|
||||
|
||||
Reference in New Issue
Block a user