Fix for #3865. PreTrainedTokenizer mapped " do not" to " don't" when .decode(...) was called. Removed the " do not" --> " don't" mapping from clean_up_tokenization(...). (#4024)
This commit is contained in:
@@ -2195,7 +2195,6 @@ class PreTrainedTokenizer(SpecialTokensMixin):
|
|||||||
.replace(" ' ", "'")
|
.replace(" ' ", "'")
|
||||||
.replace(" n't", "n't")
|
.replace(" n't", "n't")
|
||||||
.replace(" 'm", "'m")
|
.replace(" 'm", "'m")
|
||||||
.replace(" do not", " don't")
|
|
||||||
.replace(" 's", "'s")
|
.replace(" 's", "'s")
|
||||||
.replace(" 've", "'ve")
|
.replace(" 've", "'ve")
|
||||||
.replace(" 're", "'re")
|
.replace(" 're", "'re")
|
||||||
|
|||||||
Reference in New Issue
Block a user