From 391db836ab7ed2ca61c51a7cf1b135b6ab92be58 Mon Sep 17 00:00:00 2001 From: thomwolf Date: Tue, 1 Oct 2019 19:09:13 -0400 Subject: [PATCH] fix #1260 - remove special logic for decoding pairs of sequence --- transformers/tokenization_utils.py | 17 ++++------------- 1 file changed, 4 insertions(+), 13 deletions(-) diff --git a/transformers/tokenization_utils.py b/transformers/tokenization_utils.py index 1e20588f83..db9e9cd72e 100644 --- a/transformers/tokenization_utils.py +++ b/transformers/tokenization_utils.py @@ -933,20 +933,11 @@ class PreTrainedTokenizer(object): sub_texts.append(self.convert_tokens_to_string(current_sub_text)) text = ''.join(sub_texts) - if self._sep_token is not None and self._sep_token in text: - text = text.replace(self._cls_token, self._sep_token) - split_text = list(filter(lambda sentence: len(sentence) > 0, text.split(self._sep_token))) - if clean_up_tokenization_spaces: - clean_text = [self.clean_up_tokenization(text) for text in split_text] - return clean_text - else: - return split_text + if clean_up_tokenization_spaces: + clean_text = self.clean_up_tokenization(text) + return clean_text else: - if clean_up_tokenization_spaces: - clean_text = self.clean_up_tokenization(text) - return clean_text - else: - return text + return text @property def special_tokens_map(self):