From ab984a8b720972651dbe1f48161f4df8bcc09178 Mon Sep 17 00:00:00 2001 From: LysandreJik Date: Thu, 19 Sep 2019 15:01:33 +0200 Subject: [PATCH] Python 2 compatibility --- pytorch_transformers/tokenization_utils.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/pytorch_transformers/tokenization_utils.py b/pytorch_transformers/tokenization_utils.py index b32d6daeb6..f2cb383143 100644 --- a/pytorch_transformers/tokenization_utils.py +++ b/pytorch_transformers/tokenization_utils.py @@ -707,14 +707,14 @@ class PreTrainedTokenizer(object): """ if text_pair is None: if add_special_tokens: - sequence_tokens = self.convert_tokens_to_ids(self.tokenize(text, **kwargs)) if isinstance(text, str) else text + sequence_tokens = self.convert_tokens_to_ids(self.tokenize(text, **kwargs)) if isinstance(text, six.string_types) else text return self.add_special_tokens_single_sequence(sequence_tokens) else: - ids = self.convert_tokens_to_ids(self.tokenize(text, **kwargs)) if isinstance(text, str) else text + ids = self.convert_tokens_to_ids(self.tokenize(text, **kwargs)) if isinstance(text, six.string_types) else text return ids - first_sentence_tokens = [self._convert_token_to_id(token) for token in self.tokenize(text, **kwargs)] if isinstance(text, str) else text - second_sentence_tokens = [self._convert_token_to_id(token) for token in self.tokenize(text_pair, **kwargs)] if isinstance(text_pair, str) else text_pair + first_sentence_tokens = [self._convert_token_to_id(token) for token in self.tokenize(text, **kwargs)] if isinstance(text, six.string_types) else text + second_sentence_tokens = [self._convert_token_to_id(token) for token in self.tokenize(text_pair, **kwargs)] if isinstance(text_pair, six.string_types) else text_pair if add_special_tokens: return self.add_special_tokens_sequence_pair(first_sentence_tokens, second_sentence_tokens) @@ -754,7 +754,7 @@ class PreTrainedTokenizer(object): information = {} if text_pair is None: - sequence_tokens = self.convert_tokens_to_ids(self.tokenize(text, **kwargs)) if isinstance(text, str) else text + sequence_tokens = self.convert_tokens_to_ids(self.tokenize(text, **kwargs)) if isinstance(text, six.string_types) else text if add_special_tokens: information = self.prepare_for_model(sequence_tokens, max_length, stride) else: @@ -766,8 +766,8 @@ class PreTrainedTokenizer(object): if output_mask: information["mask"] = [0] * len(information["sequence"]) else: - first_sentence_tokens = [self._convert_token_to_id(token) for token in self.tokenize(text, **kwargs)] if isinstance(text, str) else text - second_sentence_tokens = [self._convert_token_to_id(token) for token in self.tokenize(text_pair, **kwargs)] if isinstance(text_pair, str) else text_pair + first_sentence_tokens = [self._convert_token_to_id(token) for token in self.tokenize(text, **kwargs)] if isinstance(text, six.string_types) else text + second_sentence_tokens = [self._convert_token_to_id(token) for token in self.tokenize(text_pair, **kwargs)] if isinstance(text_pair, six.string_types) else text_pair if add_special_tokens: information = self.prepare_pair_for_model(