Python 2 compatibility: detect string inputs with six.string_types instead of str
This commit is contained in:
@@ -707,14 +707,14 @@ class PreTrainedTokenizer(object):
|
|||||||
"""
|
"""
|
||||||
if text_pair is None:
|
if text_pair is None:
|
||||||
if add_special_tokens:
|
if add_special_tokens:
|
||||||
sequence_tokens = self.convert_tokens_to_ids(self.tokenize(text, **kwargs)) if isinstance(text, str) else text
|
sequence_tokens = self.convert_tokens_to_ids(self.tokenize(text, **kwargs)) if isinstance(text, six.string_types) else text
|
||||||
return self.add_special_tokens_single_sequence(sequence_tokens)
|
return self.add_special_tokens_single_sequence(sequence_tokens)
|
||||||
else:
|
else:
|
||||||
ids = self.convert_tokens_to_ids(self.tokenize(text, **kwargs)) if isinstance(text, str) else text
|
ids = self.convert_tokens_to_ids(self.tokenize(text, **kwargs)) if isinstance(text, six.string_types) else text
|
||||||
return ids
|
return ids
|
||||||
|
|
||||||
first_sentence_tokens = [self._convert_token_to_id(token) for token in self.tokenize(text, **kwargs)] if isinstance(text, str) else text
|
first_sentence_tokens = [self._convert_token_to_id(token) for token in self.tokenize(text, **kwargs)] if isinstance(text, six.string_types) else text
|
||||||
second_sentence_tokens = [self._convert_token_to_id(token) for token in self.tokenize(text_pair, **kwargs)] if isinstance(text_pair, str) else text_pair
|
second_sentence_tokens = [self._convert_token_to_id(token) for token in self.tokenize(text_pair, **kwargs)] if isinstance(text_pair, six.string_types) else text_pair
|
||||||
|
|
||||||
if add_special_tokens:
|
if add_special_tokens:
|
||||||
return self.add_special_tokens_sequence_pair(first_sentence_tokens, second_sentence_tokens)
|
return self.add_special_tokens_sequence_pair(first_sentence_tokens, second_sentence_tokens)
|
||||||
@@ -754,7 +754,7 @@ class PreTrainedTokenizer(object):
|
|||||||
information = {}
|
information = {}
|
||||||
|
|
||||||
if text_pair is None:
|
if text_pair is None:
|
||||||
sequence_tokens = self.convert_tokens_to_ids(self.tokenize(text, **kwargs)) if isinstance(text, str) else text
|
sequence_tokens = self.convert_tokens_to_ids(self.tokenize(text, **kwargs)) if isinstance(text, six.string_types) else text
|
||||||
if add_special_tokens:
|
if add_special_tokens:
|
||||||
information = self.prepare_for_model(sequence_tokens, max_length, stride)
|
information = self.prepare_for_model(sequence_tokens, max_length, stride)
|
||||||
else:
|
else:
|
||||||
@@ -766,8 +766,8 @@ class PreTrainedTokenizer(object):
|
|||||||
if output_mask:
|
if output_mask:
|
||||||
information["mask"] = [0] * len(information["sequence"])
|
information["mask"] = [0] * len(information["sequence"])
|
||||||
else:
|
else:
|
||||||
first_sentence_tokens = [self._convert_token_to_id(token) for token in self.tokenize(text, **kwargs)] if isinstance(text, str) else text
|
first_sentence_tokens = [self._convert_token_to_id(token) for token in self.tokenize(text, **kwargs)] if isinstance(text, six.string_types) else text
|
||||||
second_sentence_tokens = [self._convert_token_to_id(token) for token in self.tokenize(text_pair, **kwargs)] if isinstance(text_pair, str) else text_pair
|
second_sentence_tokens = [self._convert_token_to_id(token) for token in self.tokenize(text_pair, **kwargs)] if isinstance(text_pair, six.string_types) else text_pair
|
||||||
|
|
||||||
if add_special_tokens:
|
if add_special_tokens:
|
||||||
information = self.prepare_pair_for_model(
|
information = self.prepare_pair_for_model(
|
||||||
|
|||||||
Reference in New Issue
Block a user