Mask computing in standalone method. Tests.

This commit is contained in:
LysandreJik
2019-09-19 10:13:10 +02:00
parent bf503158c5
commit c10c7d59e7
7 changed files with 81 additions and 23 deletions

View File

@@ -779,8 +779,8 @@ class PreTrainedTokenizer(object):
second_sentence_tokens
)
# if output_mask:
# sequence, information["mask"] = encoded_sequence
if output_mask:
information["mask"] = self.create_mask_from_sequences(text, text_pair)
information["sequence"] = sequence
else:
@@ -797,6 +797,10 @@ class PreTrainedTokenizer(object):
return information
def create_mask_from_sequences(self, sequence_0, sequence_1):
    """Build a 0/1 mask covering two encoded sequences.

    A warning is logged stating that this tokenizer does not make use of
    special tokens. Each input is then passed through ``self.encode`` and
    only the length of each encoding is used.

    Args:
        sequence_0: First sequence; passed as-is to ``self.encode``.
        sequence_1: Second sequence; passed as-is to ``self.encode``.

    Returns:
        A list with one ``0`` per element of the encoded first sequence,
        followed by one ``1`` per element of the encoded second sequence.
    """
    # NOTE(review): presumably a fallback that subclasses with special
    # tokens are expected to override -- confirm against the subclasses.
    logger.warning("This tokenizer does not make use of special tokens.")

    # Encode in the same order as the positions they occupy in the mask.
    first_length = len(self.encode(sequence_0))
    second_length = len(self.encode(sequence_1))

    first_segment = [0] * first_length
    second_segment = [1] * second_length
    return first_segment + second_segment
def add_special_tokens_single_sequence(self, token_ids):
    """Return ``token_ids`` untouched after logging a warning.

    The warning states that this tokenizer does not make use of special
    tokens and that the sequence is returned with no modification.

    Args:
        token_ids: The sequence to (not) decorate with special tokens.

    Returns:
        The very same ``token_ids`` object that was passed in.
    """
    # Adjacent literals are concatenated at compile time, so the emitted
    # message is identical to the single-line form.
    logger.warning(
        "This tokenizer does not make use of special tokens. "
        "The sequence has been returned with no modification."
    )
    return token_ids