output_token_type -> token_type_ids
This commit is contained in:
@@ -413,7 +413,7 @@ def convert_examples_to_features(examples, label_list, max_seq_length,
|
|||||||
max_length=max_seq_length,
|
max_length=max_seq_length,
|
||||||
truncate_first_sequence=True # We're truncating the first sequence as a priority
|
truncate_first_sequence=True # We're truncating the first sequence as a priority
|
||||||
)
|
)
|
||||||
input_ids, segment_ids = inputs["input_ids"], inputs["output_token_type"]
|
input_ids, segment_ids = inputs["input_ids"], inputs["token_type_ids"]
|
||||||
|
|
||||||
# The mask has 1 for real tokens and 0 for padding tokens. Only real
|
# The mask has 1 for real tokens and 0 for padding tokens. Only real
|
||||||
# tokens are attended to.
|
# tokens are attended to.
|
||||||
|
|||||||
@@ -197,7 +197,7 @@ class CommonTestCases:
|
|||||||
seq_0 = "Test this method."
|
seq_0 = "Test this method."
|
||||||
seq_1 = "With these inputs."
|
seq_1 = "With these inputs."
|
||||||
information = tokenizer.encode_plus(seq_0, seq_1, add_special_tokens=True, output_token_type=True)
|
information = tokenizer.encode_plus(seq_0, seq_1, add_special_tokens=True, output_token_type=True)
|
||||||
sequences, mask = information["input_ids"], information["output_token_type"]
|
sequences, mask = information["input_ids"], information["token_type_ids"]
|
||||||
assert len(sequences) == len(mask)
|
assert len(sequences) == len(mask)
|
||||||
|
|
||||||
def test_number_of_added_tokens(self):
|
def test_number_of_added_tokens(self):
|
||||||
|
|||||||
@@ -765,7 +765,7 @@ class PreTrainedTokenizer(object):
|
|||||||
information["input_ids"] = sequence_tokens
|
information["input_ids"] = sequence_tokens
|
||||||
|
|
||||||
if output_token_type:
|
if output_token_type:
|
||||||
information["output_token_type"] = [0] * len(information["input_ids"])
|
information["token_type_ids"] = [0] * len(information["input_ids"])
|
||||||
else:
|
else:
|
||||||
first_sentence_tokens = get_input_ids(text)
|
first_sentence_tokens = get_input_ids(text)
|
||||||
second_sentence_tokens = get_input_ids(text_pair)
|
second_sentence_tokens = get_input_ids(text_pair)
|
||||||
@@ -780,7 +780,7 @@ class PreTrainedTokenizer(object):
|
|||||||
)
|
)
|
||||||
|
|
||||||
if output_token_type:
|
if output_token_type:
|
||||||
information["output_token_type"] = self.create_mask_from_sequences(text, text_pair)
|
information["token_type_ids"] = self.create_mask_from_sequences(text, text_pair)
|
||||||
else:
|
else:
|
||||||
logger.warning("No special tokens were added. The two sequences have been concatenated.")
|
logger.warning("No special tokens were added. The two sequences have been concatenated.")
|
||||||
sequence = first_sentence_tokens + second_sentence_tokens
|
sequence = first_sentence_tokens + second_sentence_tokens
|
||||||
@@ -789,7 +789,7 @@ class PreTrainedTokenizer(object):
|
|||||||
information["overflowing_tokens"] = sequence[max_length - stride:]
|
information["overflowing_tokens"] = sequence[max_length - stride:]
|
||||||
sequence = sequence[:max_length]
|
sequence = sequence[:max_length]
|
||||||
if output_token_type:
|
if output_token_type:
|
||||||
information["output_token_type"] = [0] * len(sequence)
|
information["token_type_ids"] = [0] * len(sequence)
|
||||||
|
|
||||||
information["input_ids"] = sequence
|
information["input_ids"] = sequence
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user