Minor Bug Fix for Running Roberta on Glue (#3240)

* added return_token_type_ids argument for tokenizers which do not generate token_type_ids by default

* fixed styling

* Style

Co-authored-by: LysandreJik <lysandre.debut@reseau.eseo.fr>
This commit is contained in:
Serkan Karakulak
2020-03-19 12:08:31 -04:00
committed by GitHub
parent 4e4403c9b4
commit b2c2c31c60

View File

@@ -90,7 +90,9 @@ def glue_convert_examples_to_features(
if ex_index % 10000 == 0:
logger.info("Writing example %d/%d" % (ex_index, len_examples))
inputs = tokenizer.encode_plus(example.text_a, example.text_b, add_special_tokens=True, max_length=max_length,)
inputs = tokenizer.encode_plus(
example.text_a, example.text_b, add_special_tokens=True, max_length=max_length, return_token_type_ids=True,
)
input_ids, token_type_ids = inputs["input_ids"], inputs["token_type_ids"]
# The mask has 1 for real tokens and 0 for padding tokens. Only real