From 60414f31a9a67ade65d90191c0b8349a189978a6 Mon Sep 17 00:00:00 2001
From: LysandreJik
Date: Thu, 19 Sep 2019 10:53:45 +0200
Subject: [PATCH] GLUE updated with new methods

---
 examples/utils_glue.py | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/examples/utils_glue.py b/examples/utils_glue.py
index 8d0ed737d7..e2fc3a119a 100644
--- a/examples/utils_glue.py
+++ b/examples/utils_glue.py
@@ -405,7 +405,14 @@ def convert_examples_to_features(examples, label_list, max_seq_length,
         if ex_index % 10000 == 0:
             logger.info("Writing example %d of %d" % (ex_index, len(examples)))
 
-        input_ids, segment_ids = tokenizer.encode(example.text_a, example.text_b, add_special_tokens=True, output_mask=True)
+        inputs = tokenizer.encode_plus(
+            example.text_a,
+            example.text_b,
+            add_special_tokens=True,
+            output_mask=True,
+            max_length=max_seq_length
+        )
+        input_ids, segment_ids = inputs["sequence"], inputs["mask"]
 
         # The mask has 1 for real tokens and 0 for padding tokens. Only real
         # tokens are attended to.