Force the return of token type IDs (#3439)

2020-03-26 04:41:36 -04:00
parent 010e0460b2
commit ffcffebe85
2 changed files with 4 additions and 1 deletion
--- a/examples/utils_multiple_choice.py
+++ b/examples/utils_multiple_choice.py
@@ -320,7 +320,9 @@ def convert_examples_to_features(
            else:
                text_b = example.question + " " + ending
-            inputs = tokenizer.encode_plus(text_a, text_b, add_special_tokens=True, max_length=max_length,)
+            inputs = tokenizer.encode_plus(
+                text_a, text_b, add_special_tokens=True, max_length=max_length, return_token_type_ids=True
+            )
            if "num_truncated_tokens" in inputs and inputs["num_truncated_tokens"] > 0:
                logger.info(
                    "Attention! you are cropping tokens (swag task is ok). "
--- a/src/transformers/data/processors/squad.py
+++ b/src/transformers/data/processors/squad.py
@@ -139,6 +139,7 @@ def squad_convert_example_to_features(example, max_seq_length, doc_stride, max_q
            pad_to_max_length=True,
            stride=max_seq_length - doc_stride - len(truncated_query) - sequence_pair_added_tokens,
            truncation_strategy="only_second" if tokenizer.padding_side == "right" else "only_first",
+            return_token_type_ids=True,
        )
        paragraph_len = min(