Fix glue processor failing on tf datasets

2020-01-17 10:58:10 -07:00
parent ca6ce3040d
commit cefd51c50c
1 changed file with 6 additions and 2 deletions
--- a/src/transformers/data/processors/glue.py
+++ b/src/transformers/data/processors/glue.py
@@ -80,11 +80,15 @@ def glue_convert_examples_to_features(
    features = []
    for (ex_index, example) in enumerate(examples):
-        if ex_index % 10000 == 0:
+        len_examples = 0
            logger.info("Writing example %d/%d" % (ex_index, len(examples)))
        if is_tf_dataset:
            example = processor.get_example_from_tensor_dict(example)
            example = processor.tfds_map(example)
            len_examples = tf.data.experimental.cardinality(examples)
        else:
            len_examples = len(examples)
        if ex_index % 10000 == 0:
            logger.info("Writing example %d/%d" % (ex_index, len_examples))
        inputs = tokenizer.encode_plus(example.text_a, example.text_b, add_special_tokens=True, max_length=max_length,)
        input_ids, token_type_ids = inputs["input_ids"], inputs["token_type_ids"]