From cefd51c50cc08be8146c1151544495968ce8f2ad Mon Sep 17 00:00:00 2001 From: James Betker Date: Fri, 17 Jan 2020 10:58:10 -0700 Subject: [PATCH] Fix glue processor failing on tf datasets --- src/transformers/data/processors/glue.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/transformers/data/processors/glue.py b/src/transformers/data/processors/glue.py index 11bccdfbc7..87885577fa 100644 --- a/src/transformers/data/processors/glue.py +++ b/src/transformers/data/processors/glue.py @@ -80,11 +80,15 @@ def glue_convert_examples_to_features( features = [] for (ex_index, example) in enumerate(examples): - if ex_index % 10000 == 0: - logger.info("Writing example %d/%d" % (ex_index, len(examples))) + len_examples = 0 if is_tf_dataset: example = processor.get_example_from_tensor_dict(example) example = processor.tfds_map(example) + len_examples = tf.data.experimental.cardinality(examples) + else: + len_examples = len(examples) + if ex_index % 10000 == 0: + logger.info("Writing example %d/%d" % (ex_index, len_examples)) inputs = tokenizer.encode_plus(example.text_a, example.text_b, add_special_tokens=True, max_length=max_length,) input_ids, token_type_ids = inputs["input_ids"], inputs["token_type_ids"]