Fix GLUE processor failing on TensorFlow datasets
This commit is contained in:
committed by
Lysandre Debut
parent
ca6ce3040d
commit
cefd51c50c
@@ -80,11 +80,15 @@ def glue_convert_examples_to_features(
|
|||||||
|
|
||||||
features = []
|
features = []
|
||||||
for (ex_index, example) in enumerate(examples):
|
for (ex_index, example) in enumerate(examples):
|
||||||
if ex_index % 10000 == 0:
|
len_examples = 0
|
||||||
logger.info("Writing example %d/%d" % (ex_index, len(examples)))
|
|
||||||
if is_tf_dataset:
|
if is_tf_dataset:
|
||||||
example = processor.get_example_from_tensor_dict(example)
|
example = processor.get_example_from_tensor_dict(example)
|
||||||
example = processor.tfds_map(example)
|
example = processor.tfds_map(example)
|
||||||
|
len_examples = tf.data.experimental.cardinality(examples)
|
||||||
|
else:
|
||||||
|
len_examples = len(examples)
|
||||||
|
if ex_index % 10000 == 0:
|
||||||
|
logger.info("Writing example %d/%d" % (ex_index, len_examples))
|
||||||
|
|
||||||
inputs = tokenizer.encode_plus(example.text_a, example.text_b, add_special_tokens=True, max_length=max_length,)
|
inputs = tokenizer.encode_plus(example.text_a, example.text_b, add_special_tokens=True, max_length=max_length,)
|
||||||
input_ids, token_type_ids = inputs["input_ids"], inputs["token_type_ids"]
|
input_ids, token_type_ids = inputs["input_ids"], inputs["token_type_ids"]
|
||||||
|
|||||||
Reference in New Issue
Block a user