run_tf_glue works with all tasks

This commit is contained in:
Lysandre
2019-10-24 21:41:45 +00:00
parent ae1d03fc51
commit bab6ad01aa
3 changed files with 46 additions and 8 deletions

View File

@@ -76,10 +76,14 @@ def glue_convert_examples_to_features(examples, tokenizer,
features = []
for (ex_index, example) in enumerate(examples):
if ex_index == 10:
break
if ex_index % 10000 == 0:
logger.info("Writing example %d" % (ex_index))
if is_tf_dataset:
example = processor.get_example_from_tensor_dict(example)
example = processor.tfds_map(example)
inputs = tokenizer.encode_plus(
example.text_a,

View File

@@ -107,6 +107,13 @@ class DataProcessor(object):
"""Gets the list of labels for this data set."""
raise NotImplementedError()
def tfds_map(self, example):
    """Convert a tensorflow_datasets example to the GLUE label format.

    Some tensorflow_datasets datasets are not formatted the same way the
    GLUE datasets are. When this processor declares more than one label,
    ``example.label`` is treated as an index into ``self.get_labels()`` and
    is replaced by the label found at that position. Otherwise the example
    is left untouched.

    The example is modified in place and the same object is returned.
    """
    label_names = self.get_labels()
    # A processor with at most one label has nothing to index into.
    if len(label_names) <= 1:
        return example
    label_index = int(example.label)
    example.label = label_names[label_index]
    return example
@classmethod
def _read_tsv(cls, input_file, quotechar=None):
"""Reads a tab separated value file."""