Adds a predict stage for GLUE tasks and generates result files that can be submitted to gluebenchmark.com (#4463)

* Adds a predict stage for GLUE tasks and generates result files that can be submitted to the gluebenchmark.com website.

* Use Split enum + always output the label name

Co-authored-by: Julien Chaumond <chaumond@gmail.com>
This commit is contained in:
Zhangyx
2020-05-21 21:17:44 +08:00
committed by GitHub
parent 271bedb485
commit 49296533ca
6 changed files with 140 additions and 45 deletions

View File

@@ -30,7 +30,7 @@ class DataCollatorIntegrationTest(unittest.TestCase):
data_args = GlueDataTrainingArguments(
task_name="mrpc", data_dir="./tests/fixtures/tests_samples/MRPC", overwrite_cache=True
)
dataset = GlueDataset(data_args, tokenizer=tokenizer, evaluate=True)
dataset = GlueDataset(data_args, tokenizer=tokenizer, mode="dev")
data_collator = DefaultDataCollator()
batch = data_collator.collate_batch(dataset.features)
self.assertEqual(batch["labels"].dtype, torch.long)
@@ -41,7 +41,7 @@ class DataCollatorIntegrationTest(unittest.TestCase):
data_args = GlueDataTrainingArguments(
task_name="sts-b", data_dir="./tests/fixtures/tests_samples/STS-B", overwrite_cache=True
)
dataset = GlueDataset(data_args, tokenizer=tokenizer, evaluate=True)
dataset = GlueDataset(data_args, tokenizer=tokenizer, mode="dev")
data_collator = DefaultDataCollator()
batch = data_collator.collate_batch(dataset.features)
self.assertEqual(batch["labels"].dtype, torch.float)
@@ -93,7 +93,7 @@ class TrainerIntegrationTest(unittest.TestCase):
data_args = GlueDataTrainingArguments(
task_name="mrpc", data_dir="./tests/fixtures/tests_samples/MRPC", overwrite_cache=True
)
eval_dataset = GlueDataset(data_args, tokenizer=tokenizer, evaluate=True)
eval_dataset = GlueDataset(data_args, tokenizer=tokenizer, mode="dev")
training_args = TrainingArguments(output_dir="./examples", no_cuda=True)
trainer = Trainer(model=model, args=training_args, eval_dataset=eval_dataset)