Use DataCollatorForSeq2Seq in run_summarization in all cases (#10856)

Co-authored-by: Eliza <eliza@habanero.tiger.com.pl>
This commit is contained in:
Eliza Szczechla
2021-03-22 20:05:39 +01:00
committed by GitHub
parent a8d4d6776d
commit 9f8fa4e973

View File

@@ -38,7 +38,6 @@ from transformers import (
 HfArgumentParser,
 Seq2SeqTrainer,
 Seq2SeqTrainingArguments,
-default_data_collator,
 set_seed,
 )
 from transformers.file_utils import is_offline_mode
@@ -466,9 +465,6 @@ def main():
 # Data collator
 label_pad_token_id = -100 if data_args.ignore_pad_token_for_loss else tokenizer.pad_token_id
-if data_args.pad_to_max_length:
-data_collator = default_data_collator
-else:
 data_collator = DataCollatorForSeq2Seq(
 tokenizer,
 model=model,