[Seq2Seq Trainer] Make sure padding is implemented for models without pad_token (#8043)

* make sure padding is also implemented for models without a padding token

* add better error message

* add better warning

* remove results files

* Update examples/seq2seq/seq2seq_trainer.py

* remove unnecessary copy line

* correct usage of labels

* delete test files
This commit is contained in:
Patrick von Platen
2020-10-26 17:28:16 +01:00
committed by GitHub
parent 098ddc2244
commit 664c7ec453
2 changed files with 33 additions and 20 deletions

View File

@@ -63,7 +63,9 @@ class TestFinetuneTrainer(TestCasePlus):
tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
bert2bert.config.vocab_size = bert2bert.config.encoder.vocab_size
bert2bert.config.eos_token_id = tokenizer.sep_token_id
bert2bert.config.decoder_start_token_id = tokenizer.cls_token_id
bert2bert.config.max_length = 128
train_dataset = datasets.load_dataset("cnn_dailymail", "3.0.0", split="train[:1%]")
val_dataset = datasets.load_dataset("cnn_dailymail", "3.0.0", split="validation[:1%]")