[Seq2Seq Trainer] Make sure padding is implemented for models without pad_token (#8043)
* make sure padding is implemented for models without a padding token as well
* add better error message
* add better warning
* remove results files
* Update examples/seq2seq/seq2seq_trainer.py
* remove unnecessary copy line
* correct usage of labels
* delete test files
This commit is contained in:
committed by
GitHub
parent
098ddc2244
commit
664c7ec453
@@ -63,7 +63,9 @@ class TestFinetuneTrainer(TestCasePlus):
|
||||
tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
|
||||
|
||||
bert2bert.config.vocab_size = bert2bert.config.encoder.vocab_size
|
||||
bert2bert.config.eos_token_id = tokenizer.sep_token_id
|
||||
bert2bert.config.decoder_start_token_id = tokenizer.cls_token_id
|
||||
bert2bert.config.max_length = 128
|
||||
|
||||
train_dataset = datasets.load_dataset("cnn_dailymail", "3.0.0", split="train[:1%]")
|
||||
val_dataset = datasets.load_dataset("cnn_dailymail", "3.0.0", split="validation[:1%]")
|
||||
|
||||
Reference in New Issue
Block a user