[PyTorch Bart] Split Bart into different models (#9343)
* first try
* remove old template
* finish bart
* finish mbart
* delete unnecessary line
* init pegasus
* save intermediate
* correct pegasus
* finish pegasus
* remove cookie cutter leftover
* add marian
* finish blenderbot
* replace in file
* correctly split blenderbot
* delete "old" folder
* correct "add statement"
* adapt config for tf comp
* correct configs for tf
* remove ipdb
* fix more stuff
* fix mbart
* push pegasus fix
* fix mbart
* more fixes
* fix research projects code
* finish docs for bart, mbart, and marian
* delete unnecessary file
* correct attn typo
* correct configs
* remove pegasus for seq class
* correct peg docs
* correct peg docs
* finish configs
* further improve docs
* add copied from statements to mbart
* fix copied from in mbart
* add copy statements to marian
* add copied from to marian
* add pegasus copied from
* finish pegasus
* finish copied from
* Apply suggestions from code review
* make style
* backward comp blenderbot
* apply lysandres and sylvains suggestions
* apply suggestions
* push last fixes
* fix docs
* fix tok tests
* fix imports code style
* fix doc
This commit is contained in:
committed by
GitHub
parent
4eec5d0cf6
commit
eef66035a2
@@ -23,7 +23,7 @@ from pack_dataset import pack_data_dir
|
||||
from parameterized import parameterized
|
||||
from save_len_file import save_len_file
|
||||
from transformers import AutoTokenizer
|
||||
from transformers.models.bart.modeling_bart import shift_tokens_right
|
||||
from transformers.models.mbart.modeling_mbart import shift_tokens_right
|
||||
from transformers.testing_utils import TestCasePlus, require_torch_non_multi_gpu_but_fix_me, slow
|
||||
from utils import FAIRSEQ_AVAILABLE, DistributedSortishSampler, LegacySeq2SeqDataset, Seq2SeqDataset
|
||||
|
||||
|
||||
@@ -33,9 +33,8 @@ from torch import nn
|
||||
from torch.utils.data import Dataset, Sampler
|
||||
|
||||
from sentence_splitter import add_newline_to_end_of_each_sentence
|
||||
from transformers import BartTokenizer, EvalPrediction, PreTrainedTokenizer, T5Tokenizer
|
||||
from transformers import BartTokenizer, EvalPrediction, PreTrainedTokenizer
|
||||
from transformers.file_utils import cached_property
|
||||
from transformers.models.bart.modeling_bart import shift_tokens_right
|
||||
|
||||
|
||||
try:
|
||||
@@ -305,15 +304,9 @@ class Seq2SeqDataCollator:
|
||||
labels = trim_batch(labels, self.pad_token_id)
|
||||
input_ids, attention_mask = trim_batch(input_ids, self.pad_token_id, attention_mask=attention_mask)
|
||||
|
||||
if isinstance(self.tokenizer, T5Tokenizer):
|
||||
decoder_input_ids = self._shift_right_t5(labels)
|
||||
else:
|
||||
decoder_input_ids = shift_tokens_right(labels, self.pad_token_id)
|
||||
|
||||
batch = {
|
||||
"input_ids": input_ids,
|
||||
"attention_mask": attention_mask,
|
||||
"decoder_input_ids": decoder_input_ids,
|
||||
"labels": labels,
|
||||
}
|
||||
return batch
|
||||
|
||||
Reference in New Issue
Block a user