rename prepare_translation_batch -> prepare_seq2seq_batch (#6103)
This commit is contained in:
@@ -63,7 +63,7 @@ Summarization Tips:
|
||||
(It rarely makes sense to start from `bart-large` unless you are researching finetuning methods).
|
||||
|
||||
**Update 2018-07-18**
|
||||
Datasets: `Seq2SeqDataset` should be used for all tokenizers without a `prepare_translation_batch` method. For those who do (like Marian, MBart), `TranslationDataset` should be used.**
|
||||
Datasets: `Seq2SeqDataset` should be used for all tokenizers without a `prepare_seq2seq_batch` method. For those that do (like Marian, MBart), `TranslationDataset` should be used.
|
||||
A new dataset is needed to support multilingual tasks.
|
||||
|
||||
|
||||
|
||||
@@ -145,7 +145,7 @@ class Seq2SeqDataset(Dataset):
|
||||
|
||||
|
||||
class TranslationDataset(Seq2SeqDataset):
|
||||
"""A dataset that calls prepare_translation_batch."""
|
||||
"""A dataset that calls prepare_seq2seq_batch."""
|
||||
|
||||
def __init__(self, *args, **kwargs):
|
||||
super().__init__(*args, **kwargs)
|
||||
@@ -167,7 +167,7 @@ class TranslationDataset(Seq2SeqDataset):
|
||||
}
|
||||
|
||||
def collate_fn(self, batch) -> Dict[str, torch.Tensor]:
|
||||
batch_encoding = self.tokenizer.prepare_translation_batch(
|
||||
batch_encoding = self.tokenizer.prepare_seq2seq_batch(
|
||||
[x["src_texts"] for x in batch],
|
||||
src_lang=self.src_lang,
|
||||
tgt_texts=[x["tgt_texts"] for x in batch],
|
||||
|
||||
Reference in New Issue
Block a user