remove duplicate words in msg (#31876)
This commit is contained in:
@@ -290,7 +290,7 @@ class FlaxDataCollatorForBartDenoisingLM:
|
||||
def __post_init__(self):
|
||||
if self.tokenizer.mask_token is None or self.tokenizer.eos_token is None:
|
||||
raise ValueError(
|
||||
-"This tokenizer does not have a mask token or eos token token which is necessary for denoising"
|
||||
+"This tokenizer does not have a mask token or eos token which is necessary for denoising"
|
||||
" language modeling. "
|
||||
)
|
||||
|
||||
|
||||
@@ -132,7 +132,7 @@ class PreprocessingArguments:
|
||||
default="transformersbook/codeparrot", metadata={"help": "Folder or name of dataset to process."}
|
||||
)
|
||||
output_dir: Optional[str] = field(
|
||||
-default="codeparrot-clean", metadata={"help": "Folder to save processed processed dataset."}
|
||||
+default="codeparrot-clean", metadata={"help": "Folder to save processed dataset."}
|
||||
)
|
||||
samples_per_file: Optional[int] = field(
|
||||
default=100_000, metadata={"help": "Number of files to save per JSON output file."}
|
||||
|
||||
Reference in New Issue
Block a user