remove duplicate words in msg (#31876)

This commit is contained in:
yukionfire
2024-07-10 16:54:45 +08:00
committed by GitHub
parent 97aa3e2905
commit e9eeedaf3b
3 changed files with 3 additions and 3 deletions

View File

@@ -290,7 +290,7 @@ class FlaxDataCollatorForBartDenoisingLM:
def __post_init__(self):
if self.tokenizer.mask_token is None or self.tokenizer.eos_token is None:
raise ValueError(
- "This tokenizer does not have a mask token or eos token token which is necessary for denoising"
+ "This tokenizer does not have a mask token or eos token which is necessary for denoising"
" language modeling. "
)

View File

@@ -132,7 +132,7 @@ class PreprocessingArguments:
default="transformersbook/codeparrot", metadata={"help": "Folder or name of dataset to process."}
)
output_dir: Optional[str] = field(
- default="codeparrot-clean", metadata={"help": "Folder to save processed processed dataset."}
+ default="codeparrot-clean", metadata={"help": "Folder to save processed dataset."}
)
samples_per_file: Optional[int] = field(
default=100_000, metadata={"help": "Number of files to save per JSON output file."}