From 9eb3a410cd826e47b7c97db8af4056a2465e65eb Mon Sep 17 00:00:00 2001
From: Sylvain Gugger <35901082+sgugger@users.noreply.github.com>
Date: Fri, 30 Oct 2020 15:27:20 -0400
Subject: [PATCH] Remove deprecated arguments from new run_clm (#8197)

---
 examples/language-modeling/run_clm.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/examples/language-modeling/run_clm.py b/examples/language-modeling/run_clm.py
index 42f15d4626..b5d2c6d3aa 100644
--- a/examples/language-modeling/run_clm.py
+++ b/examples/language-modeling/run_clm.py
@@ -259,14 +259,14 @@ def main():
     )
 
     if data_args.block_size <= 0:
-        block_size = tokenizer.max_len
+        block_size = tokenizer.model_max_length
     else:
-        if data_args.block_size > tokenizer.max_len:
+        if data_args.block_size > tokenizer.model_max_length:
             logger.warn(
                 f"The block_size passed ({data_args.block_size}) is larger than the maximum length for the model"
-                f"({tokenizer.max_len}). Using block_size={tokenizer.max_len}."
+                f"({tokenizer.model_max_length}). Using block_size={tokenizer.model_max_length}."
             )
-        block_size = min(data_args.block_size, tokenizer.max_len)
+        block_size = min(data_args.block_size, tokenizer.model_max_length)
 
     # Main data processing function that will concatenate all texts from our dataset and generate chunks of block_size.
     def group_texts(examples):