From 4850aaba6f39ea9ce255cf3d84b39fbc214f9329 Mon Sep 17 00:00:00 2001
From: Adam Louly
Date: Mon, 11 Dec 2023 10:01:26 -0800
Subject: [PATCH] fix no sequence length models error (#27522)

* fix no sequence length models error

* block size check

---------

Co-authored-by: Adam Louly
---
 examples/pytorch/language-modeling/run_clm.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/examples/pytorch/language-modeling/run_clm.py b/examples/pytorch/language-modeling/run_clm.py
index df6e248bf7..3677ce24b3 100755
--- a/examples/pytorch/language-modeling/run_clm.py
+++ b/examples/pytorch/language-modeling/run_clm.py
@@ -510,7 +510,10 @@ def main():
                 f"The tokenizer picked seems to have a very large `model_max_length` ({tokenizer.model_max_length}). "
                 f"Using block_size={min(1024, max_pos_embeddings)} instead. You can change that default value by passing --block_size xxx."
             )
-            block_size = min(1024, max_pos_embeddings)
+            if max_pos_embeddings > 0:
+                block_size = min(1024, max_pos_embeddings)
+            else:
+                block_size = 1024
     else:
         if data_args.block_size > tokenizer.model_max_length:
             logger.warning(