From f3d0866ed999ba3fefef9c9c100c485eb9080f42 Mon Sep 17 00:00:00 2001 From: Elysium1436 <61297992+Elysium1436@users.noreply.github.com> Date: Tue, 27 Jul 2021 22:01:40 -0300 Subject: [PATCH] Correct validation_split_percentage argument from int (e.g. 5) to float (0.05) (#12897) * Fixed train_test_split test_size argument * `Seq2SeqTrainer` set max_length and num_beams only when non None (#12899) * set max_length and num_beams only when non None * fix instance variables * fix code style * [FLAX] Minor fixes in CLM example (#12914) * readme: fix retrieval of vocab size for flax clm example * examples: fix flax clm example when using training/evaluation files * Fix module path for symbolic_trace example Co-authored-by: cchen-dialpad <47165889+cchen-dialpad@users.noreply.github.com> Co-authored-by: Stefan Schweter Co-authored-by: Sylvain Gugger --- examples/tensorflow/language-modeling/run_clm.py | 2 +- examples/tensorflow/language-modeling/run_mlm.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/tensorflow/language-modeling/run_clm.py b/examples/tensorflow/language-modeling/run_clm.py index 92d74cfd6f..97ac093bb8 100755 --- a/examples/tensorflow/language-modeling/run_clm.py +++ b/examples/tensorflow/language-modeling/run_clm.py @@ -438,7 +438,7 @@ def main(): f"Validation file not found: using {data_args.validation_split_percentage}% of the dataset as validation as provided in data_args" ) train_indices, val_indices = train_test_split( - list(range(len(train_dataset))), test_size=data_args.validation_split_percentage + list(range(len(train_dataset))), test_size=data_args.validation_split_percentage / 100 ) eval_dataset = train_dataset.select(val_indices) diff --git a/examples/tensorflow/language-modeling/run_mlm.py b/examples/tensorflow/language-modeling/run_mlm.py index d862c4423e..ebfb165b7e 100755 --- a/examples/tensorflow/language-modeling/run_mlm.py +++ b/examples/tensorflow/language-modeling/run_mlm.py @@ -499,7 +499,7 @@ def 
main(): f"Validation file not found: using {data_args.validation_split_percentage}% of the dataset as validation as provided in data_args" ) train_indices, val_indices = train_test_split( - list(range(len(train_dataset))), test_size=data_args.validation_split_percentage + list(range(len(train_dataset))), test_size=data_args.validation_split_percentage / 100 ) eval_dataset = train_dataset.select(val_indices)