From f31af3927f4091f5fb8126c77a0addebd4c1fe94 Mon Sep 17 00:00:00 2001 From: Mathias Nielsen Date: Mon, 20 Nov 2023 15:45:42 +0100 Subject: [PATCH] [examples] fix loading jsonl with load_dataset in run_translation example (#26924) * Renamed variable extension to builder_name * If the builder name is jsonl, change it to json to align with load_dataset * Apply suggestions from code review Co-authored-by: Quentin Lhoest <42851186+lhoestq@users.noreply.github.com> --------- Co-authored-by: Quentin Lhoest <42851186+lhoestq@users.noreply.github.com> --- examples/pytorch/translation/run_translation.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/examples/pytorch/translation/run_translation.py b/examples/pytorch/translation/run_translation.py index 6edbe6a995..98780483a3 100755 --- a/examples/pytorch/translation/run_translation.py +++ b/examples/pytorch/translation/run_translation.py @@ -374,8 +374,12 @@ def main(): if data_args.test_file is not None: data_files["test"] = data_args.test_file extension = data_args.test_file.split(".")[-1] + if extension == "jsonl": + builder_name = "json" # the "json" builder reads both .json and .jsonl files + else: + builder_name = extension # e.g. "parquet" raw_datasets = load_dataset( - extension, + builder_name, data_files=data_files, cache_dir=model_args.cache_dir, token=model_args.token,