[examples] Fix loading jsonl with load_dataset in run translation example (#26924)
* Renamed variable extension to builder_name * If the builder name is "jsonl", change it to "json" to align with load_dataset * Apply suggestions from code review Co-authored-by: Quentin Lhoest <42851186+lhoestq@users.noreply.github.com> --------- Co-authored-by: Quentin Lhoest <42851186+lhoestq@users.noreply.github.com>
This commit is contained in:
@@ -374,8 +374,12 @@ def main():
|
|||||||
if data_args.test_file is not None:
|
if data_args.test_file is not None:
|
||||||
data_files["test"] = data_args.test_file
|
data_files["test"] = data_args.test_file
|
||||||
extension = data_args.test_file.split(".")[-1]
|
extension = data_args.test_file.split(".")[-1]
|
||||||
|
if extension == "jsonl":
|
||||||
|
builder_name = "json" # the "json" builder reads both .json and .jsonl files
|
||||||
|
else:
|
||||||
|
builder_name = extension # e.g. "parquet"
|
||||||
raw_datasets = load_dataset(
|
raw_datasets = load_dataset(
|
||||||
extension,
|
builder_name,
|
||||||
data_files=data_files,
|
data_files=data_files,
|
||||||
cache_dir=model_args.cache_dir,
|
cache_dir=model_args.cache_dir,
|
||||||
token=model_args.token,
|
token=model_args.token,
|
||||||
|
|||||||
Reference in New Issue
Block a user