From 4ea19de80c38fe7f32978a17f122f84709d6565f Mon Sep 17 00:00:00 2001 From: Gaurang Tandon <1gaurangtandon@gmail.com> Date: Wed, 8 Dec 2021 18:25:30 +0000 Subject: [PATCH] fix: verify jsonlines file in run_translation (#14660) (#14661) * fix: verify jsonl in run_translation (#14660) * fix(run_translation.py): json/jsonl validation Both json and jsonl are to be accepted as valid jsonlines file extensions * fix(run_translation.py): make black happy * Ran make style --- examples/pytorch/translation/run_translation.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/examples/pytorch/translation/run_translation.py b/examples/pytorch/translation/run_translation.py index 0331023ae5..f8d326c617 100755 --- a/examples/pytorch/translation/run_translation.py +++ b/examples/pytorch/translation/run_translation.py @@ -216,12 +216,16 @@ class DataTrainingArguments: elif self.source_lang is None or self.target_lang is None: raise ValueError("Need to specify the source language and the target language.") + # accepting both json and jsonl file extensions, as + # many jsonlines files actually have a .json extension + valid_extensions = ["json", "jsonl"] + if self.train_file is not None: extension = self.train_file.split(".")[-1] - assert extension == "json", "`train_file` should be a json file." + assert extension in valid_extensions, "`train_file` should be a jsonlines file." if self.validation_file is not None: extension = self.validation_file.split(".")[-1] - assert extension == "json", "`validation_file` should be a json file." + assert extension in valid_extensions, "`validation_file` should be a jsonlines file." if self.val_max_target_length is None: self.val_max_target_length = self.max_target_length