Pass datasets trust_remote_code (#31406)
* Pass datasets trust_remote_code
* Pass trust_remote_code in more tests
* Add trust_remote_dataset_code arg to some tests
* Revert "Temporarily pin datasets upper version to fix CI"
  This reverts commit b7672826ca.
* Pass trust_remote_code in librispeech_asr_dummy docstrings
* Revert "Pin datasets<2.20.0 for examples"
  This reverts commit 833fc17a3e.
* Pass trust_remote_code to all examples
* Revert "Add trust_remote_dataset_code arg to some tests" to research_projects
* Pass trust_remote_code to tests
* Pass trust_remote_code to docstrings
* Fix flax examples tests requirements
* Pass trust_remote_dataset_code arg to tests
* Replace trust_remote_dataset_code with trust_remote_code in one example
* Fix duplicate trust_remote_code
* Replace args.trust_remote_dataset_code with args.trust_remote_code
* Replace trust_remote_dataset_code with trust_remote_code in parser
* Replace trust_remote_dataset_code with trust_remote_code in dataclasses
* Replace trust_remote_dataset_code with trust_remote_code arg
This commit is contained in:
committed by
GitHub
parent
485fd81471
commit
a14b055b65
@@ -42,6 +42,15 @@ def parse_args():
|
||||
parser.add_argument(
|
||||
"--dataset_config", type=str, default="wikitext-103-raw-v1", help="Configuration name of the dataset."
|
||||
)
|
||||
parser.add_argument(
|
||||
"--trust_remote_code",
|
||||
action="store_true",
|
||||
help=(
|
||||
"Whether to trust the execution of code from datasets/models defined on the Hub."
|
||||
" This option should only be set to `True` for repositories you trust and in which you have read the"
|
||||
" code, as it will execute code present on the Hub on your local machine."
|
||||
),
|
||||
)
|
||||
parser.add_argument(
|
||||
"--tokenizer_name_or_path",
|
||||
type=str,
|
||||
@@ -105,7 +114,9 @@ def get_serialized_examples(tokenized_data):
|
||||
|
||||
|
||||
def main(args):
|
||||
dataset = datasets.load_dataset(args.dataset_name, args.dataset_config, split=args.split)
|
||||
dataset = datasets.load_dataset(
|
||||
args.dataset_name, args.dataset_config, split=args.split, trust_remote_code=args.trust_remote_code
|
||||
)
|
||||
|
||||
if args.limit is not None:
|
||||
max_samples = min(len(dataset), args.limit)
|
||||
|
||||
@@ -41,6 +41,15 @@ def parse_args():
|
||||
parser.add_argument(
|
||||
"--dataset_config", type=str, default="wikitext-103-raw-v1", help="Configuration name of the dataset."
|
||||
)
|
||||
parser.add_argument(
|
||||
"--trust_remote_code",
|
||||
action="store_true",
|
||||
help=(
|
||||
"Whether to trust the execution of code from datasets/models defined on the Hub."
|
||||
" This option should only be set to `True` for repositories you trust and in which you have read the"
|
||||
" code, as it will execute code present on the Hub on your local machine."
|
||||
),
|
||||
)
|
||||
parser.add_argument(
|
||||
"--batch_size",
|
||||
type=int,
|
||||
@@ -69,7 +78,9 @@ def parse_args():
|
||||
|
||||
|
||||
def main(args):
|
||||
dataset = datasets.load_dataset(args.dataset_name, args.dataset_config, split="train")
|
||||
dataset = datasets.load_dataset(
|
||||
args.dataset_name, args.dataset_config, split="train", trust_remote_code=args.trust_remote_code
|
||||
)
|
||||
|
||||
if args.limit is not None:
|
||||
max_train_samples = min(len(dataset), args.limit)
|
||||
|
||||
Reference in New Issue
Block a user