Pass datasets trust_remote_code (#31406)
* Pass datasets trust_remote_code
* Pass trust_remote_code in more tests
* Add trust_remote_dataset_code arg to some tests
* Revert "Temporarily pin datasets upper version to fix CI"
  This reverts commit b7672826ca.
* Pass trust_remote_code in librispeech_asr_dummy docstrings
* Revert "Pin datasets<2.20.0 for examples"
  This reverts commit 833fc17a3e.
* Pass trust_remote_code to all examples
* Revert "Add trust_remote_dataset_code arg to some tests" to research_projects
* Pass trust_remote_code to tests
* Pass trust_remote_code to docstrings
* Fix flax examples tests requirements
* Pass trust_remote_dataset_code arg to tests
* Replace trust_remote_dataset_code with trust_remote_code in one example
* Fix duplicate trust_remote_code
* Replace args.trust_remote_dataset_code with args.trust_remote_code
* Replace trust_remote_dataset_code with trust_remote_code in parser
* Replace trust_remote_dataset_code with trust_remote_code in dataclasses
* Replace trust_remote_dataset_code with trust_remote_code arg
This commit is contained in:
committed by
GitHub
parent
485fd81471
commit
a14b055b65
@@ -253,7 +253,7 @@ def main():
|
||||
# download the dataset.
|
||||
if data_args.task_name is not None:
|
||||
# Downloading and loading a dataset from the hub.
|
||||
datasets = load_dataset("glue", data_args.task_name)
|
||||
datasets = load_dataset("nyu-mll/glue", data_args.task_name)
|
||||
else:
|
||||
# Loading a dataset from your local files.
|
||||
# CSV/JSON training and evaluation files are needed.
|
||||
|
||||
@@ -56,7 +56,7 @@ if __name__ == "__main__":
|
||||
tokenizer = AutoTokenizer.from_pretrained(args.model_name_or_path)
|
||||
|
||||
# Load dataset
|
||||
train_dataset, test_dataset = load_dataset("imdb", split=["train", "test"])
|
||||
train_dataset, test_dataset = load_dataset("stanfordnlp/imdb", split=["train", "test"])
|
||||
train_dataset = train_dataset.shuffle().select(range(5000)) # smaller the size for train dataset to 5k
|
||||
test_dataset = test_dataset.shuffle().select(range(500)) # smaller the size for test dataset to 500
|
||||
|
||||
|
||||
@@ -50,7 +50,7 @@ def fit(model, loss, opt, train_dataset, epochs, train_batch_size, max_steps=Non
|
||||
|
||||
def get_datasets(tokenizer, train_batch_size, eval_batch_size):
|
||||
# Load dataset
|
||||
train_dataset, test_dataset = load_dataset("imdb", split=["train", "test"])
|
||||
train_dataset, test_dataset = load_dataset("stanfordnlp/imdb", split=["train", "test"])
|
||||
|
||||
# Preprocess train dataset
|
||||
train_dataset = train_dataset.map(
|
||||
|
||||
Reference in New Issue
Block a user