From d2753dcbec7123500c1a84a7c2143a79e74df48f Mon Sep 17 00:00:00 2001 From: Bhavitvya Malik Date: Fri, 11 Jun 2021 01:29:55 +0530 Subject: [PATCH] add relevant description to tqdm in examples (#11927) * add relevant `desc` in examples * require_version datasets>=1.8.0 --- examples/pytorch/text-classification/requirements.txt | 2 +- examples/pytorch/text-classification/run_glue.py | 9 ++++++++- .../pytorch/text-classification/run_glue_no_trainer.py | 8 +++++++- examples/pytorch/text-classification/run_xnli.py | 5 +++++ 4 files changed, 21 insertions(+), 3 deletions(-) diff --git a/examples/pytorch/text-classification/requirements.txt b/examples/pytorch/text-classification/requirements.txt index 1ad472d68b..ef7666daf3 100644 --- a/examples/pytorch/text-classification/requirements.txt +++ b/examples/pytorch/text-classification/requirements.txt @@ -1,5 +1,5 @@ accelerate -datasets >= 1.1.3 +datasets >= 1.8.0 sentencepiece != 0.1.92 protobuf torch >= 1.3 diff --git a/examples/pytorch/text-classification/run_glue.py b/examples/pytorch/text-classification/run_glue.py index b4ab137c70..461ee6f9b6 100755 --- a/examples/pytorch/text-classification/run_glue.py +++ b/examples/pytorch/text-classification/run_glue.py @@ -42,10 +42,12 @@ from transformers import ( ) from transformers.trainer_utils import get_last_checkpoint from transformers.utils import check_min_version +from transformers.utils.versions import require_version # Will error if the minimal version of Transformers is not installed. Remove at your own risks. check_min_version("4.7.0.dev0") +require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/text-classification/requirements.txt") task_to_keys = { "cola": ("sentence", None), @@ -393,7 +395,12 @@ def main(): result["label"] = [(label_to_id[l] if l != -1 else -1) for l in examples["label"]] return result - datasets = datasets.map(preprocess_function, batched=True, load_from_cache_file=not data_args.overwrite_cache) + datasets = datasets.map( + preprocess_function, + batched=True, + load_from_cache_file=not data_args.overwrite_cache, + desc="Running tokenizer on dataset", + ) if training_args.do_train: if "train" not in datasets: raise ValueError("--do_train requires a train dataset") diff --git a/examples/pytorch/text-classification/run_glue_no_trainer.py b/examples/pytorch/text-classification/run_glue_no_trainer.py index 9ff500b5aa..aa2e03ef77 100644 --- a/examples/pytorch/text-classification/run_glue_no_trainer.py +++ b/examples/pytorch/text-classification/run_glue_no_trainer.py @@ -38,10 +38,13 @@ from transformers import ( get_scheduler, set_seed, ) +from transformers.utils.versions import require_version logger = logging.getLogger(__name__) +require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/text-classification/requirements.txt") + task_to_keys = { "cola": ("sentence", None), "mnli": ("premise", "hypothesis"), @@ -305,7 +308,10 @@ def main(): return result processed_datasets = raw_datasets.map( - preprocess_function, batched=True, remove_columns=raw_datasets["train"].column_names + preprocess_function, + batched=True, + remove_columns=raw_datasets["train"].column_names, + desc="Running tokenizer on dataset", ) train_dataset = processed_datasets["train"] diff --git a/examples/pytorch/text-classification/run_xnli.py b/examples/pytorch/text-classification/run_xnli.py index a409d283b4..e38b74fa33 100755 --- a/examples/pytorch/text-classification/run_xnli.py +++ b/examples/pytorch/text-classification/run_xnli.py @@ -42,10 +42,12 @@ from transformers import ( ) from transformers.trainer_utils import get_last_checkpoint from transformers.utils import check_min_version +from transformers.utils.versions import require_version # Will error if the minimal version of Transformers is not installed. Remove at your own risks. check_min_version("4.7.0.dev0") +require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/text-classification/requirements.txt") logger = logging.getLogger(__name__) @@ -280,6 +282,7 @@ def main(): preprocess_function, batched=True, load_from_cache_file=not data_args.overwrite_cache, + desc="Running tokenizer on train dataset", ) # Log a few random samples from the training set: for index in random.sample(range(len(train_dataset)), 3): @@ -292,6 +295,7 @@ def main(): preprocess_function, batched=True, load_from_cache_file=not data_args.overwrite_cache, + desc="Running tokenizer on validation dataset", ) if training_args.do_predict: @@ -301,6 +305,7 @@ def main(): preprocess_function, batched=True, load_from_cache_file=not data_args.overwrite_cache, + desc="Running tokenizer on prediction dataset", ) # Get the metric function