From 539ee456d49e1073ce0bf4655bca4e24a60df8c2 Mon Sep 17 00:00:00 2001 From: Bhadresh Savani Date: Fri, 25 Jun 2021 22:58:42 +0100 Subject: [PATCH] [Examples] Replicates the new --log_level feature to all trainer-based pytorch (#12359) * added log_level * fix comment * fixed log_level * Trigger CI * Unfied logging * simplified args for log_level --- examples/pytorch/language-modeling/run_clm.py | 32 +++++++----- examples/pytorch/language-modeling/run_mlm.py | 33 +++++++----- examples/pytorch/language-modeling/run_plm.py | 34 ++++++------ examples/pytorch/multiple-choice/run_swag.py | 25 ++++----- examples/pytorch/question-answering/run_qa.py | 40 +++++++------- .../question-answering/run_qa_beam_search.py | 39 +++++++------- .../pytorch/question-answering/utils_qa.py | 16 +++--- .../summarization/run_summarization.py | 35 +++++++------ .../pytorch/text-classification/run_glue.py | 52 ++++++++++--------- .../pytorch/text-classification/run_xnli.py | 15 +++--- .../pytorch/token-classification/run_ner.py | 42 ++++++++------- .../run_ner_no_trainer.py | 2 +- .../pytorch/translation/run_translation.py | 2 + 13 files changed, 202 insertions(+), 165 deletions(-) diff --git a/examples/pytorch/language-modeling/run_clm.py b/examples/pytorch/language-modeling/run_clm.py index 5f28662af7..a30278a615 100755 --- a/examples/pytorch/language-modeling/run_clm.py +++ b/examples/pytorch/language-modeling/run_clm.py @@ -28,6 +28,7 @@ import sys from dataclasses import dataclass, field from typing import Optional +import datasets from datasets import load_dataset import transformers @@ -203,18 +204,19 @@ def main(): datefmt="%m/%d/%Y %H:%M:%S", handlers=[logging.StreamHandler(sys.stdout)], ) - logger.setLevel(logging.INFO if training_args.should_log else logging.WARN) + + log_level = training_args.get_process_log_level() + logger.setLevel(log_level) + datasets.utils.logging.set_verbosity(log_level) + transformers.utils.logging.set_verbosity(log_level) + transformers.utils.logging.enable_default_handler() + transformers.utils.logging.enable_explicit_format() # Log on each process the small summary: logger.warning( f"Process rank: {training_args.local_rank}, device: {training_args.device}, n_gpu: {training_args.n_gpu}" + f"distributed training: {bool(training_args.local_rank != -1)}, 16-bits training: {training_args.fp16}" ) - # Set the verbosity to info of the Transformers logger (on main process only): - if training_args.should_log: - transformers.utils.logging.set_verbosity_info() - transformers.utils.logging.enable_default_handler() - transformers.utils.logging.enable_explicit_format() logger.info(f"Training/evaluation parameters {training_args}") # Detecting last checkpoint. @@ -246,15 +248,17 @@ def main(): # download the dataset. if data_args.dataset_name is not None: # Downloading and loading a dataset from the hub. - datasets = load_dataset(data_args.dataset_name, data_args.dataset_config_name, cache_dir=model_args.cache_dir) - if "validation" not in datasets.keys(): - datasets["validation"] = load_dataset( + raw_datasets = load_dataset( + data_args.dataset_name, data_args.dataset_config_name, cache_dir=model_args.cache_dir + ) + if "validation" not in raw_datasets.keys(): + raw_datasets["validation"] = load_dataset( data_args.dataset_name, data_args.dataset_config_name, split=f"train[:{data_args.validation_split_percentage}%]", cache_dir=model_args.cache_dir, ) - datasets["train"] = load_dataset( + raw_datasets["train"] = load_dataset( data_args.dataset_name, data_args.dataset_config_name, split=f"train[{data_args.validation_split_percentage}%:]", @@ -273,7 +277,7 @@ def main(): ) if extension == "txt": extension = "text" - datasets = load_dataset(extension, data_files=data_files, cache_dir=model_args.cache_dir) + raw_datasets = load_dataset(extension, data_files=data_files, cache_dir=model_args.cache_dir) # See more about loading any type of standard or custom dataset (from files, python dict, pandas DataFrame, etc) at # https://huggingface.co/docs/datasets/loading_datasets.html. @@ -334,9 +338,9 @@ def main(): # Preprocessing the datasets. # First we tokenize all the texts. if training_args.do_train: - column_names = datasets["train"].column_names + column_names = raw_datasets["train"].column_names else: - column_names = datasets["validation"].column_names + column_names = raw_datasets["validation"].column_names text_column_name = "text" if "text" in column_names else column_names[0] # since this will be pickled to avoid _LazyModule error in Hasher force logger loading before tokenize_function @@ -352,7 +356,7 @@ def main(): ) return output - tokenized_datasets = datasets.map( + tokenized_datasets = raw_datasets.map( tokenize_function, batched=True, num_proc=data_args.preprocessing_num_workers, diff --git a/examples/pytorch/language-modeling/run_mlm.py b/examples/pytorch/language-modeling/run_mlm.py index 48f642712e..84bc59186b 100755 --- a/examples/pytorch/language-modeling/run_mlm.py +++ b/examples/pytorch/language-modeling/run_mlm.py @@ -28,6 +28,7 @@ import sys from dataclasses import dataclass, field from typing import Optional +import datasets from datasets import load_dataset import transformers @@ -212,7 +213,13 @@ def main(): datefmt="%m/%d/%Y %H:%M:%S", handlers=[logging.StreamHandler(sys.stdout)], ) - logger.setLevel(logging.INFO if training_args.should_log else logging.WARN) + + log_level = training_args.get_process_log_level() + logger.setLevel(log_level) + datasets.utils.logging.set_verbosity(log_level) + transformers.utils.logging.set_verbosity(log_level) + transformers.utils.logging.enable_default_handler() + transformers.utils.logging.enable_explicit_format() # Log on each process the small summary: logger.warning( @@ -220,10 +227,6 @@ def main(): + f"distributed training: {bool(training_args.local_rank != -1)}, 16-bits training: {training_args.fp16}" ) # Set the verbosity to info of the Transformers logger (on main process only): - if training_args.should_log: - transformers.utils.logging.set_verbosity_info() - transformers.utils.logging.enable_default_handler() - transformers.utils.logging.enable_explicit_format() logger.info(f"Training/evaluation parameters {training_args}") # Detecting last checkpoint. @@ -255,15 +258,17 @@ def main(): # download the dataset. if data_args.dataset_name is not None: # Downloading and loading a dataset from the hub. - datasets = load_dataset(data_args.dataset_name, data_args.dataset_config_name, cache_dir=model_args.cache_dir) - if "validation" not in datasets.keys(): - datasets["validation"] = load_dataset( + raw_datasets = load_dataset( + data_args.dataset_name, data_args.dataset_config_name, cache_dir=model_args.cache_dir + ) + if "validation" not in raw_datasets.keys(): + raw_datasets["validation"] = load_dataset( data_args.dataset_name, data_args.dataset_config_name, split=f"train[:{data_args.validation_split_percentage}%]", cache_dir=model_args.cache_dir, ) - datasets["train"] = load_dataset( + raw_datasets["train"] = load_dataset( data_args.dataset_name, data_args.dataset_config_name, split=f"train[{data_args.validation_split_percentage}%:]", @@ -278,7 +283,7 @@ def main(): extension = data_args.train_file.split(".")[-1] if extension == "txt": extension = "text" - datasets = load_dataset(extension, data_files=data_files, cache_dir=model_args.cache_dir) + raw_datasets = load_dataset(extension, data_files=data_files, cache_dir=model_args.cache_dir) # See more about loading any type of standard or custom dataset (from files, python dict, pandas DataFrame, etc) at # https://huggingface.co/docs/datasets/loading_datasets.html. @@ -337,9 +342,9 @@ def main(): # Preprocessing the datasets. # First we tokenize all the texts. if training_args.do_train: - column_names = datasets["train"].column_names + column_names = raw_datasets["train"].column_names else: - column_names = datasets["validation"].column_names + column_names = raw_datasets["validation"].column_names text_column_name = "text" if "text" in column_names else column_names[0] if data_args.max_seq_length is None: @@ -377,7 +382,7 @@ def main(): return_special_tokens_mask=True, ) - tokenized_datasets = datasets.map( + tokenized_datasets = raw_datasets.map( tokenize_function, batched=True, num_proc=data_args.preprocessing_num_workers, @@ -392,7 +397,7 @@ def main(): def tokenize_function(examples): return tokenizer(examples[text_column_name], return_special_tokens_mask=True) - tokenized_datasets = datasets.map( + tokenized_datasets = raw_datasets.map( tokenize_function, batched=True, num_proc=data_args.preprocessing_num_workers, diff --git a/examples/pytorch/language-modeling/run_plm.py b/examples/pytorch/language-modeling/run_plm.py index 28d5c7f316..e608827f34 100755 --- a/examples/pytorch/language-modeling/run_plm.py +++ b/examples/pytorch/language-modeling/run_plm.py @@ -25,6 +25,7 @@ import sys from dataclasses import dataclass, field from typing import Optional +import datasets from datasets import load_dataset import transformers @@ -209,18 +210,19 @@ def main(): datefmt="%m/%d/%Y %H:%M:%S", handlers=[logging.StreamHandler(sys.stdout)], ) - logger.setLevel(logging.INFO if training_args.should_log else logging.WARN) + + log_level = training_args.get_process_log_level() + logger.setLevel(log_level) + datasets.utils.logging.set_verbosity(log_level) + transformers.utils.logging.set_verbosity(log_level) + transformers.utils.logging.enable_default_handler() + transformers.utils.logging.enable_explicit_format() # Log on each process the small summary: logger.warning( f"Process rank: {training_args.local_rank}, device: {training_args.device}, n_gpu: {training_args.n_gpu}" + f"distributed training: {bool(training_args.local_rank != -1)}, 16-bits training: {training_args.fp16}" ) - # Set the verbosity to info of the Transformers logger (on main process only): - if training_args.should_log: - transformers.utils.logging.set_verbosity_info() - transformers.utils.logging.enable_default_handler() - transformers.utils.logging.enable_explicit_format() logger.info(f"Training/evaluation parameters {training_args}") # Detecting last checkpoint. @@ -252,15 +254,17 @@ def main(): # download the dataset. if data_args.dataset_name is not None: # Downloading and loading a dataset from the hub. - datasets = load_dataset(data_args.dataset_name, data_args.dataset_config_name, cache_dir=model_args.cache_dir) - if "validation" not in datasets.keys(): - datasets["validation"] = load_dataset( + raw_datasets = load_dataset( + data_args.dataset_name, data_args.dataset_config_name, cache_dir=model_args.cache_dir + ) + if "validation" not in raw_datasets.keys(): + raw_datasets["validation"] = load_dataset( data_args.dataset_name, data_args.dataset_config_name, split=f"train[:{data_args.validation_split_percentage}%]", cache_dir=model_args.cache_dir, ) - datasets["train"] = load_dataset( + raw_datasets["train"] = load_dataset( data_args.dataset_name, data_args.dataset_config_name, split=f"train[{data_args.validation_split_percentage}%:]", @@ -275,7 +279,7 @@ def main(): extension = data_args.train_file.split(".")[-1] if extension == "txt": extension = "text" - datasets = load_dataset(extension, data_files=data_files, cache_dir=model_args.cache_dir) + raw_datasets = load_dataset(extension, data_files=data_files, cache_dir=model_args.cache_dir) # See more about loading any type of standard or custom dataset (from files, python dict, pandas DataFrame, etc) at # https://huggingface.co/docs/datasets/loading_datasets.html. @@ -334,9 +338,9 @@ def main(): # Preprocessing the datasets. # First we tokenize all the texts. if training_args.do_train: - column_names = datasets["train"].column_names + column_names = raw_datasets["train"].column_names else: - column_names = datasets["validation"].column_names + column_names = raw_datasets["validation"].column_names text_column_name = "text" if "text" in column_names else column_names[0] if data_args.max_seq_length > tokenizer.model_max_length: @@ -355,7 +359,7 @@ def main(): examples["text"] = [line for line in examples["text"] if len(line) > 0 and not line.isspace()] return tokenizer(examples["text"], padding=padding, truncation=True, max_length=max_seq_length) - tokenized_datasets = datasets.map( + tokenized_datasets = raw_datasets.map( tokenize_function, batched=True, num_proc=data_args.preprocessing_num_workers, @@ -368,7 +372,7 @@ def main(): def tokenize_function(examples): return tokenizer(examples[text_column_name]) - tokenized_datasets = datasets.map( + tokenized_datasets = raw_datasets.map( tokenize_function, batched=True, num_proc=data_args.preprocessing_num_workers, diff --git a/examples/pytorch/multiple-choice/run_swag.py b/examples/pytorch/multiple-choice/run_swag.py index a18742117b..bdbd5cf911 100755 --- a/examples/pytorch/multiple-choice/run_swag.py +++ b/examples/pytorch/multiple-choice/run_swag.py @@ -24,6 +24,7 @@ import sys from dataclasses import dataclass, field from typing import Optional, Union +import datasets import numpy as np import torch from datasets import load_dataset @@ -220,18 +221,18 @@ def main(): datefmt="%m/%d/%Y %H:%M:%S", handlers=[logging.StreamHandler(sys.stdout)], ) - logger.setLevel(logging.INFO if training_args.should_log else logging.WARN) + log_level = training_args.get_process_log_level() + logger.setLevel(log_level) + datasets.utils.logging.set_verbosity(log_level) + transformers.utils.logging.set_verbosity(log_level) + transformers.utils.logging.enable_default_handler() + transformers.utils.logging.enable_explicit_format() # Log on each process the small summary: logger.warning( f"Process rank: {training_args.local_rank}, device: {training_args.device}, n_gpu: {training_args.n_gpu}" + f"distributed training: {bool(training_args.local_rank != -1)}, 16-bits training: {training_args.fp16}" ) - # Set the verbosity to info of the Transformers logger (on main process only): - if training_args.should_log: - transformers.utils.logging.set_verbosity_info() - transformers.utils.logging.enable_default_handler() - transformers.utils.logging.enable_explicit_format() logger.info(f"Training/evaluation parameters {training_args}") # Detecting last checkpoint. @@ -268,10 +269,10 @@ def main(): if data_args.validation_file is not None: data_files["validation"] = data_args.validation_file extension = data_args.train_file.split(".")[-1] - datasets = load_dataset(extension, data_files=data_files, cache_dir=model_args.cache_dir) + raw_datasets = load_dataset(extension, data_files=data_files, cache_dir=model_args.cache_dir) else: # Downloading and loading the swag dataset from the hub. - datasets = load_dataset("swag", "regular", cache_dir=model_args.cache_dir) + raw_datasets = load_dataset("swag", "regular", cache_dir=model_args.cache_dir) # See more about loading any type of standard or custom dataset (from files, python dict, pandas DataFrame, etc) at # https://huggingface.co/docs/datasets/loading_datasets.html. @@ -347,9 +348,9 @@ def main(): return {k: [v[i : i + 4] for i in range(0, len(v), 4)] for k, v in tokenized_examples.items()} if training_args.do_train: - if "train" not in datasets: + if "train" not in raw_datasets: raise ValueError("--do_train requires a train dataset") - train_dataset = datasets["train"] + train_dataset = raw_datasets["train"] if data_args.max_train_samples is not None: train_dataset = train_dataset.select(range(data_args.max_train_samples)) train_dataset = train_dataset.map( @@ -360,9 +361,9 @@ def main(): ) if training_args.do_eval: - if "validation" not in datasets: + if "validation" not in raw_datasets: raise ValueError("--do_eval requires a validation dataset") - eval_dataset = datasets["validation"] + eval_dataset = raw_datasets["validation"] if data_args.max_eval_samples is not None: eval_dataset = eval_dataset.select(range(data_args.max_eval_samples)) eval_dataset = eval_dataset.map( diff --git a/examples/pytorch/question-answering/run_qa.py b/examples/pytorch/question-answering/run_qa.py index b036f3214f..db08cc22af 100755 --- a/examples/pytorch/question-answering/run_qa.py +++ b/examples/pytorch/question-answering/run_qa.py @@ -24,6 +24,7 @@ import sys from dataclasses import dataclass, field from typing import Optional +import datasets from datasets import load_dataset, load_metric import transformers @@ -216,18 +217,19 @@ def main(): datefmt="%m/%d/%Y %H:%M:%S", handlers=[logging.StreamHandler(sys.stdout)], ) - logger.setLevel(logging.INFO if training_args.should_log else logging.WARN) + + log_level = training_args.get_process_log_level() + logger.setLevel(log_level) + datasets.utils.logging.set_verbosity(log_level) + transformers.utils.logging.set_verbosity(log_level) + transformers.utils.logging.enable_default_handler() + transformers.utils.logging.enable_explicit_format() # Log on each process the small summary: logger.warning( f"Process rank: {training_args.local_rank}, device: {training_args.device}, n_gpu: {training_args.n_gpu}" + f"distributed training: {bool(training_args.local_rank != -1)}, 16-bits training: {training_args.fp16}" ) - # Set the verbosity to info of the Transformers logger (on main process only): - if training_args.should_log: - transformers.utils.logging.set_verbosity_info() - transformers.utils.logging.enable_default_handler() - transformers.utils.logging.enable_explicit_format() logger.info(f"Training/evaluation parameters {training_args}") # Detecting last checkpoint. @@ -259,7 +261,9 @@ def main(): # download the dataset. if data_args.dataset_name is not None: # Downloading and loading a dataset from the hub. - datasets = load_dataset(data_args.dataset_name, data_args.dataset_config_name, cache_dir=model_args.cache_dir) + raw_datasets = load_dataset( + data_args.dataset_name, data_args.dataset_config_name, cache_dir=model_args.cache_dir + ) else: data_files = {} if data_args.train_file is not None: @@ -272,7 +276,7 @@ def main(): if data_args.test_file is not None: data_files["test"] = data_args.test_file extension = data_args.test_file.split(".")[-1] - datasets = load_dataset(extension, data_files=data_files, field="data", cache_dir=model_args.cache_dir) + raw_datasets = load_dataset(extension, data_files=data_files, field="data", cache_dir=model_args.cache_dir) # See more about loading any type of standard or custom dataset (from files, python dict, pandas DataFrame, etc) at # https://huggingface.co/docs/datasets/loading_datasets.html. @@ -314,11 +318,11 @@ def main(): # Preprocessing the datasets. # Preprocessing is slighlty different for training and evaluation. if training_args.do_train: - column_names = datasets["train"].column_names + column_names = raw_datasets["train"].column_names elif training_args.do_eval: - column_names = datasets["validation"].column_names + column_names = raw_datasets["validation"].column_names else: - column_names = datasets["test"].column_names + column_names = raw_datasets["test"].column_names question_column_name = "question" if "question" in column_names else column_names[0] context_column_name = "context" if "context" in column_names else column_names[1] answer_column_name = "answers" if "answers" in column_names else column_names[2] @@ -407,9 +411,9 @@ def main(): return tokenized_examples if training_args.do_train: - if "train" not in datasets: + if "train" not in raw_datasets: raise ValueError("--do_train requires a train dataset") - train_dataset = datasets["train"] + train_dataset = raw_datasets["train"] if data_args.max_train_samples is not None: # We will select sample from whole data if agument is specified train_dataset = train_dataset.select(range(data_args.max_train_samples)) @@ -469,9 +473,9 @@ def main(): return tokenized_examples if training_args.do_eval: - if "validation" not in datasets: + if "validation" not in raw_datasets: raise ValueError("--do_eval requires a validation dataset") - eval_examples = datasets["validation"] + eval_examples = raw_datasets["validation"] if data_args.max_eval_samples is not None: # We will select sample from whole data eval_examples = eval_examples.select(range(data_args.max_eval_samples)) @@ -489,9 +493,9 @@ def main(): eval_dataset = eval_dataset.select(range(data_args.max_eval_samples)) if training_args.do_predict: - if "test" not in datasets: + if "test" not in raw_datasets: raise ValueError("--do_predict requires a test dataset") - predict_examples = datasets["test"] + predict_examples = raw_datasets["test"] if data_args.max_predict_samples is not None: # We will select sample from whole data predict_examples = predict_examples.select(range(data_args.max_predict_samples)) @@ -529,7 +533,7 @@ def main(): max_answer_length=data_args.max_answer_length, null_score_diff_threshold=data_args.null_score_diff_threshold, output_dir=training_args.output_dir, - is_world_process_zero=trainer.is_world_process_zero(), + log_level=log_level, prefix=stage, ) # Format the result to the format the metric expects. diff --git a/examples/pytorch/question-answering/run_qa_beam_search.py b/examples/pytorch/question-answering/run_qa_beam_search.py index 7e2717b891..52a27a162d 100755 --- a/examples/pytorch/question-answering/run_qa_beam_search.py +++ b/examples/pytorch/question-answering/run_qa_beam_search.py @@ -24,6 +24,7 @@ import sys from dataclasses import dataclass, field from typing import Optional +import datasets from datasets import load_dataset, load_metric import transformers @@ -215,18 +216,18 @@ def main(): datefmt="%m/%d/%Y %H:%M:%S", handlers=[logging.StreamHandler(sys.stdout)], ) - logger.setLevel(logging.INFO if training_args.should_log else logging.WARN) + log_level = training_args.get_process_log_level() + logger.setLevel(log_level) + datasets.utils.logging.set_verbosity(log_level) + transformers.utils.logging.set_verbosity(log_level) + transformers.utils.logging.enable_default_handler() + transformers.utils.logging.enable_explicit_format() # Log on each process the small summary: logger.warning( f"Process rank: {training_args.local_rank}, device: {training_args.device}, n_gpu: {training_args.n_gpu}" + f"distributed training: {bool(training_args.local_rank != -1)}, 16-bits training: {training_args.fp16}" ) - # Set the verbosity to info of the Transformers logger (on main process only): - if training_args.should_log: - transformers.utils.logging.set_verbosity_info() - transformers.utils.logging.enable_default_handler() - transformers.utils.logging.enable_explicit_format() logger.info(f"Training/evaluation parameters {training_args}") # Detecting last checkpoint. @@ -258,7 +259,9 @@ def main(): # download the dataset. if data_args.dataset_name is not None: # Downloading and loading a dataset from the hub. - datasets = load_dataset(data_args.dataset_name, data_args.dataset_config_name, cache_dir=model_args.cache_dir) + raw_datasets = load_dataset( + data_args.dataset_name, data_args.dataset_config_name, cache_dir=model_args.cache_dir + ) else: data_files = {} if data_args.train_file is not None: @@ -270,7 +273,7 @@ def main(): if data_args.test_file is not None: data_files["test"] = data_args.test_file extension = data_args.test_file.split(".")[-1] - datasets = load_dataset(extension, data_files=data_files, field="data", cache_dir=model_args.cache_dir) + raw_datasets = load_dataset(extension, data_files=data_files, field="data", cache_dir=model_args.cache_dir) # See more about loading any type of standard or custom dataset (from files, python dict, pandas DataFrame, etc) at # https://huggingface.co/docs/datasets/loading_datasets.html. @@ -303,11 +306,11 @@ def main(): # Preprocessing the datasets. # Preprocessing is slighlty different for training and evaluation. if training_args.do_train: - column_names = datasets["train"].column_names + column_names = raw_datasets["train"].column_names elif training_args.do_eval: - column_names = datasets["validation"].column_names + column_names = raw_datasets["validation"].column_names else: - column_names = datasets["test"].column_names + column_names = raw_datasets["test"].column_names question_column_name = "question" if "question" in column_names else column_names[0] context_column_name = "context" if "context" in column_names else column_names[1] answer_column_name = "answers" if "answers" in column_names else column_names[2] @@ -419,9 +422,9 @@ def main(): return tokenized_examples if training_args.do_train: - if "train" not in datasets: + if "train" not in raw_datasets: raise ValueError("--do_train requires a train dataset") - train_dataset = datasets["train"] + train_dataset = raw_datasets["train"] if data_args.max_train_samples is not None: # Select samples from Dataset, This will help to decrease processing time train_dataset = train_dataset.select(range(data_args.max_train_samples)) @@ -505,9 +508,9 @@ def main(): return tokenized_examples if training_args.do_eval: - if "validation" not in datasets: + if "validation" not in raw_datasets: raise ValueError("--do_eval requires a validation dataset") - eval_examples = datasets["validation"] + eval_examples = raw_datasets["validation"] if data_args.max_eval_samples is not None: # Selecting Eval Samples from Dataset eval_examples = eval_examples.select(range(data_args.max_eval_samples)) @@ -525,9 +528,9 @@ def main(): eval_dataset = eval_dataset.select(range(data_args.max_eval_samples)) if training_args.do_predict: - if "test" not in datasets: + if "test" not in raw_datasets: raise ValueError("--do_predict requires a test dataset") - predict_examples = datasets["test"] + predict_examples = raw_datasets["test"] if data_args.max_predict_samples is not None: # We will select sample from whole data predict_examples = predict_examples.select(range(data_args.max_predict_samples)) @@ -566,7 +569,7 @@ def main(): start_n_top=model.config.start_n_top, end_n_top=model.config.end_n_top, output_dir=training_args.output_dir, - is_world_process_zero=trainer.is_world_process_zero(), + log_level=log_level, prefix=stage, ) # Format the result to the format the metric expects. diff --git a/examples/pytorch/question-answering/utils_qa.py b/examples/pytorch/question-answering/utils_qa.py index 2f8f0a60c4..fef20639f0 100644 --- a/examples/pytorch/question-answering/utils_qa.py +++ b/examples/pytorch/question-answering/utils_qa.py @@ -38,7 +38,7 @@ def postprocess_qa_predictions( null_score_diff_threshold: float = 0.0, output_dir: Optional[str] = None, prefix: Optional[str] = None, - is_world_process_zero: bool = True, + log_level: Optional[int] = logging.WARNING, ): """ Post-processes the predictions of a question-answering model to convert them to answers that are substrings of the @@ -70,8 +70,8 @@ def postprocess_qa_predictions( answers, are saved in `output_dir`. prefix (:obj:`str`, `optional`): If provided, the dictionaries mentioned above are saved with `prefix` added to their names. - is_world_process_zero (:obj:`bool`, `optional`, defaults to :obj:`True`): - Whether this process is the main process or not (used to determine if logging/saves should be done). + log_level (:obj:`int`, `optional`, defaults to ``logging.WARNING``): + ``logging`` log level (e.g., ``logging.WARNING``) """ assert len(predictions) == 2, "`predictions` should be a tuple with two elements (start_logits, end_logits)." all_start_logits, all_end_logits = predictions @@ -91,7 +91,7 @@ def postprocess_qa_predictions( scores_diff_json = collections.OrderedDict() # Logging. - logger.setLevel(logging.INFO if is_world_process_zero else logging.WARN) + logger.setLevel(log_level) logger.info(f"Post-processing {len(examples)} example predictions split into {len(features)} features.") # Let's loop over all the examples! @@ -250,7 +250,7 @@ def postprocess_qa_predictions_with_beam_search( end_n_top: int = 5, output_dir: Optional[str] = None, prefix: Optional[str] = None, - is_world_process_zero: bool = True, + log_level: Optional[int] = logging.WARNING, ): """ Post-processes the predictions of a question-answering model with beam search to convert them to answers that are substrings of the @@ -280,8 +280,8 @@ def postprocess_qa_predictions_with_beam_search( answers, are saved in `output_dir`. prefix (:obj:`str`, `optional`): If provided, the dictionaries mentioned above are saved with `prefix` added to their names. - is_world_process_zero (:obj:`bool`, `optional`, defaults to :obj:`True`): - Whether this process is the main process or not (used to determine if logging/saves should be done). + log_level (:obj:`int`, `optional`, defaults to ``logging.WARNING``): + ``logging`` log level (e.g., ``logging.WARNING``) """ assert len(predictions) == 5, "`predictions` should be a tuple with five elements." start_top_log_probs, start_top_index, end_top_log_probs, end_top_index, cls_logits = predictions @@ -302,7 +302,7 @@ def postprocess_qa_predictions_with_beam_search( scores_diff_json = collections.OrderedDict() if version_2_with_negative else None # Logging. - logger.setLevel(logging.INFO if is_world_process_zero else logging.WARN) + logger.setLevel(log_level) logger.info(f"Post-processing {len(examples)} example predictions split into {len(features)} features.") # Let's loop over all the examples! diff --git a/examples/pytorch/summarization/run_summarization.py b/examples/pytorch/summarization/run_summarization.py index 21541b428d..9e7c13e266 100755 --- a/examples/pytorch/summarization/run_summarization.py +++ b/examples/pytorch/summarization/run_summarization.py @@ -24,6 +24,7 @@ import sys from dataclasses import dataclass, field from typing import Optional +import datasets import nltk # Here to have a nice missing dependency error message early on import numpy as np from datasets import load_dataset, load_metric @@ -260,16 +261,18 @@ def main(): datefmt="%m/%d/%Y %H:%M:%S", handlers=[logging.StreamHandler(sys.stdout)], ) - logger.setLevel(logging.INFO if training_args.should_log else logging.WARN) + log_level = training_args.get_process_log_level() + logger.setLevel(log_level) + datasets.utils.logging.set_verbosity(log_level) + transformers.utils.logging.set_verbosity(log_level) + transformers.utils.logging.enable_default_handler() + transformers.utils.logging.enable_explicit_format() # Log on each process the small summary: logger.warning( f"Process rank: {training_args.local_rank}, device: {training_args.device}, n_gpu: {training_args.n_gpu}" + f"distributed training: {bool(training_args.local_rank != -1)}, 16-bits training: {training_args.fp16}" ) - # Set the verbosity to info of the Transformers logger (on main process only): - if training_args.should_log: - transformers.utils.logging.set_verbosity_info() logger.info(f"Training/evaluation parameters {training_args}") if data_args.source_prefix is None and model_args.model_name_or_path in [ @@ -313,7 +316,9 @@ def main(): # download the dataset. if data_args.dataset_name is not None: # Downloading and loading a dataset from the hub. - datasets = load_dataset(data_args.dataset_name, data_args.dataset_config_name, cache_dir=model_args.cache_dir) + raw_datasets = load_dataset( + data_args.dataset_name, data_args.dataset_config_name, cache_dir=model_args.cache_dir + ) else: data_files = {} if data_args.train_file is not None: @@ -325,7 +330,7 @@ def main(): if data_args.test_file is not None: data_files["test"] = data_args.test_file extension = data_args.test_file.split(".")[-1] - datasets = load_dataset(extension, data_files=data_files, cache_dir=model_args.cache_dir) + raw_datasets = load_dataset(extension, data_files=data_files, cache_dir=model_args.cache_dir) # See more about loading any type of standard or custom dataset (from files, python dict, pandas DataFrame, etc) at # https://huggingface.co/docs/datasets/loading_datasets.html. @@ -366,11 +371,11 @@ def main(): # Preprocessing the datasets. # We need to tokenize inputs and targets. if training_args.do_train: - column_names = datasets["train"].column_names + column_names = raw_datasets["train"].column_names elif training_args.do_eval: - column_names = datasets["validation"].column_names + column_names = raw_datasets["validation"].column_names elif training_args.do_predict: - column_names = datasets["test"].column_names + column_names = raw_datasets["test"].column_names else: logger.info("There is nothing to do. Please pass `do_train`, `do_eval` and/or `do_predict`.") return @@ -425,9 +430,9 @@ def main(): return model_inputs if training_args.do_train: - if "train" not in datasets: + if "train" not in raw_datasets: raise ValueError("--do_train requires a train dataset") - train_dataset = datasets["train"] + train_dataset = raw_datasets["train"] if data_args.max_train_samples is not None: train_dataset = train_dataset.select(range(data_args.max_train_samples)) train_dataset = train_dataset.map( @@ -441,9 +446,9 @@ def main(): if training_args.do_eval: max_target_length = data_args.val_max_target_length - if "validation" not in datasets: + if "validation" not in raw_datasets: raise ValueError("--do_eval requires a validation dataset") - eval_dataset = datasets["validation"] + eval_dataset = raw_datasets["validation"] if data_args.max_eval_samples is not None: eval_dataset = eval_dataset.select(range(data_args.max_eval_samples)) eval_dataset = eval_dataset.map( @@ -457,9 +462,9 @@ def main(): if training_args.do_predict: max_target_length = data_args.val_max_target_length - if "test" not in datasets: + if "test" not in raw_datasets: raise ValueError("--do_predict requires a test dataset") - predict_dataset = datasets["test"] + predict_dataset = raw_datasets["test"] if data_args.max_predict_samples is not None: predict_dataset = predict_dataset.select(range(data_args.max_predict_samples)) predict_dataset = predict_dataset.map( diff --git a/examples/pytorch/text-classification/run_glue.py b/examples/pytorch/text-classification/run_glue.py index 3531c03e31..99606fd909 100755 --- a/examples/pytorch/text-classification/run_glue.py +++ b/examples/pytorch/text-classification/run_glue.py @@ -23,6 +23,7 @@ import sys from dataclasses import dataclass, field from typing import Optional +import datasets import numpy as np from datasets import load_dataset, load_metric @@ -204,18 +205,19 @@ def main(): datefmt="%m/%d/%Y %H:%M:%S", handlers=[logging.StreamHandler(sys.stdout)], ) - logger.setLevel(logging.INFO if training_args.should_log else logging.WARN) + + log_level = training_args.get_process_log_level() + logger.setLevel(log_level) + datasets.utils.logging.set_verbosity(log_level) + transformers.utils.logging.set_verbosity(log_level) + transformers.utils.logging.enable_default_handler() + transformers.utils.logging.enable_explicit_format() # Log on each process the small summary: logger.warning( f"Process rank: {training_args.local_rank}, device: {training_args.device}, n_gpu: {training_args.n_gpu}" + f"distributed training: {bool(training_args.local_rank != -1)}, 16-bits training: {training_args.fp16}" ) - # Set the verbosity to info of the Transformers logger (on main process only): - if training_args.should_log: - transformers.utils.logging.set_verbosity_info() - transformers.utils.logging.enable_default_handler() - transformers.utils.logging.enable_explicit_format() logger.info(f"Training/evaluation parameters {training_args}") # Detecting last checkpoint. @@ -250,10 +252,12 @@ def main(): # download the dataset. if data_args.task_name is not None: # Downloading and loading a dataset from the hub. - datasets = load_dataset("glue", data_args.task_name, cache_dir=model_args.cache_dir) + raw_datasets = load_dataset("glue", data_args.task_name, cache_dir=model_args.cache_dir) elif data_args.dataset_name is not None: # Downloading and loading a dataset from the hub. - datasets = load_dataset(data_args.dataset_name, data_args.dataset_config_name, cache_dir=model_args.cache_dir) + raw_datasets = load_dataset( + data_args.dataset_name, data_args.dataset_config_name, cache_dir=model_args.cache_dir + ) else: # Loading a dataset from your local files. # CSV/JSON training and evaluation files are needed. @@ -277,10 +281,10 @@ def main(): if data_args.train_file.endswith(".csv"): # Loading a dataset from local csv files - datasets = load_dataset("csv", data_files=data_files, cache_dir=model_args.cache_dir) + raw_datasets = load_dataset("csv", data_files=data_files, cache_dir=model_args.cache_dir) else: # Loading a dataset from local json files - datasets = load_dataset("json", data_files=data_files, cache_dir=model_args.cache_dir) + raw_datasets = load_dataset("json", data_files=data_files, cache_dir=model_args.cache_dir) # See more about loading any type of standard or custom dataset at # https://huggingface.co/docs/datasets/loading_datasets.html. @@ -288,19 +292,19 @@ def main(): if data_args.task_name is not None: is_regression = data_args.task_name == "stsb" if not is_regression: - label_list = datasets["train"].features["label"].names + label_list = raw_datasets["train"].features["label"].names num_labels = len(label_list) else: num_labels = 1 else: # Trying to have good defaults here, don't hesitate to tweak to your needs. - is_regression = datasets["train"].features["label"].dtype in ["float32", "float64"] + is_regression = raw_datasets["train"].features["label"].dtype in ["float32", "float64"] if is_regression: num_labels = 1 else: # A useful fast method: # https://huggingface.co/docs/datasets/package_reference/main_classes.html#datasets.Dataset.unique - label_list = datasets["train"].unique("label") + label_list = raw_datasets["train"].unique("label") label_list.sort() # Let's sort it for determinism num_labels = len(label_list) @@ -332,12 +336,12 @@ def main(): use_auth_token=True if model_args.use_auth_token else None, ) - # Preprocessing the datasets + # Preprocessing the raw_datasets if data_args.task_name is not None: sentence1_key, sentence2_key = task_to_keys[data_args.task_name] else: # Again, we try to have some nice defaults but don't hesitate to tweak to your use case. - non_label_column_names = [name for name in datasets["train"].column_names if name != "label"] + non_label_column_names = [name for name in raw_datasets["train"].column_names if name != "label"] if "sentence1" in non_label_column_names and "sentence2" in non_label_column_names: sentence1_key, sentence2_key = "sentence1", "sentence2" else: @@ -396,30 +400,30 @@ def main(): result["label"] = [(label_to_id[l] if l != -1 else -1) for l in examples["label"]] return result - datasets = datasets.map( + raw_datasets = raw_datasets.map( preprocess_function, batched=True, load_from_cache_file=not data_args.overwrite_cache, desc="Running tokenizer on dataset", ) if training_args.do_train: - if "train" not in datasets: + if "train" not in raw_datasets: raise ValueError("--do_train requires a train dataset") - train_dataset = datasets["train"] + train_dataset = raw_datasets["train"] if data_args.max_train_samples is not None: train_dataset = train_dataset.select(range(data_args.max_train_samples)) if training_args.do_eval: - if "validation" not in datasets and "validation_matched" not in datasets: + if "validation" not in raw_datasets and "validation_matched" not in raw_datasets: raise ValueError("--do_eval requires a validation dataset") - eval_dataset = datasets["validation_matched" if data_args.task_name == "mnli" else "validation"] + eval_dataset = raw_datasets["validation_matched" if data_args.task_name == "mnli" else "validation"] if data_args.max_eval_samples is not None: eval_dataset = eval_dataset.select(range(data_args.max_eval_samples)) if training_args.do_predict or data_args.task_name is not None or data_args.test_file is not None: - if "test" not in datasets and "test_matched" not in datasets: + if "test" not in raw_datasets and "test_matched" not in raw_datasets: raise ValueError("--do_predict requires a test dataset") - predict_dataset = datasets["test_matched" if data_args.task_name == "mnli" else "test"] + predict_dataset = raw_datasets["test_matched" if data_args.task_name == "mnli" else "test"] if data_args.max_predict_samples is not None: predict_dataset = predict_dataset.select(range(data_args.max_predict_samples)) @@ -497,7 +501,7 @@ def main(): eval_datasets = [eval_dataset] if data_args.task_name == "mnli": tasks.append("mnli-mm") - eval_datasets.append(datasets["validation_mismatched"]) + eval_datasets.append(raw_datasets["validation_mismatched"]) for eval_dataset, task in zip(eval_datasets, tasks): metrics = trainer.evaluate(eval_dataset=eval_dataset) @@ -518,7 +522,7 @@ def main(): predict_datasets = [predict_dataset] if data_args.task_name == "mnli": tasks.append("mnli-mm") - predict_datasets.append(datasets["test_mismatched"]) + predict_datasets.append(raw_datasets["test_mismatched"]) for predict_dataset, task in zip(predict_datasets, tasks): # Removing the `label` columns because it contains -1 and Trainer won't like that. diff --git a/examples/pytorch/text-classification/run_xnli.py b/examples/pytorch/text-classification/run_xnli.py index 4043bc1c84..ca037ae079 100755 --- a/examples/pytorch/text-classification/run_xnli.py +++ b/examples/pytorch/text-classification/run_xnli.py @@ -24,6 +24,7 @@ import sys from dataclasses import dataclass, field from typing import Optional +import datasets import numpy as np from datasets import load_dataset, load_metric @@ -174,19 +175,19 @@ def main(): datefmt="%m/%d/%Y %H:%M:%S", handlers=[logging.StreamHandler(sys.stdout)], ) - logger.setLevel(logging.INFO if training_args.should_log else logging.WARN) + + log_level = training_args.get_process_log_level() + logger.setLevel(log_level) + datasets.utils.logging.set_verbosity(log_level) + transformers.utils.logging.set_verbosity(log_level) + transformers.utils.logging.enable_default_handler() + transformers.utils.logging.enable_explicit_format() # Log on each process the small summary: logger.warning( f"Process rank: {training_args.local_rank}, device: {training_args.device}, n_gpu: {training_args.n_gpu}" + f"distributed training: {bool(training_args.local_rank != -1)}, 16-bits training: {training_args.fp16}" ) - - # Set the verbosity to info of the Transformers logger (on main process only): - if training_args.should_log: - transformers.utils.logging.set_verbosity_info() - transformers.utils.logging.enable_default_handler() - transformers.utils.logging.enable_explicit_format() logger.info(f"Training/evaluation parameters {training_args}") # Detecting last checkpoint. diff --git a/examples/pytorch/token-classification/run_ner.py b/examples/pytorch/token-classification/run_ner.py index 646347a275..cbdd0379cb 100755 --- a/examples/pytorch/token-classification/run_ner.py +++ b/examples/pytorch/token-classification/run_ner.py @@ -25,6 +25,7 @@ import sys from dataclasses import dataclass, field from typing import Optional +import datasets import numpy as np from datasets import ClassLabel, load_dataset, load_metric @@ -195,18 +196,19 @@ def main(): datefmt="%m/%d/%Y %H:%M:%S", handlers=[logging.StreamHandler(sys.stdout)], ) - logger.setLevel(logging.INFO if training_args.should_log else logging.WARN) + + log_level = training_args.get_process_log_level() + logger.setLevel(log_level) + datasets.utils.logging.set_verbosity(log_level) + transformers.utils.logging.set_verbosity(log_level) + transformers.utils.logging.enable_default_handler() + transformers.utils.logging.enable_explicit_format() # Log on each process the small summary: logger.warning( f"Process rank: {training_args.local_rank}, device: {training_args.device}, n_gpu: {training_args.n_gpu}" + f"distributed training: {bool(training_args.local_rank != -1)}, 16-bits training: {training_args.fp16}" ) - # Set the verbosity to info of the Transformers logger (on main process only): - if training_args.should_log: - transformers.utils.logging.set_verbosity_info() - transformers.utils.logging.enable_default_handler() - transformers.utils.logging.enable_explicit_format() logger.info(f"Training/evaluation parameters {training_args}") # Detecting last checkpoint. @@ -238,7 +240,9 @@ def main(): # download the dataset. if data_args.dataset_name is not None: # Downloading and loading a dataset from the hub. - datasets = load_dataset(data_args.dataset_name, data_args.dataset_config_name, cache_dir=model_args.cache_dir) + raw_datasets = load_dataset( + data_args.dataset_name, data_args.dataset_config_name, cache_dir=model_args.cache_dir + ) else: data_files = {} if data_args.train_file is not None: @@ -248,16 +252,16 @@ def main(): if data_args.test_file is not None: data_files["test"] = data_args.test_file extension = data_args.train_file.split(".")[-1] - datasets = load_dataset(extension, data_files=data_files, cache_dir=model_args.cache_dir) + raw_datasets = load_dataset(extension, data_files=data_files, cache_dir=model_args.cache_dir) # See more about loading any type of standard or custom dataset (from files, python dict, pandas DataFrame, etc) at # https://huggingface.co/docs/datasets/loading_datasets.html. if training_args.do_train: - column_names = datasets["train"].column_names - features = datasets["train"].features + column_names = raw_datasets["train"].column_names + features = raw_datasets["train"].features else: - column_names = datasets["validation"].column_names - features = datasets["validation"].features + column_names = raw_datasets["validation"].column_names + features = raw_datasets["validation"].features if data_args.text_column_name is not None: text_column_name = data_args.text_column_name @@ -288,7 +292,7 @@ def main(): # No need to convert the labels since they are already ints. label_to_id = {i: i for i in range(len(label_list))} else: - label_list = get_label_list(datasets["train"][label_column_name]) + label_list = get_label_list(raw_datasets["train"][label_column_name]) label_to_id = {l: i for i, l in enumerate(label_list)} num_labels = len(label_list) @@ -381,9 +385,9 @@ def main(): return tokenized_inputs if training_args.do_train: - if "train" not in datasets: + if "train" not in raw_datasets: raise ValueError("--do_train requires a train dataset") - train_dataset = datasets["train"] + train_dataset = raw_datasets["train"] if data_args.max_train_samples is not None: train_dataset = train_dataset.select(range(data_args.max_train_samples)) train_dataset = train_dataset.map( @@ -395,9 +399,9 @@ def main(): ) if training_args.do_eval: - if "validation" not in datasets: + if "validation" not in raw_datasets: raise ValueError("--do_eval requires a validation dataset") - eval_dataset = datasets["validation"] + eval_dataset = raw_datasets["validation"] if data_args.max_eval_samples is not None: eval_dataset = eval_dataset.select(range(data_args.max_eval_samples)) eval_dataset = eval_dataset.map( @@ -409,9 +413,9 @@ def main(): ) if training_args.do_predict: - if "test" not in datasets: + if "test" not in raw_datasets: raise ValueError("--do_predict requires a test dataset") - predict_dataset = datasets["test"] + predict_dataset = raw_datasets["test"] if data_args.max_predict_samples is not None: predict_dataset = predict_dataset.select(range(data_args.max_predict_samples)) predict_dataset = predict_dataset.map( diff --git a/examples/pytorch/token-classification/run_ner_no_trainer.py b/examples/pytorch/token-classification/run_ner_no_trainer.py index 7a68a333c2..4dea38d5c5 100755 --- a/examples/pytorch/token-classification/run_ner_no_trainer.py +++ b/examples/pytorch/token-classification/run_ner_no_trainer.py @@ -344,7 +344,7 @@ def main(): model.resize_token_embeddings(len(tokenizer)) - # Preprocessing the raw_datasets. + # Preprocessing the datasets. # First we tokenize all the texts. padding = "max_length" if args.pad_to_max_length else False diff --git a/examples/pytorch/translation/run_translation.py b/examples/pytorch/translation/run_translation.py index b41386f0fe..21ac0fdf59 100755 --- a/examples/pytorch/translation/run_translation.py +++ b/examples/pytorch/translation/run_translation.py @@ -250,6 +250,8 @@ def main(): logger.setLevel(log_level) datasets.utils.logging.set_verbosity(log_level) transformers.utils.logging.set_verbosity(log_level) + transformers.utils.logging.enable_default_handler() + transformers.utils.logging.enable_explicit_format() # Log on each process the small summary: logger.warning(