Add option to log only once in multinode training (#11819)

* Add option to log only once in multinode training

* Use an alternate property
This commit is contained in:
Sylvain Gugger
2021-05-25 08:03:43 -04:00
committed by GitHub
parent b8344a274f
commit f086652b16
15 changed files with 81 additions and 49 deletions

View File

@@ -42,7 +42,7 @@ from transformers import ( # Trainer,; TrainingArguments,
# Will import SageMaker Model parallelism specific Trainer
from transformers.sagemaker import SageMakerTrainer as Trainer
from transformers.sagemaker import SageMakerTrainingArguments as TrainingArguments
from transformers.trainer_utils import get_last_checkpoint, is_main_process
from transformers.trainer_utils import get_last_checkpoint
from transformers.utils import check_min_version
@@ -210,7 +210,7 @@ def main():
datefmt="%m/%d/%Y %H:%M:%S",
handlers=[logging.StreamHandler(sys.stdout)],
)
logger.setLevel(logging.INFO if is_main_process(training_args.local_rank) else logging.WARN)
logger.setLevel(logging.INFO if training_args.should_log else logging.WARN)
# Log on each process the small summary:
logger.warning(
@@ -218,7 +218,7 @@ def main():
+ f"distributed training: {bool(training_args.local_rank != -1)}, 16-bits training: {training_args.fp16}"
)
# Set the verbosity to info of the Transformers logger (on main process only):
if is_main_process(training_args.local_rank):
if training_args.should_log:
transformers.utils.logging.set_verbosity_info()
transformers.utils.logging.enable_default_handler()
transformers.utils.logging.enable_explicit_format()