Add option to log only once in multinode training (#11819)

* Add option to log only once in multinode training

* Use an alternate property
This commit is contained in:
Sylvain Gugger
2021-05-25 08:03:43 -04:00
committed by GitHub
parent b8344a274f
commit f086652b16
15 changed files with 81 additions and 49 deletions

View File

@@ -44,7 +44,7 @@ from transformers import (
set_seed,
)
from transformers.testing_utils import CaptureLogger
from transformers.trainer_utils import get_last_checkpoint, is_main_process
from transformers.trainer_utils import get_last_checkpoint
from transformers.utils import check_min_version
@@ -202,7 +202,7 @@ def main():
datefmt="%m/%d/%Y %H:%M:%S",
handlers=[logging.StreamHandler(sys.stdout)],
)
logger.setLevel(logging.INFO if is_main_process(training_args.local_rank) else logging.WARN)
logger.setLevel(logging.INFO if training_args.should_log else logging.WARN)
# Log on each process the small summary:
logger.warning(
@@ -210,7 +210,7 @@ def main():
+ f"distributed training: {bool(training_args.local_rank != -1)}, 16-bits training: {training_args.fp16}"
)
# Set the verbosity to info of the Transformers logger (on main process only):
if is_main_process(training_args.local_rank):
if training_args.should_log:
transformers.utils.logging.set_verbosity_info()
transformers.utils.logging.enable_default_handler()
transformers.utils.logging.enable_explicit_format()

View File

@@ -43,7 +43,7 @@ from transformers import (
TrainingArguments,
set_seed,
)
from transformers.trainer_utils import get_last_checkpoint, is_main_process
from transformers.trainer_utils import get_last_checkpoint
from transformers.utils import check_min_version
@@ -211,7 +211,7 @@ def main():
datefmt="%m/%d/%Y %H:%M:%S",
handlers=[logging.StreamHandler(sys.stdout)],
)
logger.setLevel(logging.INFO if is_main_process(training_args.local_rank) else logging.WARN)
logger.setLevel(logging.INFO if training_args.should_log else logging.WARN)
# Log on each process the small summary:
logger.warning(
@@ -219,7 +219,7 @@ def main():
+ f"distributed training: {bool(training_args.local_rank != -1)}, 16-bits training: {training_args.fp16}"
)
# Set the verbosity to info of the Transformers logger (on main process only):
if is_main_process(training_args.local_rank):
if training_args.should_log:
transformers.utils.logging.set_verbosity_info()
transformers.utils.logging.enable_default_handler()
transformers.utils.logging.enable_explicit_format()

View File

@@ -39,7 +39,7 @@ from transformers import (
XLNetLMHeadModel,
set_seed,
)
from transformers.trainer_utils import get_last_checkpoint, is_main_process
from transformers.trainer_utils import get_last_checkpoint
from transformers.utils import check_min_version
@@ -208,7 +208,7 @@ def main():
datefmt="%m/%d/%Y %H:%M:%S",
handlers=[logging.StreamHandler(sys.stdout)],
)
logger.setLevel(logging.INFO if is_main_process(training_args.local_rank) else logging.WARN)
logger.setLevel(logging.INFO if training_args.should_log else logging.WARN)
# Log on each process the small summary:
logger.warning(
@@ -216,7 +216,7 @@ def main():
+ f"distributed training: {bool(training_args.local_rank != -1)}, 16-bits training: {training_args.fp16}"
)
# Set the verbosity to info of the Transformers logger (on main process only):
if is_main_process(training_args.local_rank):
if training_args.should_log:
transformers.utils.logging.set_verbosity_info()
transformers.utils.logging.enable_default_handler()
transformers.utils.logging.enable_explicit_format()

View File

@@ -41,7 +41,7 @@ from transformers import (
)
from transformers.file_utils import PaddingStrategy
from transformers.tokenization_utils_base import PreTrainedTokenizerBase
from transformers.trainer_utils import get_last_checkpoint, is_main_process
from transformers.trainer_utils import get_last_checkpoint
from transformers.utils import check_min_version
@@ -235,7 +235,7 @@ def main():
datefmt="%m/%d/%Y %H:%M:%S",
handlers=[logging.StreamHandler(sys.stdout)],
)
logger.setLevel(logging.INFO if is_main_process(training_args.local_rank) else logging.WARN)
logger.setLevel(logging.INFO if training_args.should_log else logging.WARN)
# Log on each process the small summary:
logger.warning(
@@ -243,7 +243,7 @@ def main():
+ f"distributed training: {bool(training_args.local_rank != -1)}, 16-bits training: {training_args.fp16}"
)
# Set the verbosity to info of the Transformers logger (on main process only):
if is_main_process(training_args.local_rank):
if training_args.should_log:
transformers.utils.logging.set_verbosity_info()
transformers.utils.logging.enable_default_handler()
transformers.utils.logging.enable_explicit_format()

View File

@@ -40,7 +40,7 @@ from transformers import (
default_data_collator,
set_seed,
)
from transformers.trainer_utils import get_last_checkpoint, is_main_process
from transformers.trainer_utils import get_last_checkpoint
from transformers.utils import check_min_version
from utils_qa import postprocess_qa_predictions
@@ -228,7 +228,7 @@ def main():
datefmt="%m/%d/%Y %H:%M:%S",
handlers=[logging.StreamHandler(sys.stdout)],
)
logger.setLevel(logging.INFO if is_main_process(training_args.local_rank) else logging.WARN)
logger.setLevel(logging.INFO if training_args.should_log else logging.WARN)
# Log on each process the small summary:
logger.warning(
@@ -236,7 +236,7 @@ def main():
+ f"distributed training: {bool(training_args.local_rank != -1)}, 16-bits training: {training_args.fp16}"
)
# Set the verbosity to info of the Transformers logger (on main process only):
if is_main_process(training_args.local_rank):
if training_args.should_log:
transformers.utils.logging.set_verbosity_info()
transformers.utils.logging.enable_default_handler()
transformers.utils.logging.enable_explicit_format()

View File

@@ -39,7 +39,7 @@ from transformers import (
default_data_collator,
set_seed,
)
from transformers.trainer_utils import get_last_checkpoint, is_main_process
from transformers.trainer_utils import get_last_checkpoint
from transformers.utils import check_min_version
from utils_qa import postprocess_qa_predictions_with_beam_search
@@ -227,7 +227,7 @@ def main():
datefmt="%m/%d/%Y %H:%M:%S",
handlers=[logging.StreamHandler(sys.stdout)],
)
logger.setLevel(logging.INFO if is_main_process(training_args.local_rank) else logging.WARN)
logger.setLevel(logging.INFO if training_args.should_log else logging.WARN)
# Log on each process the small summary:
logger.warning(
@@ -235,7 +235,7 @@ def main():
+ f"distributed training: {bool(training_args.local_rank != -1)}, 16-bits training: {training_args.fp16}"
)
# Set the verbosity to info of the Transformers logger (on main process only):
if is_main_process(training_args.local_rank):
if training_args.should_log:
transformers.utils.logging.set_verbosity_info()
transformers.utils.logging.enable_default_handler()
transformers.utils.logging.enable_explicit_format()

View File

@@ -41,7 +41,7 @@ from transformers import (
set_seed,
)
from transformers.file_utils import is_offline_mode
from transformers.trainer_utils import get_last_checkpoint, is_main_process
from transformers.trainer_utils import get_last_checkpoint
from transformers.utils import check_min_version
@@ -284,7 +284,7 @@ def main():
datefmt="%m/%d/%Y %H:%M:%S",
handlers=[logging.StreamHandler(sys.stdout)],
)
logger.setLevel(logging.INFO if is_main_process(training_args.local_rank) else logging.WARN)
logger.setLevel(logging.INFO if training_args.should_log else logging.WARN)
# Log on each process the small summary:
logger.warning(
@@ -292,7 +292,7 @@ def main():
+ f"distributed training: {bool(training_args.local_rank != -1)}, 16-bits training: {training_args.fp16}"
)
# Set the verbosity to info of the Transformers logger (on main process only):
if is_main_process(training_args.local_rank):
if training_args.should_log:
transformers.utils.logging.set_verbosity_info()
logger.info(f"Training/evaluation parameters {training_args}")

View File

@@ -40,7 +40,7 @@ from transformers import (
default_data_collator,
set_seed,
)
from transformers.trainer_utils import get_last_checkpoint, is_main_process
from transformers.trainer_utils import get_last_checkpoint
from transformers.utils import check_min_version
@@ -216,7 +216,7 @@ def main():
datefmt="%m/%d/%Y %H:%M:%S",
handlers=[logging.StreamHandler(sys.stdout)],
)
logger.setLevel(logging.INFO if is_main_process(training_args.local_rank) else logging.WARN)
logger.setLevel(logging.INFO if training_args.should_log else logging.WARN)
# Log on each process the small summary:
logger.warning(
@@ -224,7 +224,7 @@ def main():
+ f"distributed training: {bool(training_args.local_rank != -1)}, 16-bits training: {training_args.fp16}"
)
# Set the verbosity to info of the Transformers logger (on main process only):
if is_main_process(training_args.local_rank):
if training_args.should_log:
transformers.utils.logging.set_verbosity_info()
transformers.utils.logging.enable_default_handler()
transformers.utils.logging.enable_explicit_format()

View File

@@ -40,7 +40,7 @@ from transformers import (
default_data_collator,
set_seed,
)
from transformers.trainer_utils import get_last_checkpoint, is_main_process
from transformers.trainer_utils import get_last_checkpoint
from transformers.utils import check_min_version
@@ -186,7 +186,7 @@ def main():
datefmt="%m/%d/%Y %H:%M:%S",
handlers=[logging.StreamHandler(sys.stdout)],
)
logger.setLevel(logging.INFO if is_main_process(training_args.local_rank) else logging.WARN)
logger.setLevel(logging.INFO if training_args.should_log else logging.WARN)
# Log on each process the small summary:
logger.warning(
@@ -195,7 +195,7 @@ def main():
)
# Set the verbosity to info of the Transformers logger (on main process only):
if is_main_process(training_args.local_rank):
if training_args.should_log:
transformers.utils.logging.set_verbosity_info()
transformers.utils.logging.enable_default_handler()
transformers.utils.logging.enable_explicit_format()

View File

@@ -40,7 +40,7 @@ from transformers import (
TrainingArguments,
set_seed,
)
from transformers.trainer_utils import get_last_checkpoint, is_main_process
from transformers.trainer_utils import get_last_checkpoint
from transformers.utils import check_min_version
@@ -201,7 +201,7 @@ def main():
datefmt="%m/%d/%Y %H:%M:%S",
handlers=[logging.StreamHandler(sys.stdout)],
)
logger.setLevel(logging.INFO if is_main_process(training_args.local_rank) else logging.WARN)
logger.setLevel(logging.INFO if training_args.should_log else logging.WARN)
# Log on each process the small summary:
logger.warning(
@@ -209,7 +209,7 @@ def main():
+ f"distributed training: {bool(training_args.local_rank != -1)}, 16-bits training: {training_args.fp16}"
)
# Set the verbosity to info of the Transformers logger (on main process only):
if is_main_process(training_args.local_rank):
if training_args.should_log:
transformers.utils.logging.set_verbosity_info()
transformers.utils.logging.enable_default_handler()
transformers.utils.logging.enable_explicit_format()

View File

@@ -44,7 +44,7 @@ from transformers import (
default_data_collator,
set_seed,
)
from transformers.trainer_utils import get_last_checkpoint, is_main_process
from transformers.trainer_utils import get_last_checkpoint
from transformers.utils import check_min_version
@@ -268,7 +268,7 @@ def main():
datefmt="%m/%d/%Y %H:%M:%S",
handlers=[logging.StreamHandler(sys.stdout)],
)
logger.setLevel(logging.INFO if is_main_process(training_args.local_rank) else logging.WARN)
logger.setLevel(logging.INFO if training_args.should_log else logging.WARN)
# Log on each process the small summary:
logger.warning(
@@ -276,7 +276,7 @@ def main():
+ f"distributed training: {bool(training_args.local_rank != -1)}, 16-bits training: {training_args.fp16}"
)
# Set the verbosity to info of the Transformers logger (on main process only):
if is_main_process(training_args.local_rank):
if training_args.should_log:
transformers.utils.logging.set_verbosity_info()
logger.info(f"Training/evaluation parameters {training_args}")