From 77c0ce8c0cead8bba06654b4b8e3a652f28a6718 Mon Sep 17 00:00:00 2001
From: Sylvain Gugger <35901082+sgugger@users.noreply.github.com>
Date: Tue, 9 Feb 2021 10:38:12 -0500
Subject: [PATCH] Fix some edge cases in report_to and add deprecation warnings
 (#10100)

---
 src/transformers/integrations.py  |  4 ++++
 src/transformers/training_args.py | 16 ++++++++++++++--
 2 files changed, 18 insertions(+), 2 deletions(-)

diff --git a/src/transformers/integrations.py b/src/transformers/integrations.py
index 7c73803e44..a8c7296d02 100644
--- a/src/transformers/integrations.py
+++ b/src/transformers/integrations.py
@@ -56,6 +56,10 @@ from .trainer_utils import PREFIX_CHECKPOINT_DIR, BestRun, EvaluationStrategy  #
 def is_wandb_available():
     # any value of WANDB_DISABLED disables wandb
     if os.getenv("WANDB_DISABLED", "").upper() in ENV_VARS_TRUE_VALUES:
+        logger.warning(
+            "Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the "
+            "--report_to flag to control the integrations used for logging result (for instance --report_to none)."
+        )
         return False
     return importlib.util.find_spec("wandb") is not None
 
diff --git a/src/transformers/training_args.py b/src/transformers/training_args.py
index 1e669b7219..32af14bd60 100644
--- a/src/transformers/training_args.py
+++ b/src/transformers/training_args.py
@@ -239,9 +239,10 @@ class TrainingArguments:
         group_by_length (:obj:`bool`, `optional`, defaults to :obj:`False`):
             Whether or not to group together samples of roughly the same legnth in the training dataset (to minimize
             padding applied and be more efficient). Only useful if applying dynamic padding.
-        report_to (:obj:`List[str]`, `optional`, defaults to the list of integrations platforms installed):
+        report_to (:obj:`str` or :obj:`List[str]`, `optional`, defaults to :obj:`"all"`):
             The list of integrations to report the results and logs to. Supported platforms are :obj:`"azure_ml"`,
-            :obj:`"comet_ml"`, :obj:`"mlflow"`, :obj:`"tensorboard"` and :obj:`"wandb"`.
+            :obj:`"comet_ml"`, :obj:`"mlflow"`, :obj:`"tensorboard"` and :obj:`"wandb"`. Use :obj:`"all"` to report to
+            all integrations installed, :obj:`"none"` for no integrations.
         ddp_find_unused_parameters (:obj:`bool`, `optional`):
             When using distributed training, the value of the flag :obj:`find_unused_parameters` passed to
             :obj:`DistributedDataParallel`. Will default to :obj:`False` if gradient checkpointing is used, :obj:`True`
@@ -478,10 +479,21 @@ class TrainingArguments:
         if is_torch_available() and self.device.type != "cuda" and self.fp16:
             raise ValueError("Mixed precision training with AMP or APEX (`--fp16`) can only be used on CUDA devices.")
         if self.report_to is None:
+            logger.info(
+                "The default value for the training argument `--report_to` will change in v5 (from all installed "
+                "integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as "
+                "now. You should start updating your code and make this info disappear :-)."
+            )
+            self.report_to = "all"
+        if self.report_to == "all" or self.report_to == ["all"]:
             # Import at runtime to avoid a circular import.
             from .integrations import get_available_reporting_integrations
 
             self.report_to = get_available_reporting_integrations()
+        elif self.report_to == "none" or self.report_to == ["none"]:
+            self.report_to = []
+        elif not isinstance(self.report_to, list):
+            self.report_to = [self.report_to]
 
     def __repr__(self):
         # We override the default repr to remove deprecated arguments from the repr. This method should be removed once