From e2a6445ebbc36121817c1f605d9a09a335f5fba5 Mon Sep 17 00:00:00 2001
From: Funtowicz Morgan <mfuntowicz@users.noreply.github.com>
Date: Thu, 20 Feb 2020 17:55:03 +0100
Subject: [PATCH] Tokenizer fast warnings (#2922)

* Remove warning when pad_to_max_length is not set.

Signed-off-by: Morgan Funtowicz <morgan@huggingface.co>

* Move the RoBERTa warning into the RoBERTa tokenizer instead of the GPT2 base tokenizer.

Signed-off-by: Morgan Funtowicz <morgan@huggingface.co>
---
 src/transformers/tokenization_gpt2.py    | 6 ------
 src/transformers/tokenization_roberta.py | 6 ++++++
 src/transformers/tokenization_utils.py   | 2 +-
 3 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/src/transformers/tokenization_gpt2.py b/src/transformers/tokenization_gpt2.py
index 19f578631a..5e8d9c7728 100644
--- a/src/transformers/tokenization_gpt2.py
+++ b/src/transformers/tokenization_gpt2.py
@@ -269,9 +269,3 @@ class GPT2TokenizerFast(PreTrainedTokenizerFast):
             unk_token=unk_token,
             **kwargs,
         )
-
-        logger.warning(
-            "RobertaTokenizerFast has an issue when working on mask language modeling "
-            "where it introduces an extra encoded space before the mask token."
-            "See https://github.com/huggingface/transformers/pull/2778 for more information."
-        )
diff --git a/src/transformers/tokenization_roberta.py b/src/transformers/tokenization_roberta.py
index ff2aa11004..fda82fb307 100644
--- a/src/transformers/tokenization_roberta.py
+++ b/src/transformers/tokenization_roberta.py
@@ -211,6 +211,12 @@ class RobertaTokenizerFast(GPT2TokenizerFast):
         self.max_len_single_sentence = self.max_len - self.num_added_tokens(False)  # take into account special tokens
         self.max_len_sentences_pair = self.max_len - self.num_added_tokens(True)  # take into account special tokens
 
+        logger.warning(
+            "RobertaTokenizerFast has an issue when working on mask language modeling "
+            "where it introduces an extra encoded space before the mask token."
+            "See https://github.com/huggingface/transformers/pull/2778 for more information."
+        )
+
     def build_inputs_with_special_tokens(self, token_ids_0, token_ids_1=None):
         output = [self.bos_token_id] + token_ids_0 + [self.eos_token_id]
         if token_ids_1 is None:
diff --git a/src/transformers/tokenization_utils.py b/src/transformers/tokenization_utils.py
index e3af47b037..80ab188055 100644
--- a/src/transformers/tokenization_utils.py
+++ b/src/transformers/tokenization_utils.py
@@ -85,7 +85,7 @@ def truncate_and_pad(
             pad_type_id=pad_token_type_id,
             pad_token=pad_token,
         )
-    else:
+    elif pad_to_max_length:
         logger.warning(
             "Disabled padding because no padding token set (pad_token: {}, pad_token_id: {}).\n"
             "To remove this error, you can add a new pad token and then resize model embedding:\n"