From 8cba0572603516f4f0d7fcd52fb76dce885b1358 Mon Sep 17 00:00:00 2001
From: LysandreJik <lysandre.debut@reseau.eseo.fr>
Date: Thu, 19 Sep 2019 09:42:13 +0200
Subject: [PATCH] Doc + remove artefacts

---
 pytorch_transformers/tokenization_utils.py | 41 ++--------------------
 1 file changed, 2 insertions(+), 39 deletions(-)

diff --git a/pytorch_transformers/tokenization_utils.py b/pytorch_transformers/tokenization_utils.py
index 3a3ebd49be..2a31aec887 100644
--- a/pytorch_transformers/tokenization_utils.py
+++ b/pytorch_transformers/tokenization_utils.py
@@ -724,9 +724,8 @@ class PreTrainedTokenizer(object):
 
     def encode_plus(self, text, text_pair=None, add_special_tokens=False, output_mask=False, max_length=None, **kwargs):
         """
-        Converts a string in a sequence of ids (integer), using the tokenizer and vocabulary.
-
-        Same as doing ``self.convert_tokens_to_ids(self.tokenize(text))``.
+        Returns a dictionary containing the encoded sequence or sequence pair. This method can also return additional
+        values: the mask for sequence classification and the overflowing elements if a ``max_length`` is specified.
 
         Args:
             text: The first sequence to be encoded.
@@ -801,42 +800,6 @@ class PreTrainedTokenizer(object):
 
         return information
 
-        if text_pair is None:
-            if add_special_tokens:
-                sequence_tokens = self.convert_tokens_to_ids(self.tokenize(text, **kwargs))
-                if max_length:
-                    sequence_tokens = sequence_tokens[:max_length - self.num_added_tokens()]
-                return self.add_special_tokens_single_sentence(sequence_tokens)
-            else:
-                ids = self.convert_tokens_to_ids(self.tokenize(text, **kwargs))
-                return ids[:max_length] if max_length != -1 else ids
-
-        first_sentence_tokens = [self._convert_token_to_id(token) for token in self.tokenize(text, **kwargs)]
-        second_sentence_tokens = [self._convert_token_to_id(token) for token in self.tokenize(text_pair, **kwargs)]
-
-        if add_special_tokens:
-            if max_length:
-                if len(first_sentence_tokens) + self.num_added_tokens(pair=True) >= max_length:
-                    logger.warning(
-                        "The first sequence is longer than the maximum specified length. This sequence will not be truncated.")
-                else:
-                    if len(second_sentence_tokens) + len(first_sentence_tokens) + self.num_added_tokens(
-                            pair=True) > max_length:
-                        second_sentence_tokens = second_sentence_tokens[
-                                                 :max_length - len(first_sentence_tokens) - self.num_added_tokens(
-                                                     pair=True)]
-
-            return self.add_special_tokens_sentences_pair(first_sentence_tokens, second_sentence_tokens,
-                                                          output_mask)
-        else:
-            if max_length:
-                first_sentence_tokens = first_sentence_tokens[:max_length]
-                second_sentence_tokens = second_sentence_tokens[:max_length]
-
-            if output_mask:
-                logger.warning("Can't output mask if you're not joining two sequences.")
-            return first_sentence_tokens, second_sentence_tokens
-
     def add_special_tokens_single_sentence(self, token_ids):
         logger.warning("This tokenizer does not make use of special tokens. The sequence has been returned with no modification.")
         return token_ids