From 2dd8f524f5ad475afce6ee01258d992593e69bea Mon Sep 17 00:00:00 2001
From: thomwolf <thomwolf@gmail.com>
Date: Wed, 6 Mar 2019 10:10:41 +0100
Subject: [PATCH] Remove test asserting ValueError on over-long sequences, following #337

---
 tests/tokenization_test.py | 18 ------------------
 1 file changed, 18 deletions(-)

diff --git a/tests/tokenization_test.py b/tests/tokenization_test.py
index 6a14e05ae8..78e145ffd2 100644
--- a/tests/tokenization_test.py
+++ b/tests/tokenization_test.py
@@ -46,24 +46,6 @@ class TokenizationTest(unittest.TestCase):
         self.assertListEqual(
             tokenizer.convert_tokens_to_ids(tokens), [7, 4, 5, 10, 8, 9])
 
-    def test_full_tokenizer_raises_error_for_long_sequences(self):
-        vocab_tokens = [
-            "[UNK]", "[CLS]", "[SEP]", "want", "##want", "##ed", "wa", "un", "runn",
-            "##ing", ","
-        ]
-        with open("/tmp/bert_tokenizer_test.txt", "w", encoding='utf-8') as vocab_writer:
-            vocab_writer.write("".join([x + "\n" for x in vocab_tokens]))
-            vocab_file = vocab_writer.name
-
-        tokenizer = BertTokenizer(vocab_file, max_len=10)
-        os.remove(vocab_file)
-        tokens = tokenizer.tokenize(u"the cat sat on the mat in the summer time")
-        indices = tokenizer.convert_tokens_to_ids(tokens)
-        self.assertListEqual(indices, [0 for _ in range(10)])
-
-        tokens = tokenizer.tokenize(u"the cat sat on the mat in the summer time .")
-        self.assertRaises(ValueError, tokenizer.convert_tokens_to_ids, tokens)
-
     def test_chinese(self):
         tokenizer = BasicTokenizer()