fix tests - bump up version

2019-02-17 23:57:23 +01:00
parent ffd623823d
commit 009ee86a19
6 changed files with 28 additions and 79 deletions
--- a/tests/modeling_gpt2_test.py
+++ b/tests/modeling_gpt2_test.py
@@ -38,7 +38,6 @@ class GPT2ModelTest(unittest.TestCase):
                     use_token_type_ids=True,
                     use_labels=True,
                     vocab_size=99,
-                     n_special=1,
                     n_positions=33,
                     n_embd=32,
                     n_layer=5,
@@ -56,7 +55,6 @@ class GPT2ModelTest(unittest.TestCase):
            self.use_token_type_ids = use_token_type_ids
            self.use_labels = use_labels
            self.vocab_size = vocab_size
-            self.n_special = n_special
            self.n_positions = n_positions
            self.n_embd = n_embd
            self.n_layer = n_layer
@@ -76,7 +74,7 @@ class GPT2ModelTest(unittest.TestCase):

            token_type_ids = None
            if self.use_token_type_ids:
-                total_voc = self.vocab_size + self.n_special
+                total_voc = self.vocab_size
                token_type_ids = GPT2ModelTest.ids_tensor([self.batch_size, self.n_choices, self.seq_length], total_voc)

            mc_labels = None
@@ -90,7 +88,6 @@ class GPT2ModelTest(unittest.TestCase):
            config = GPT2Config(
                vocab_size_or_config_json_file=self.vocab_size,
                n_positions=self.n_positions,
-                n_special=self.n_special,
                n_embd=self.n_embd,
                n_layer=self.n_layer,
                n_head=self.n_head,
@@ -130,7 +127,7 @@ class GPT2ModelTest(unittest.TestCase):
            return outputs

        def check_gpt2_lm_head_output(self, result):
-            total_voc = self.n_special + self.vocab_size
+            total_voc = self.vocab_size
            self.parent.assertListEqual(
                list(result["lm_logits"].size()),
                [self.batch_size, self.n_choices, self.seq_length, total_voc])
@@ -157,7 +154,7 @@ class GPT2ModelTest(unittest.TestCase):
            return outputs

        def check_gpt2_double_heads_output(self, result):
-            total_voc = self.n_special + self.vocab_size
+            total_voc = self.vocab_size
            self.parent.assertListEqual(
                list(result["lm_logits"].size()),
                [self.batch_size, self.n_choices, self.seq_length, total_voc])
--- a/tests/tokenization_gpt2_test.py
+++ b/tests/tokenization_gpt2_test.py
@@ -1,56 +0,0 @@
-# coding=utf-8
-# Copyright 2018 The Google AI Language Team Authors.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-from __future__ import absolute_import, division, print_function, unicode_literals
-
-import os
-import unittest
-import json
-
-from pytorch_pretrained_bert.tokenization_gpt2 import GPT2Tokenizer
-
-
-class GPT2TokenizationTest(unittest.TestCase):
-
-    def test_full_tokenizer(self):
-        """ Adapted from Sennrich et al. 2015 and https://github.com/rsennrich/subword-nmt """
-        vocab = ["l", "o", "w", "e", "r", "s", "t", "i", "d", "n",
-                 "w</w>", "r</w>", "t</w>",
-                 "lo", "low", "er</w>",
-                 "low</w>", "lowest</w>", "newer</w>", "wider</w>"]
-        vocab_tokens = dict(zip(vocab, range(len(vocab))))
-        merges = ["#version: 0.2", "l o", "lo w", "e r</w>", ""]
-        with open("/tmp/openai_tokenizer_vocab_test.json", "w") as fp:
-            json.dump(vocab_tokens, fp)
-            vocab_file = fp.name
-        with open("/tmp/openai_tokenizer_merges_test.txt", "w") as fp:
-            fp.write("\n".join(merges))
-            merges_file = fp.name
-
-        tokenizer = GPT2Tokenizer(vocab_file, merges_file)
-        os.remove(vocab_file)
-        os.remove(merges_file)
-
-        text = "lower"
-        bpe_tokens = ["low", "er</w>"]
-        tokens = tokenizer.tokenize(text)
-        self.assertListEqual(tokens, bpe_tokens)
-
-        input_tokens = tokens
-        input_bpe_tokens = [14, 15, 20]
-        self.assertListEqual(
-            tokenizer.convert_tokens_to_ids(input_tokens), input_bpe_tokens)
-
-if __name__ == '__main__':
-    unittest.main()