Kill model archive maps (#4636)

* Kill model archive maps

* Fixup

* Also kill model_archive_map for MaskedBertPreTrainedModel

* Unhook config_archive_map

* Tokenizers: align with model id changes

* make style && make quality

* Fix CI
This commit is contained in:
Julien Chaumond
2020-06-02 09:39:33 -04:00
committed by GitHub
parent 47a551d17b
commit d4c2cb402d
115 changed files with 792 additions and 1323 deletions

View File

@@ -127,7 +127,7 @@ class BertJapaneseTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
self.assertListEqual(tokenizer.tokenize("こんばんは こんばんにちは こんにちは"), ["こん", "##ばんは", "[UNK]", "こんにちは"])
def test_sequence_builders(self):
tokenizer = self.tokenizer_class.from_pretrained("bert-base-japanese")
tokenizer = self.tokenizer_class.from_pretrained("cl-tohoku/bert-base-japanese")
text = tokenizer.encode("ありがとう。", add_special_tokens=False)
text_2 = tokenizer.encode("どういたしまして。", add_special_tokens=False)
@@ -188,7 +188,7 @@ class BertJapaneseCharacterTokenizationTest(TokenizerTesterMixin, unittest.TestC
self.assertListEqual(tokenizer.tokenize("こんにちほ"), ["", "", "", "", "[UNK]"])
def test_sequence_builders(self):
tokenizer = self.tokenizer_class.from_pretrained("bert-base-japanese-char")
tokenizer = self.tokenizer_class.from_pretrained("cl-tohoku/bert-base-japanese-char")
text = tokenizer.encode("ありがとう。", add_special_tokens=False)
text_2 = tokenizer.encode("どういたしまして。", add_special_tokens=False)