Remove static pretrained maps from the library's internals (#29112)

* [test_all] Remove static pretrained maps from the library's internals

* Deprecate archive maps instead of removing them

* Revert init changes

* [test_all] Deprecate instead of removing

* [test_all] PVT v2 support

* [test_all] Tests should all pass

* [test_all] Style

* Address review comments

* Update src/transformers/models/deprecated/_archive_maps.py

Co-authored-by: Arthur <48595927+ArthurZucker@users.noreply.github.com>

* Update src/transformers/models/deprecated/_archive_maps.py

Co-authored-by: Arthur <48595927+ArthurZucker@users.noreply.github.com>

* [test_all] trigger tests

* [test_all] LLAVA

* [test_all] Bad rebase

---------

Co-authored-by: Arthur <48595927+ArthurZucker@users.noreply.github.com>
This commit is contained in:
Lysandre Debut
2024-03-25 10:33:38 +01:00
committed by GitHub
parent 76a33a1092
commit 39114c0383
842 changed files with 4608 additions and 8613 deletions

View File

@@ -1023,24 +1023,6 @@ class TokenizerTesterMixin:
decoded = tokenizer.decode(encoded, spaces_between_special_tokens=False)
self.assertIn(decoded, ["[ABC][SAMPLE][DEF]", "[ABC][SAMPLE][DEF]".lower()])
def test_pretrained_model_lists(self):
# We should have at least one default checkpoint for each tokenizer
# We should specify the max input length as well (used in some part to list the pretrained checkpoints)
self.assertGreaterEqual(len(self.tokenizer_class.pretrained_vocab_files_map), 1)
self.assertGreaterEqual(len(list(self.tokenizer_class.pretrained_vocab_files_map.values())[0]), 1)
self.assertEqual(
len(list(self.tokenizer_class.pretrained_vocab_files_map.values())[0]),
len(self.tokenizer_class.max_model_input_sizes),
)
weights_list = list(self.tokenizer_class.max_model_input_sizes.keys())
weights_lists_2 = []
for file_id, map_list in self.tokenizer_class.pretrained_vocab_files_map.items():
weights_lists_2.append(list(map_list.keys()))
for weights_list_2 in weights_lists_2:
self.assertListEqual(weights_list, weights_list_2)
def test_mask_output(self):
tokenizers = self.get_tokenizers(do_lower_case=False)
for tokenizer in tokenizers: