Remove static pretrained maps from the library's internals (#29112)

* [test_all] Remove static pretrained maps from the library's internals * Deprecate archive maps instead of removing them * Revert init changes * [test_all] Deprecate instead of removing * [test_all] PVT v2 support * [test_all] Tests should all pass * [test_all] Style * Address review comments * Update src/transformers/models/deprecated/_archive_maps.py Co-authored-by: Arthur <48595927+ArthurZucker@users.noreply.github.com> * Update src/transformers/models/deprecated/_archive_maps.py Co-authored-by: Arthur <48595927+ArthurZucker@users.noreply.github.com> * [test_all] trigger tests * [test_all] LLAVA * [test_all] Bad rebase --------- Co-authored-by: Arthur <48595927+ArthurZucker@users.noreply.github.com>
2024-03-25 10:33:38 +01:00
parent 76a33a1092
commit 39114c0383
842 changed files with 4608 additions and 8613 deletions
--- a/tests/models/wav2vec2/test_tokenization_wav2vec2.py
+++ b/tests/models/wav2vec2/test_tokenization_wav2vec2.py
@@ -24,7 +24,6 @@ import unittest
 import numpy as np

 from transformers import (
-    WAV_2_VEC_2_PRETRAINED_MODEL_ARCHIVE_LIST,
    AddedToken,
    Wav2Vec2Config,
    Wav2Vec2CTCTokenizer,
@@ -357,13 +356,13 @@ class Wav2Vec2TokenizerTest(unittest.TestCase):
        # this test makes sure that models that are using
        # group norm don't have their tokenizer return the
        # attention_mask
-        for model_id in WAV_2_VEC_2_PRETRAINED_MODEL_ARCHIVE_LIST:
-            config = Wav2Vec2Config.from_pretrained(model_id)
-            tokenizer = Wav2Vec2Tokenizer.from_pretrained(model_id)
+        model_id = "facebook/wav2vec2-base-960h"
+        config = Wav2Vec2Config.from_pretrained(model_id)
+        tokenizer = Wav2Vec2Tokenizer.from_pretrained(model_id)

-            # only "layer" feature extraction norm should make use of
-            # attention_mask
-            self.assertEqual(tokenizer.return_attention_mask, config.feat_extract_norm == "layer")
+        # only "layer" feature extraction norm should make use of
+        # attention_mask
+        self.assertEqual(tokenizer.return_attention_mask, config.feat_extract_norm == "layer")


 class Wav2Vec2CTCTokenizerTest(TokenizerTesterMixin, unittest.TestCase):
@@ -703,10 +702,6 @@ class Wav2Vec2CTCTokenizerTest(TokenizerTesterMixin, unittest.TestCase):
        self.assertListEqual(expected_word_time_stamps_start, word_time_stamps_start)
        self.assertListEqual(expected_word_time_stamps_end, word_time_stamps_end)

-    def test_pretrained_model_lists(self):
-        # Wav2Vec2Model has no max model length => no testing
-        pass
-
    # overwrite from test_tokenization_common
    def test_add_tokens_tokenizer(self):
        tokenizers = self.get_tokenizers(do_lower_case=False)