Remove static pretrained maps from the library's internals (#29112)

* [test_all] Remove static pretrained maps from the library's internals

* Deprecate archive maps instead of removing them

* Revert init changes

* [test_all] Deprecate instead of removing

* [test_all] PVT v2 support

* [test_all] Tests should all pass

* [test_all] Style

* Address review comments

* Update src/transformers/models/deprecated/_archive_maps.py

Co-authored-by: Arthur <48595927+ArthurZucker@users.noreply.github.com>

* Update src/transformers/models/deprecated/_archive_maps.py

Co-authored-by: Arthur <48595927+ArthurZucker@users.noreply.github.com>

* [test_all] trigger tests

* [test_all] LLAVA

* [test_all] Bad rebase

---------

Co-authored-by: Arthur <48595927+ArthurZucker@users.noreply.github.com>
This commit is contained in:
Lysandre Debut
2024-03-25 10:33:38 +01:00
committed by GitHub
parent 76a33a1092
commit 39114c0383
842 changed files with 4608 additions and 8613 deletions

View File

@@ -24,7 +24,6 @@ import unittest
import numpy as np
from transformers import (
- WAV_2_VEC_2_PRETRAINED_MODEL_ARCHIVE_LIST,
AddedToken,
Wav2Vec2Config,
Wav2Vec2CTCTokenizer,
@@ -357,13 +356,13 @@ class Wav2Vec2TokenizerTest(unittest.TestCase):
# this test makes sure that models that are using
# group norm don't have their tokenizer return the
# attention_mask
- for model_id in WAV_2_VEC_2_PRETRAINED_MODEL_ARCHIVE_LIST:
- config = Wav2Vec2Config.from_pretrained(model_id)
- tokenizer = Wav2Vec2Tokenizer.from_pretrained(model_id)
+ model_id = "facebook/wav2vec2-base-960h"
+ config = Wav2Vec2Config.from_pretrained(model_id)
+ tokenizer = Wav2Vec2Tokenizer.from_pretrained(model_id)
- # only "layer" feature extraction norm should make use of
- # attention_mask
- self.assertEqual(tokenizer.return_attention_mask, config.feat_extract_norm == "layer")
+ # only "layer" feature extraction norm should make use of
+ # attention_mask
+ self.assertEqual(tokenizer.return_attention_mask, config.feat_extract_norm == "layer")
class Wav2Vec2CTCTokenizerTest(TokenizerTesterMixin, unittest.TestCase):
@@ -703,10 +702,6 @@ class Wav2Vec2CTCTokenizerTest(TokenizerTesterMixin, unittest.TestCase):
self.assertListEqual(expected_word_time_stamps_start, word_time_stamps_start)
self.assertListEqual(expected_word_time_stamps_end, word_time_stamps_end)
- def test_pretrained_model_lists(self):
- # Wav2Vec2Model has no max model length => no testing
- pass
# overwrite from test_tokenization_common
def test_add_tokens_tokenizer(self):
tokenizers = self.get_tokenizers(do_lower_case=False)