Remove static pretrained maps from the library's internals (#29112)
* [test_all] Remove static pretrained maps from the library's internals
* Deprecate archive maps instead of removing them
* Revert init changes
* [test_all] Deprecate instead of removing
* [test_all] PVT v2 support
* [test_all] Tests should all pass
* [test_all] Style
* Address review comments
* Update src/transformers/models/deprecated/_archive_maps.py
  Co-authored-by: Arthur <48595927+ArthurZucker@users.noreply.github.com>
* Update src/transformers/models/deprecated/_archive_maps.py
  Co-authored-by: Arthur <48595927+ArthurZucker@users.noreply.github.com>
* [test_all] trigger tests
* [test_all] LLAVA
* [test_all] Bad rebase
---------
Co-authored-by: Arthur <48595927+ArthurZucker@users.noreply.github.com>
This commit is contained in:
@@ -20,11 +20,6 @@ from ...utils import logging
|
||||
|
||||
logger = logging.get_logger(__name__)
|
||||
|
||||
{{cookiecutter.uppercase_modelname}}_PRETRAINED_CONFIG_ARCHIVE_MAP = {
|
||||
"{{cookiecutter.checkpoint_identifier}}": "https://huggingface.co/{{cookiecutter.checkpoint_identifier}}/resolve/main/config.json",
|
||||
# See all {{cookiecutter.modelname}} models at https://huggingface.co/models?filter={{cookiecutter.lowercase_modelname}}
|
||||
}
|
||||
|
||||
|
||||
class {{cookiecutter.camelcase_modelname}}Config(PretrainedConfig):
|
||||
r"""
|
||||
|
||||
@@ -64,11 +64,6 @@ logger = logging.get_logger(__name__)
|
||||
_CHECKPOINT_FOR_DOC = "{{cookiecutter.checkpoint_identifier}}"
|
||||
_CONFIG_FOR_DOC = "{{cookiecutter.camelcase_modelname}}Config"
|
||||
|
||||
TF_{{cookiecutter.uppercase_modelname}}_PRETRAINED_MODEL_ARCHIVE_LIST = [
|
||||
"{{cookiecutter.checkpoint_identifier}}",
|
||||
# See all {{cookiecutter.modelname}} models at https://huggingface.co/models?filter={{cookiecutter.lowercase_modelname}}
|
||||
]
|
||||
|
||||
|
||||
# Copied from transformers.models.bert.modeling_tf_bert.TFBertEmbeddings with Bert->{{cookiecutter.camelcase_modelname}}
|
||||
class TF{{cookiecutter.camelcase_modelname}}Embeddings(keras.layers.Layer):
|
||||
|
||||
@@ -57,11 +57,6 @@ logger = logging.get_logger(__name__)
|
||||
_CHECKPOINT_FOR_DOC = "{{cookiecutter.checkpoint_identifier}}"
|
||||
_CONFIG_FOR_DOC = "{{cookiecutter.camelcase_modelname}}Config"
|
||||
|
||||
{{cookiecutter.uppercase_modelname}}_PRETRAINED_MODEL_ARCHIVE_LIST = [
|
||||
"{{cookiecutter.checkpoint_identifier}}",
|
||||
# See all {{cookiecutter.modelname}} models at https://huggingface.co/models?filter={{cookiecutter.lowercase_modelname}}
|
||||
]
|
||||
|
||||
|
||||
def load_tf_weights_in_{{cookiecutter.lowercase_modelname}}(model, config, tf_checkpoint_path):
|
||||
"""Load tf checkpoints in a pytorch model."""
|
||||
@@ -1588,11 +1583,6 @@ _CHECKPOINT_FOR_DOC = "{{cookiecutter.checkpoint_identifier}}"
|
||||
_CONFIG_FOR_DOC = "{{cookiecutter.camelcase_modelname}}Config"
|
||||
|
||||
|
||||
{{cookiecutter.uppercase_modelname}}_PRETRAINED_MODEL_ARCHIVE_LIST = [
|
||||
"{{cookiecutter.checkpoint_identifier}}",
|
||||
# See all {{cookiecutter.modelname}} models at https://huggingface.co/models?filter={{cookiecutter.lowercase_modelname}}
|
||||
]
|
||||
|
||||
|
||||
def shift_tokens_right(input_ids: torch.Tensor, pad_token_id: int, decoder_start_token_id: int):
|
||||
"""
|
||||
|
||||
@@ -40,8 +40,7 @@ if is_torch_available():
|
||||
{{cookiecutter.camelcase_modelname}}Model,
|
||||
)
|
||||
from transformers.models.{{cookiecutter.lowercase_modelname}}.modeling_{{cookiecutter.lowercase_modelname}} import (
|
||||
{{cookiecutter.uppercase_modelname}}_PRETRAINED_MODEL_ARCHIVE_LIST,
|
||||
)
|
||||
{{cookiecutter.uppercase_modelname}} )
|
||||
|
||||
|
||||
class {{cookiecutter.camelcase_modelname}}ModelTester:
|
||||
@@ -453,9 +452,9 @@ class {{cookiecutter.camelcase_modelname}}ModelTest(ModelTesterMixin, unittest.T
|
||||
|
||||
@slow
|
||||
def test_model_from_pretrained(self):
|
||||
for model_name in {{cookiecutter.uppercase_modelname}}_PRETRAINED_MODEL_ARCHIVE_LIST[:1]:
|
||||
model = {{cookiecutter.camelcase_modelname}}Model.from_pretrained(model_name)
|
||||
self.assertIsNotNone(model)
|
||||
model_name = "{{cookiecutter.checkpoint_identifier}}"
|
||||
model = {{cookiecutter.camelcase_modelname}}Model.from_pretrained(model_name)
|
||||
self.assertIsNotNone(model)
|
||||
|
||||
|
||||
@require_torch
|
||||
|
||||
@@ -30,7 +30,6 @@
|
||||
{% if cookiecutter.is_encoder_decoder_model == "False" %}
|
||||
_import_structure["models.{{cookiecutter.lowercase_modelname}}"].extend(
|
||||
[
|
||||
"{{cookiecutter.uppercase_modelname}}_PRETRAINED_MODEL_ARCHIVE_LIST",
|
||||
"{{cookiecutter.camelcase_modelname}}ForMaskedLM",
|
||||
"{{cookiecutter.camelcase_modelname}}ForCausalLM",
|
||||
"{{cookiecutter.camelcase_modelname}}ForMultipleChoice",
|
||||
@@ -46,7 +45,6 @@
|
||||
{% else %}
|
||||
_import_structure["models.{{cookiecutter.lowercase_modelname}}"].extend(
|
||||
[
|
||||
"{{cookiecutter.uppercase_modelname}}_PRETRAINED_MODEL_ARCHIVE_LIST",
|
||||
"{{cookiecutter.camelcase_modelname}}ForCausalLM",
|
||||
"{{cookiecutter.camelcase_modelname}}ForConditionalGeneration",
|
||||
"{{cookiecutter.camelcase_modelname}}ForQuestionAnswering",
|
||||
@@ -63,7 +61,6 @@
|
||||
{% if cookiecutter.is_encoder_decoder_model == "False" %}
|
||||
_import_structure["models.{{cookiecutter.lowercase_modelname}}"].extend(
|
||||
[
|
||||
"TF_{{cookiecutter.uppercase_modelname}}_PRETRAINED_MODEL_ARCHIVE_LIST",
|
||||
"TF{{cookiecutter.camelcase_modelname}}ForMaskedLM",
|
||||
"TF{{cookiecutter.camelcase_modelname}}ForCausalLM",
|
||||
"TF{{cookiecutter.camelcase_modelname}}ForMultipleChoice",
|
||||
@@ -122,7 +119,7 @@
|
||||
|
||||
# Below: " # Models"
|
||||
# Replace with:
|
||||
"models.{{cookiecutter.lowercase_modelname}}": ["{{cookiecutter.uppercase_modelname}}_PRETRAINED_CONFIG_ARCHIVE_MAP", "{{cookiecutter.camelcase_modelname}}Config", "{{cookiecutter.camelcase_modelname}}Tokenizer"],
|
||||
"models.{{cookiecutter.lowercase_modelname}}": ["{{cookiecutter.camelcase_modelname}}Config", "{{cookiecutter.camelcase_modelname}}Tokenizer"],
|
||||
# End.
|
||||
|
||||
# To replace in: "src/transformers/__init__.py"
|
||||
@@ -130,7 +127,6 @@
|
||||
# Replace with:
|
||||
{% if cookiecutter.is_encoder_decoder_model == "False" %}
|
||||
from .models.{{cookiecutter.lowercase_modelname}} import (
|
||||
{{cookiecutter.uppercase_modelname}}_PRETRAINED_MODEL_ARCHIVE_LIST,
|
||||
{{cookiecutter.camelcase_modelname}}ForMaskedLM,
|
||||
{{cookiecutter.camelcase_modelname}}ForCausalLM,
|
||||
{{cookiecutter.camelcase_modelname}}ForMultipleChoice,
|
||||
@@ -144,7 +140,6 @@
|
||||
)
|
||||
{% else %}
|
||||
from .models.{{cookiecutter.lowercase_modelname}} import (
|
||||
{{cookiecutter.uppercase_modelname}}_PRETRAINED_MODEL_ARCHIVE_LIST,
|
||||
{{cookiecutter.camelcase_modelname}}ForConditionalGeneration,
|
||||
{{cookiecutter.camelcase_modelname}}ForCausalLM,
|
||||
{{cookiecutter.camelcase_modelname}}ForQuestionAnswering,
|
||||
@@ -159,8 +154,7 @@
|
||||
# Replace with:
|
||||
{% if cookiecutter.is_encoder_decoder_model == "False" %}
|
||||
from .models.{{cookiecutter.lowercase_modelname}} import (
|
||||
TF_{{cookiecutter.uppercase_modelname}}_PRETRAINED_MODEL_ARCHIVE_LIST,
|
||||
TF{{cookiecutter.camelcase_modelname}}ForMaskedLM,
|
||||
TF{{cookiecutter.camelcase_modelname}}ForMaskedLM,
|
||||
TF{{cookiecutter.camelcase_modelname}}ForCausalLM,
|
||||
TF{{cookiecutter.camelcase_modelname}}ForMultipleChoice,
|
||||
TF{{cookiecutter.camelcase_modelname}}ForQuestionAnswering,
|
||||
@@ -209,9 +203,9 @@
|
||||
from .models.{{cookiecutter.lowercase_modelname}} import {{cookiecutter.camelcase_modelname}}TokenizerFast
|
||||
# End.
|
||||
|
||||
# Below: " from .models.albert import ALBERT_PRETRAINED_CONFIG_ARCHIVE_MAP, AlbertConfig"
|
||||
# Below: " from .models.albert import AlbertConfig"
|
||||
# Replace with:
|
||||
from .models.{{cookiecutter.lowercase_modelname}} import {{cookiecutter.uppercase_modelname}}_PRETRAINED_CONFIG_ARCHIVE_MAP, {{cookiecutter.camelcase_modelname}}Config, {{cookiecutter.camelcase_modelname}}Tokenizer
|
||||
from .models.{{cookiecutter.lowercase_modelname}} import {{cookiecutter.camelcase_modelname}}Config, {{cookiecutter.camelcase_modelname}}Tokenizer
|
||||
# End.
|
||||
|
||||
|
||||
@@ -229,11 +223,6 @@
|
||||
("{{cookiecutter.lowercase_modelname}}", "{{cookiecutter.camelcase_modelname}}Config"),
|
||||
# End.
|
||||
|
||||
# Below: "# Add archive maps here"
|
||||
# Replace with:
|
||||
("{{cookiecutter.lowercase_modelname}}", "{{cookiecutter.uppercase_modelname}}_PRETRAINED_CONFIG_ARCHIVE_MAP"),
|
||||
# End.
|
||||
|
||||
# Below: "# Add full (and cased) model names here"
|
||||
# Replace with:
|
||||
("{{cookiecutter.lowercase_modelname}}", "{{cookiecutter.camelcase_modelname}}"),
|
||||
|
||||
@@ -30,15 +30,6 @@ PRETRAINED_VOCAB_FILES_MAP = {
|
||||
}
|
||||
}
|
||||
|
||||
PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES = {
|
||||
"{{cookiecutter.checkpoint_identifier}}": 512,
|
||||
}
|
||||
|
||||
|
||||
PRETRAINED_INIT_CONFIGURATION = {
|
||||
"{{cookiecutter.checkpoint_identifier}}": {"do_lower_case": False},
|
||||
}
|
||||
|
||||
|
||||
class {{cookiecutter.camelcase_modelname}}TokenizerFast(BertTokenizerFast):
|
||||
r"""
|
||||
@@ -53,8 +44,6 @@ class {{cookiecutter.camelcase_modelname}}TokenizerFast(BertTokenizerFast):
|
||||
|
||||
vocab_files_names = VOCAB_FILES_NAMES
|
||||
pretrained_vocab_files_map = PRETRAINED_VOCAB_FILES_MAP
|
||||
max_model_input_sizes = PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES
|
||||
pretrained_init_configuration = PRETRAINED_INIT_CONFIGURATION
|
||||
slow_tokenizer_class = {{cookiecutter.camelcase_modelname}}Tokenizer
|
||||
|
||||
{%- elif cookiecutter.tokenizer_type == "Based on BART" %}
|
||||
@@ -67,22 +56,6 @@ logger = logging.get_logger(__name__)
|
||||
|
||||
VOCAB_FILES_NAMES = {"vocab_file": "vocab.json", "merges_file": "merges.txt", "tokenizer_file": "tokenizer.json"}
|
||||
|
||||
PRETRAINED_VOCAB_FILES_MAP = {
|
||||
"vocab_file": {
|
||||
"{{cookiecutter.checkpoint_identifier}}": "https://huggingface.co/{{cookiecutter.checkpoint_identifier}}/resolve/main/vocab.json",
|
||||
},
|
||||
"merges_file": {
|
||||
"{{cookiecutter.checkpoint_identifier}}": "https://huggingface.co/{{cookiecutter.checkpoint_identifier}}/resolve/main/merges.txt",
|
||||
},
|
||||
"tokenizer_file": {
|
||||
"{{cookiecutter.checkpoint_identifier}}": "https://huggingface.co/{{cookiecutter.checkpoint_identifier}}/resolve/main/tokenizer.json",
|
||||
},
|
||||
}
|
||||
|
||||
PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES = {
|
||||
"{{cookiecutter.checkpoint_identifier}}": 1024,
|
||||
}
|
||||
|
||||
|
||||
class {{cookiecutter.camelcase_modelname}}TokenizerFast(BartTokenizerFast):
|
||||
r"""
|
||||
@@ -96,8 +69,6 @@ class {{cookiecutter.camelcase_modelname}}TokenizerFast(BartTokenizerFast):
|
||||
"""
|
||||
|
||||
vocab_files_names = VOCAB_FILES_NAMES
|
||||
pretrained_vocab_files_map = PRETRAINED_VOCAB_FILES_MAP
|
||||
max_model_input_sizes = PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES
|
||||
slow_tokenizer_class = {{cookiecutter.camelcase_modelname}}Tokenizer
|
||||
|
||||
{%- elif cookiecutter.tokenizer_type == "Standalone" %}
|
||||
@@ -114,19 +85,6 @@ logger = logging.get_logger(__name__)
|
||||
|
||||
VOCAB_FILES_NAMES = {"vocab_file": "vocab.txt", "tokenizer_file": "tokenizer.json"}
|
||||
|
||||
PRETRAINED_VOCAB_FILES_MAP = {
|
||||
"vocab_file": {
|
||||
"{{cookiecutter.checkpoint_identifier}}": "https://huggingface.co/{{cookiecutter.checkpoint_identifier}}/resolve/main/vocab.txt",
|
||||
},
|
||||
"tokenizer_file": {
|
||||
"{{cookiecutter.checkpoint_identifier}}": "https://huggingface.co/{{cookiecutter.checkpoint_identifier}}/resolve/main/tokenizer.json",
|
||||
},
|
||||
}
|
||||
|
||||
PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES = {
|
||||
"{{cookiecutter.checkpoint_identifier}}": 1024,
|
||||
}
|
||||
|
||||
class {{cookiecutter.camelcase_modelname}}TokenizerFast(PreTrainedTokenizerFast):
|
||||
"""
|
||||
Construct a "fast" {{cookiecutter.modelname}} tokenizer (backed by HuggingFace's *tokenizers* library).
|
||||
@@ -137,8 +95,6 @@ class {{cookiecutter.camelcase_modelname}}TokenizerFast(PreTrainedTokenizerFast)
|
||||
"""
|
||||
|
||||
vocab_files_names = VOCAB_FILES_NAMES
|
||||
pretrained_vocab_files_map = PRETRAINED_VOCAB_FILES_MAP
|
||||
max_model_input_sizes = PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES
|
||||
slow_tokenizer_class = {{cookiecutter.camelcase_modelname}}Tokenizer
|
||||
|
||||
def __init__(
|
||||
|
||||
@@ -29,15 +29,6 @@ PRETRAINED_VOCAB_FILES_MAP = {
|
||||
}
|
||||
}
|
||||
|
||||
PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES = {
|
||||
"{{cookiecutter.checkpoint_identifier}}": 512,
|
||||
}
|
||||
|
||||
|
||||
PRETRAINED_INIT_CONFIGURATION = {
|
||||
"{{cookiecutter.checkpoint_identifier}}": {"do_lower_case": False},
|
||||
}
|
||||
|
||||
|
||||
class {{cookiecutter.camelcase_modelname}}Tokenizer(BertTokenizer):
|
||||
r"""
|
||||
@@ -52,8 +43,6 @@ class {{cookiecutter.camelcase_modelname}}Tokenizer(BertTokenizer):
|
||||
|
||||
vocab_files_names = VOCAB_FILES_NAMES
|
||||
pretrained_vocab_files_map = PRETRAINED_VOCAB_FILES_MAP
|
||||
max_model_input_sizes = PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES
|
||||
pretrained_init_configuration = PRETRAINED_INIT_CONFIGURATION
|
||||
|
||||
{%- elif cookiecutter.tokenizer_type == "Based on BART" %}
|
||||
from ...utils import logging
|
||||
@@ -64,19 +53,6 @@ logger = logging.get_logger(__name__)
|
||||
|
||||
VOCAB_FILES_NAMES = {"vocab_file": "vocab.json", "merges_file": "merges.txt"}
|
||||
|
||||
PRETRAINED_VOCAB_FILES_MAP = {
|
||||
"vocab_file": {
|
||||
"{{cookiecutter.checkpoint_identifier}}": "https://huggingface.co/{{cookiecutter.checkpoint_identifier}}/resolve/main/vocab.json",
|
||||
},
|
||||
"merges_file": {
|
||||
"{{cookiecutter.checkpoint_identifier}}": "https://huggingface.co/{{cookiecutter.checkpoint_identifier}}/resolve/main/merges.txt",
|
||||
},
|
||||
}
|
||||
|
||||
PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES = {
|
||||
"{{cookiecutter.checkpoint_identifier}}": 1024,
|
||||
}
|
||||
|
||||
|
||||
class {{cookiecutter.camelcase_modelname}}Tokenizer(BartTokenizer):
|
||||
"""
|
||||
@@ -90,8 +66,6 @@ class {{cookiecutter.camelcase_modelname}}Tokenizer(BartTokenizer):
|
||||
"""
|
||||
|
||||
vocab_files_names = VOCAB_FILES_NAMES
|
||||
pretrained_vocab_files_map = PRETRAINED_VOCAB_FILES_MAP
|
||||
max_model_input_sizes = PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES
|
||||
|
||||
{%- elif cookiecutter.tokenizer_type == "Standalone" %}
|
||||
from typing import List, Optional
|
||||
@@ -107,15 +81,6 @@ logger = logging.get_logger(__name__)
|
||||
|
||||
VOCAB_FILES_NAMES = {"vocab_file": "vocab.txt"}
|
||||
|
||||
PRETRAINED_VOCAB_FILES_MAP = {
|
||||
"vocab_file": {
|
||||
"{{cookiecutter.checkpoint_identifier}}": "https://huggingface.co/{{cookiecutter.checkpoint_identifier}}/resolve/main/vocab.txt",
|
||||
},
|
||||
}
|
||||
|
||||
PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES = {
|
||||
"{{cookiecutter.checkpoint_identifier}}": 1024,
|
||||
}
|
||||
|
||||
class {{cookiecutter.camelcase_modelname}}Tokenizer(PreTrainedTokenizer):
|
||||
"""
|
||||
@@ -127,8 +92,6 @@ class {{cookiecutter.camelcase_modelname}}Tokenizer(PreTrainedTokenizer):
|
||||
"""
|
||||
|
||||
vocab_files_names = VOCAB_FILES_NAMES
|
||||
pretrained_vocab_files_map = PRETRAINED_VOCAB_FILES_MAP
|
||||
max_model_input_sizes = PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES
|
||||
model_input_names = ["input_ids", "attention_mask"]
|
||||
|
||||
def __init__(
|
||||
@@ -269,8 +232,6 @@ class {{cookiecutter.camelcase_modelname}}TokenizerFast(PreTrainedTokenizerFast)
|
||||
"""
|
||||
|
||||
vocab_files_names = VOCAB_FILES_NAMES
|
||||
pretrained_vocab_files_map = PRETRAINED_VOCAB_FILES_MAP
|
||||
max_model_input_sizes = PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES
|
||||
model_input_names = ["input_ids", "attention_mask"]
|
||||
|
||||
def __init__(
|
||||
|
||||
Reference in New Issue
Block a user