unified tokenizer api and serialization + tests
This commit is contained in:
@@ -37,9 +37,9 @@ from .modeling_bert import BertLayerNorm as LayerNorm
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
PRETRAINED_MODEL_ARCHIVE_MAP = {"gpt2": "https://s3.amazonaws.com/models.huggingface.co/bert/gpt2-pytorch_model.bin",
|
||||
GPT2_PRETRAINED_MODEL_ARCHIVE_MAP = {"gpt2": "https://s3.amazonaws.com/models.huggingface.co/bert/gpt2-pytorch_model.bin",
|
||||
"gpt2-medium": "https://s3.amazonaws.com/models.huggingface.co/bert/gpt2-medium-pytorch_model.bin"}
|
||||
PRETRAINED_CONFIG_ARCHIVE_MAP = {"gpt2": "https://s3.amazonaws.com/models.huggingface.co/bert/gpt2-config.json",
|
||||
GPT2_PRETRAINED_CONFIG_ARCHIVE_MAP = {"gpt2": "https://s3.amazonaws.com/models.huggingface.co/bert/gpt2-config.json",
|
||||
"gpt2-medium": "https://s3.amazonaws.com/models.huggingface.co/bert/gpt2-medium-config.json"}
|
||||
|
||||
def load_tf_weights_in_gpt2(model, config, gpt2_checkpoint_path):
|
||||
@@ -103,7 +103,7 @@ def gelu(x):
|
||||
class GPT2Config(PretrainedConfig):
|
||||
"""Configuration class to store the configuration of a `GPT2Model`.
|
||||
"""
|
||||
pretrained_config_archive_map = PRETRAINED_CONFIG_ARCHIVE_MAP
|
||||
pretrained_config_archive_map = GPT2_PRETRAINED_CONFIG_ARCHIVE_MAP
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
@@ -358,7 +358,7 @@ class GPT2PreTrainedModel(PreTrainedModel):
|
||||
a simple interface for dowloading and loading pretrained models.
|
||||
"""
|
||||
config_class = GPT2Config
|
||||
pretrained_model_archive_map = PRETRAINED_MODEL_ARCHIVE_MAP
|
||||
pretrained_model_archive_map = GPT2_PRETRAINED_MODEL_ARCHIVE_MAP
|
||||
load_tf_weights = load_tf_weights_in_gpt2
|
||||
base_model_prefix = "transformer"
|
||||
|
||||
|
||||
Reference in New Issue
Block a user