From ce9eade29c75fa676ac528d1fe21d9f4ac3c5622 Mon Sep 17 00:00:00 2001 From: Lysandre Date: Wed, 30 Oct 2019 20:50:44 +0000 Subject: [PATCH] Initializer range using BertPreTrainedModel --- transformers/modeling_albert.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/transformers/modeling_albert.py b/transformers/modeling_albert.py index 7e9f7f1c46..b45208b696 100644 --- a/transformers/modeling_albert.py +++ b/transformers/modeling_albert.py @@ -6,8 +6,7 @@ import torch import torch.nn as nn from torch.nn import CrossEntropyLoss from transformers.configuration_albert import AlbertConfig -from transformers.modeling_bert import BertEmbeddings, BertModel, BertSelfAttention, prune_linear_layer, ACT2FN -from transformers.modeling_utils import PreTrainedModel +from transformers.modeling_bert import BertEmbeddings, BertPreTrainedModel, BertModel, BertSelfAttention, prune_linear_layer, ACT2FN from .file_utils import add_start_docstrings logger = logging.getLogger(__name__) @@ -362,7 +361,7 @@ class AlbertModel(BertModel): @add_start_docstrings("Bert Model with a `language modeling` head on top.", ALBERT_START_DOCSTRING, ALBERT_INPUTS_DOCSTRING) -class AlbertForMaskedLM(PreTrainedModel): +class AlbertForMaskedLM(BertPreTrainedModel): r""" **masked_lm_labels**: (`optional`) ``torch.LongTensor`` of shape ``(batch_size, sequence_length)``: Labels for computing the masked language modeling loss.