From ce9eade29c75fa676ac528d1fe21d9f4ac3c5622 Mon Sep 17 00:00:00 2001
From: Lysandre <lysandre.debut@reseau.eseo.fr>
Date: Wed, 30 Oct 2019 20:50:44 +0000
Subject: [PATCH] Use BertPreTrainedModel as AlbertForMaskedLM base class for initializer range

---
 transformers/modeling_albert.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/transformers/modeling_albert.py b/transformers/modeling_albert.py
index 7e9f7f1c46..b45208b696 100644
--- a/transformers/modeling_albert.py
+++ b/transformers/modeling_albert.py
@@ -6,8 +6,7 @@ import torch
 import torch.nn as nn
 from torch.nn import CrossEntropyLoss
 from transformers.configuration_albert import AlbertConfig
-from transformers.modeling_bert import BertEmbeddings, BertModel, BertSelfAttention, prune_linear_layer, ACT2FN
-from transformers.modeling_utils import PreTrainedModel
+from transformers.modeling_bert import BertEmbeddings, BertPreTrainedModel, BertModel, BertSelfAttention, prune_linear_layer, ACT2FN
 from .file_utils import add_start_docstrings
 
 logger = logging.getLogger(__name__)
@@ -362,7 +361,7 @@ class AlbertModel(BertModel):
 
 
 @add_start_docstrings("Bert Model with a `language modeling` head on top.", ALBERT_START_DOCSTRING, ALBERT_INPUTS_DOCSTRING)
-class AlbertForMaskedLM(PreTrainedModel):
+class AlbertForMaskedLM(BertPreTrainedModel):
     r"""
         **masked_lm_labels**: (`optional`) ``torch.LongTensor`` of shape ``(batch_size, sequence_length)``:
             Labels for computing the masked language modeling loss.