updating tests
This commit is contained in:
@@ -617,6 +617,7 @@ class BertModel(BertPreTrainedModel):
|
|||||||
old_embeddings = self.embeddings.word_embeddings
|
old_embeddings = self.embeddings.word_embeddings
|
||||||
new_embeddings = self._get_resized_embeddings(old_embeddings, new_num_tokens)
|
new_embeddings = self._get_resized_embeddings(old_embeddings, new_num_tokens)
|
||||||
self.embeddings.word_embeddings = new_embeddings
|
self.embeddings.word_embeddings = new_embeddings
|
||||||
|
return self.embeddings.word_embeddings
|
||||||
|
|
||||||
def _prune_heads(self, heads_to_prune):
|
def _prune_heads(self, heads_to_prune):
|
||||||
""" Prunes heads of the model.
|
""" Prunes heads of the model.
|
||||||
@@ -758,11 +759,8 @@ class BertForPreTraining(BertPreTrainedModel):
|
|||||||
""" Make sure we are sharing the input and output embeddings.
|
""" Make sure we are sharing the input and output embeddings.
|
||||||
Export to TorchScript can't handle parameter sharing so we are cloning them instead.
|
Export to TorchScript can't handle parameter sharing so we are cloning them instead.
|
||||||
"""
|
"""
|
||||||
input_embeddings = self.bert.embeddings.word_embeddings.weight
|
self._tie_or_clone_weights(self.cls.predictions.decoder,
|
||||||
if self.config.torchscript:
|
self.bert.embeddings.word_embeddings)
|
||||||
self.cls.predictions.decoder.weight = nn.Parameter(input_embeddings.clone())
|
|
||||||
else:
|
|
||||||
self.cls.predictions.decoder.weight = input_embeddings # Tied weights
|
|
||||||
|
|
||||||
def forward(self, input_ids, token_type_ids=None, attention_mask=None, masked_lm_labels=None,
|
def forward(self, input_ids, token_type_ids=None, attention_mask=None, masked_lm_labels=None,
|
||||||
next_sentence_label=None, head_mask=None):
|
next_sentence_label=None, head_mask=None):
|
||||||
@@ -864,11 +862,8 @@ class BertForMaskedLM(BertPreTrainedModel):
|
|||||||
""" Make sure we are sharing the input and output embeddings.
|
""" Make sure we are sharing the input and output embeddings.
|
||||||
Export to TorchScript can't handle parameter sharing so we are cloning them instead.
|
Export to TorchScript can't handle parameter sharing so we are cloning them instead.
|
||||||
"""
|
"""
|
||||||
input_embeddings = self.bert.embeddings.word_embeddings.weight
|
self._tie_or_clone_weights(self.cls.predictions.decoder,
|
||||||
if self.config.torchscript:
|
self.bert.embeddings.word_embeddings)
|
||||||
self.cls.predictions.decoder.weight = nn.Parameter(input_embeddings.clone())
|
|
||||||
else:
|
|
||||||
self.cls.predictions.decoder.weight = input_embeddings # Tied weights
|
|
||||||
|
|
||||||
def forward(self, input_ids, token_type_ids=None, attention_mask=None, masked_lm_labels=None, head_mask=None):
|
def forward(self, input_ids, token_type_ids=None, attention_mask=None, masked_lm_labels=None, head_mask=None):
|
||||||
"""
|
"""
|
||||||
|
|||||||
@@ -414,6 +414,7 @@ class GPT2Model(GPT2PreTrainedModel):
|
|||||||
|
|
||||||
def _resize_token_embeddings(self, new_num_tokens):
|
def _resize_token_embeddings(self, new_num_tokens):
|
||||||
self.wte = self._get_resized_embeddings(self.wte, new_num_tokens)
|
self.wte = self._get_resized_embeddings(self.wte, new_num_tokens)
|
||||||
|
return self.wte
|
||||||
|
|
||||||
def _prune_heads(self, heads_to_prune):
|
def _prune_heads(self, heads_to_prune):
|
||||||
""" Prunes heads of the model.
|
""" Prunes heads of the model.
|
||||||
@@ -562,11 +563,8 @@ class GPT2LMHeadModel(GPT2PreTrainedModel):
|
|||||||
""" Make sure we are sharing the input and output embeddings.
|
""" Make sure we are sharing the input and output embeddings.
|
||||||
Export to TorchScript can't handle parameter sharing so we are cloning them instead.
|
Export to TorchScript can't handle parameter sharing so we are cloning them instead.
|
||||||
"""
|
"""
|
||||||
input_embeddings = self.transformer.wte.weight
|
self._tie_or_clone_weights(self.lm_head,
|
||||||
if self.config.torchscript:
|
self.transformer.wte)
|
||||||
self.lm_head.weight = nn.Parameter(input_embeddings.clone())
|
|
||||||
else:
|
|
||||||
self.lm_head.weight = input_embeddings # Tied weights
|
|
||||||
|
|
||||||
def forward(self, input_ids, position_ids=None, token_type_ids=None, lm_labels=None, past=None, head_mask=None):
|
def forward(self, input_ids, position_ids=None, token_type_ids=None, lm_labels=None, past=None, head_mask=None):
|
||||||
"""
|
"""
|
||||||
@@ -658,11 +656,8 @@ class GPT2DoubleHeadsModel(GPT2PreTrainedModel):
|
|||||||
""" Make sure we are sharing the input and output embeddings.
|
""" Make sure we are sharing the input and output embeddings.
|
||||||
Export to TorchScript can't handle parameter sharing so we are cloning them instead.
|
Export to TorchScript can't handle parameter sharing so we are cloning them instead.
|
||||||
"""
|
"""
|
||||||
input_embeddings = self.transformer.wte.weight
|
self._tie_or_clone_weights(self.lm_head,
|
||||||
if self.config.torchscript:
|
self.transformer.wte)
|
||||||
self.lm_head.weight = nn.Parameter(input_embeddings.clone())
|
|
||||||
else:
|
|
||||||
self.lm_head.weight = input_embeddings # Tied weights
|
|
||||||
|
|
||||||
def forward(self, input_ids, mc_token_ids=None, lm_labels=None, mc_labels=None, token_type_ids=None,
|
def forward(self, input_ids, mc_token_ids=None, lm_labels=None, mc_labels=None, token_type_ids=None,
|
||||||
position_ids=None, past=None, head_mask=None):
|
position_ids=None, past=None, head_mask=None):
|
||||||
|
|||||||
@@ -430,6 +430,7 @@ class OpenAIGPTModel(OpenAIGPTPreTrainedModel):
|
|||||||
|
|
||||||
def _resize_token_embeddings(self, new_num_tokens):
|
def _resize_token_embeddings(self, new_num_tokens):
|
||||||
self.tokens_embed = self._get_resized_embeddings(self.tokens_embed, new_num_tokens)
|
self.tokens_embed = self._get_resized_embeddings(self.tokens_embed, new_num_tokens)
|
||||||
|
return self.tokens_embed
|
||||||
|
|
||||||
def _prune_heads(self, heads_to_prune):
|
def _prune_heads(self, heads_to_prune):
|
||||||
""" Prunes heads of the model.
|
""" Prunes heads of the model.
|
||||||
@@ -583,11 +584,8 @@ class OpenAIGPTLMHeadModel(OpenAIGPTPreTrainedModel):
|
|||||||
""" Make sure we are sharing the input and output embeddings.
|
""" Make sure we are sharing the input and output embeddings.
|
||||||
Export to TorchScript can't handle parameter sharing so we are cloning them instead.
|
Export to TorchScript can't handle parameter sharing so we are cloning them instead.
|
||||||
"""
|
"""
|
||||||
input_embeddings = self.transformer.tokens_embed.weight
|
self._tie_or_clone_weights(self.lm_head,
|
||||||
if self.config.torchscript:
|
self.transformer.tokens_embed)
|
||||||
self.lm_head.weight = nn.Parameter(input_embeddings.clone())
|
|
||||||
else:
|
|
||||||
self.lm_head.weight = input_embeddings # Tied weights
|
|
||||||
|
|
||||||
def forward(self, input_ids, position_ids=None, token_type_ids=None, lm_labels=None, head_mask=None):
|
def forward(self, input_ids, position_ids=None, token_type_ids=None, lm_labels=None, head_mask=None):
|
||||||
"""
|
"""
|
||||||
@@ -696,11 +694,8 @@ class OpenAIGPTDoubleHeadsModel(OpenAIGPTPreTrainedModel):
|
|||||||
""" Make sure we are sharing the input and output embeddings.
|
""" Make sure we are sharing the input and output embeddings.
|
||||||
Export to TorchScript can't handle parameter sharing so we are cloning them instead.
|
Export to TorchScript can't handle parameter sharing so we are cloning them instead.
|
||||||
"""
|
"""
|
||||||
input_embeddings = self.transformer.tokens_embed.weight
|
self._tie_or_clone_weights(self.lm_head,
|
||||||
if self.config.torchscript:
|
self.transformer.tokens_embed)
|
||||||
self.lm_head.weight = nn.Parameter(input_embeddings.clone())
|
|
||||||
else:
|
|
||||||
self.lm_head.weight = input_embeddings # Tied weights
|
|
||||||
|
|
||||||
def forward(self, input_ids, mc_token_ids=None, lm_labels=None, mc_labels=None, token_type_ids=None,
|
def forward(self, input_ids, mc_token_ids=None, lm_labels=None, mc_labels=None, token_type_ids=None,
|
||||||
position_ids=None, head_mask=None):
|
position_ids=None, head_mask=None):
|
||||||
|
|||||||
@@ -291,6 +291,10 @@ class TransfoXLConfig(PretrainedConfig):
|
|||||||
def vocab_size(self):
|
def vocab_size(self):
|
||||||
return self.n_token
|
return self.n_token
|
||||||
|
|
||||||
|
@vocab_size.setter
|
||||||
|
def vocab_size(self, value):
|
||||||
|
self.n_token = value
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def hidden_size(self):
|
def hidden_size(self):
|
||||||
return self.d_model
|
return self.d_model
|
||||||
@@ -1003,7 +1007,7 @@ class TransfoXLModel(TransfoXLPreTrainedModel):
|
|||||||
self.apply(self.init_weights)
|
self.apply(self.init_weights)
|
||||||
|
|
||||||
def _resize_token_embeddings(self, new_num_tokens):
|
def _resize_token_embeddings(self, new_num_tokens):
|
||||||
raise NotImplementedError
|
return self.word_emb
|
||||||
|
|
||||||
def backward_compatible(self):
|
def backward_compatible(self):
|
||||||
self.sample_softmax = -1
|
self.sample_softmax = -1
|
||||||
@@ -1280,13 +1284,20 @@ class TransfoXLLMHeadModel(TransfoXLPreTrainedModel):
|
|||||||
else:
|
else:
|
||||||
if self.config.tie_weight:
|
if self.config.tie_weight:
|
||||||
for i in range(len(self.crit.out_layers)):
|
for i in range(len(self.crit.out_layers)):
|
||||||
self.crit.out_layers[i].weight = self.transformer.word_emb.emb_layers[i].weight
|
self._tie_or_clone_weights(self.crit.out_layers[i],
|
||||||
|
self.transformer.word_emb.emb_layers[i])
|
||||||
if self.config.tie_projs:
|
if self.config.tie_projs:
|
||||||
for i, tie_proj in enumerate(self.config.tie_projs):
|
for i, tie_proj in enumerate(self.config.tie_projs):
|
||||||
if tie_proj and self.config.div_val == 1 and self.config.d_model != self.config.d_embed:
|
if tie_proj and self.config.div_val == 1 and self.config.d_model != self.config.d_embed:
|
||||||
self.crit.out_projs[i] = self.transformer.word_emb.emb_projs[0]
|
if self.config.torchscript:
|
||||||
|
self.crit.out_projs[i] = nn.Parameter(self.transformer.word_emb.emb_projs[0].clone())
|
||||||
|
else:
|
||||||
|
self.crit.out_projs[i] = self.transformer.word_emb.emb_projs[0]
|
||||||
elif tie_proj and self.config.div_val != 1:
|
elif tie_proj and self.config.div_val != 1:
|
||||||
self.crit.out_projs[i] = self.transformer.word_emb.emb_projs[i]
|
if self.config.torchscript:
|
||||||
|
self.crit.out_projs[i] = nn.Parameter(self.transformer.word_emb.emb_projs[i].clone())
|
||||||
|
else:
|
||||||
|
self.crit.out_projs[i] = self.transformer.word_emb.emb_projs[i]
|
||||||
|
|
||||||
def reset_length(self, tgt_len, ext_len, mem_len):
|
def reset_length(self, tgt_len, ext_len, mem_len):
|
||||||
self.transformer.reset_length(tgt_len, ext_len, mem_len)
|
self.transformer.reset_length(tgt_len, ext_len, mem_len)
|
||||||
|
|||||||
@@ -165,9 +165,27 @@ class PreTrainedModel(nn.Module):
|
|||||||
# Save config in model
|
# Save config in model
|
||||||
self.config = config
|
self.config = config
|
||||||
|
|
||||||
def _get_resized_embeddings(self, old_embeddings, new_num_tokens):
|
def _get_resized_embeddings(self, old_embeddings, new_num_tokens=None):
|
||||||
# Build new embeddings
|
""" Build a resized Embedding Module from a provided token Embedding Module.
|
||||||
|
Increasing the size will add newly initialized vectors at the end
|
||||||
|
Reducing the size will remove vectors from the end
|
||||||
|
|
||||||
|
Args:
|
||||||
|
new_num_tokens: (Optional) New number of tokens in the embedding matrix.
|
||||||
|
Increasing the size will add newly initialized vectors at the end
|
||||||
|
Reducing the size will remove vectors from the end
|
||||||
|
If not provided or None: return the provided token Embedding Module.
|
||||||
|
Return:
|
||||||
|
Pointer to the resized Embedding Module or the old Embedding Module if new_num_tokens is None
|
||||||
|
"""
|
||||||
|
if new_num_tokens is None:
|
||||||
|
return old_embeddings
|
||||||
|
|
||||||
old_num_tokens, old_embedding_dim = old_embeddings.weight.size()
|
old_num_tokens, old_embedding_dim = old_embeddings.weight.size()
|
||||||
|
if old_num_tokens == new_num_tokens:
|
||||||
|
return old_embeddings
|
||||||
|
|
||||||
|
# Build new embeddings
|
||||||
new_embeddings = nn.Embedding(new_num_tokens, old_embedding_dim)
|
new_embeddings = nn.Embedding(new_num_tokens, old_embedding_dim)
|
||||||
new_embeddings.to(old_embeddings.weight.device)
|
new_embeddings.to(old_embeddings.weight.device)
|
||||||
|
|
||||||
@@ -180,18 +198,29 @@ class PreTrainedModel(nn.Module):
|
|||||||
|
|
||||||
return new_embeddings
|
return new_embeddings
|
||||||
|
|
||||||
def resize_token_embeddings(self, new_num_tokens):
|
def _tie_or_clone_weights(self, first_module, second_module):
|
||||||
""" Resize input token embeddings matrix.
|
""" Tie or clone module weights depending on whether we are using TorchScript or not
|
||||||
|
"""
|
||||||
|
if self.config.torchscript:
|
||||||
|
first_module.weight = nn.Parameter(second_module.weight.clone())
|
||||||
|
else:
|
||||||
|
first_module.weight = second_module.weight
|
||||||
|
|
||||||
|
def resize_token_embeddings(self, new_num_tokens=None):
|
||||||
|
""" Resize input token embeddings matrix of the model if new_num_tokens != config.vocab_size.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
new_num_tokens: New number of tokens in the embedding matrix.
|
new_num_tokens: (Optional) New number of tokens in the embedding matrix.
|
||||||
Increasing the size will add newly initialized vectors at the end
|
Increasing the size will add newly initialized vectors at the end
|
||||||
Reducing the size will remove vectors from the end
|
Reducing the size will remove vectors from the end
|
||||||
|
If not provided or None: does nothing.
|
||||||
|
Return:
|
||||||
|
Pointer to the input tokens Embedding Module of the model
|
||||||
"""
|
"""
|
||||||
if new_num_tokens == self.config.vocab_size:
|
|
||||||
return
|
|
||||||
base_model = getattr(self, self.base_model_prefix, self) # get the base model if needed
|
base_model = getattr(self, self.base_model_prefix, self) # get the base model if needed
|
||||||
base_model._resize_token_embeddings(new_num_tokens)
|
model_embeds = base_model._resize_token_embeddings(new_num_tokens)
|
||||||
|
if new_num_tokens is None:
|
||||||
|
return model_embeds
|
||||||
|
|
||||||
# Update base model and current model config
|
# Update base model and current model config
|
||||||
self.config.vocab_size = new_num_tokens
|
self.config.vocab_size = new_num_tokens
|
||||||
@@ -201,6 +230,8 @@ class PreTrainedModel(nn.Module):
|
|||||||
if hasattr(self, 'tie_weights'):
|
if hasattr(self, 'tie_weights'):
|
||||||
self.tie_weights()
|
self.tie_weights()
|
||||||
|
|
||||||
|
return model_embeds
|
||||||
|
|
||||||
def prune_heads(self, heads_to_prune):
|
def prune_heads(self, heads_to_prune):
|
||||||
""" Prunes heads of the base model.
|
""" Prunes heads of the base model.
|
||||||
heads_to_prune: dict of {layer_num: list of heads to prune in this layer}
|
heads_to_prune: dict of {layer_num: list of heads to prune in this layer}
|
||||||
|
|||||||
@@ -184,6 +184,10 @@ class XLMConfig(PretrainedConfig):
|
|||||||
def vocab_size(self):
|
def vocab_size(self):
|
||||||
return self.n_words
|
return self.n_words
|
||||||
|
|
||||||
|
@vocab_size.setter
|
||||||
|
def vocab_size(self, value):
|
||||||
|
self.n_words = value
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def hidden_size(self):
|
def hidden_size(self):
|
||||||
return self.emb_dim
|
return self.emb_dim
|
||||||
@@ -479,6 +483,7 @@ class XLMModel(XLMPreTrainedModel):
|
|||||||
|
|
||||||
def _resize_token_embeddings(self, new_num_tokens):
|
def _resize_token_embeddings(self, new_num_tokens):
|
||||||
self.embeddings = self._get_resized_embeddings(self.embeddings, new_num_tokens)
|
self.embeddings = self._get_resized_embeddings(self.embeddings, new_num_tokens)
|
||||||
|
return self.embeddings
|
||||||
|
|
||||||
def _prune_heads(self, heads_to_prune):
|
def _prune_heads(self, heads_to_prune):
|
||||||
""" Prunes heads of the model.
|
""" Prunes heads of the model.
|
||||||
@@ -728,10 +733,7 @@ class XLMWithLMHeadModel(XLMPreTrainedModel):
|
|||||||
def tie_weights(self):
|
def tie_weights(self):
|
||||||
""" Make sure we are sharing the embeddings
|
""" Make sure we are sharing the embeddings
|
||||||
"""
|
"""
|
||||||
if self.config.torchscript:
|
self._tie_or_clone_weights(self.pred_layer.proj, self.transformer.embeddings)
|
||||||
self.pred_layer.proj.weight = nn.Parameter(self.transformer.embeddings.weight.clone())
|
|
||||||
else:
|
|
||||||
self.pred_layer.proj.weight = self.transformer.embeddings.weight
|
|
||||||
|
|
||||||
def forward(self, input_ids, lengths=None, positions=None, langs=None, token_type_ids=None,
|
def forward(self, input_ids, lengths=None, positions=None, langs=None, token_type_ids=None,
|
||||||
attention_mask=None, cache=None, labels=None, head_mask=None):
|
attention_mask=None, cache=None, labels=None, head_mask=None):
|
||||||
|
|||||||
@@ -316,6 +316,10 @@ class XLNetConfig(PretrainedConfig):
|
|||||||
def vocab_size(self):
|
def vocab_size(self):
|
||||||
return self.n_token
|
return self.n_token
|
||||||
|
|
||||||
|
@vocab_size.setter
|
||||||
|
def vocab_size(self, value):
|
||||||
|
self.n_token = value
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def hidden_size(self):
|
def hidden_size(self):
|
||||||
return self.d_model
|
return self.d_model
|
||||||
@@ -660,10 +664,10 @@ class XLNetModel(XLNetPreTrainedModel):
|
|||||||
|
|
||||||
def _resize_token_embeddings(self, new_num_tokens):
|
def _resize_token_embeddings(self, new_num_tokens):
|
||||||
self.word_embedding = self._get_resized_embeddings(self.word_embedding, new_num_tokens)
|
self.word_embedding = self._get_resized_embeddings(self.word_embedding, new_num_tokens)
|
||||||
|
return self.word_embedding
|
||||||
|
|
||||||
def _prune_heads(self, heads_to_prune):
|
def _prune_heads(self, heads_to_prune):
|
||||||
logger.info("Head pruning is not implemented for XLNet")
|
raise NotImplementedError
|
||||||
pass
|
|
||||||
|
|
||||||
def create_mask(self, qlen, mlen):
|
def create_mask(self, qlen, mlen):
|
||||||
"""
|
"""
|
||||||
@@ -987,10 +991,7 @@ class XLNetLMHeadModel(XLNetPreTrainedModel):
|
|||||||
def tie_weights(self):
|
def tie_weights(self):
|
||||||
""" Make sure we are sharing the embeddings
|
""" Make sure we are sharing the embeddings
|
||||||
"""
|
"""
|
||||||
if self.config.torchscript:
|
self._tie_or_clone_weights(self.lm_loss, self.transformer.word_embedding)
|
||||||
self.lm_loss.weight = nn.Parameter(self.transformer.word_embedding.weight.clone())
|
|
||||||
else:
|
|
||||||
self.lm_loss.weight = self.transformer.word_embedding.weight
|
|
||||||
|
|
||||||
def forward(self, input_ids, token_type_ids=None, input_mask=None, attention_mask=None,
|
def forward(self, input_ids, token_type_ids=None, input_mask=None, attention_mask=None,
|
||||||
mems=None, perm_mask=None, target_mapping=None, inp_q=None,
|
mems=None, perm_mask=None, target_mapping=None, inp_q=None,
|
||||||
|
|||||||
@@ -26,10 +26,15 @@ from pytorch_transformers import (BertConfig, BertModel, BertForMaskedLM,
|
|||||||
BertForTokenClassification, BertForMultipleChoice)
|
BertForTokenClassification, BertForMultipleChoice)
|
||||||
from pytorch_transformers.modeling_bert import BERT_PRETRAINED_MODEL_ARCHIVE_MAP
|
from pytorch_transformers.modeling_bert import BERT_PRETRAINED_MODEL_ARCHIVE_MAP
|
||||||
|
|
||||||
from .modeling_common_test import (create_and_check_commons, ConfigTester, ids_tensor)
|
from .modeling_common_test import (CommonTestCases, ConfigTester, ids_tensor)
|
||||||
|
|
||||||
|
|
||||||
class BertModelTest(unittest.TestCase):
|
class BertModelTest(CommonTestCases.CommonModelTester):
|
||||||
|
|
||||||
|
all_model_classes = (BertModel, BertForMaskedLM, BertForNextSentencePrediction,
|
||||||
|
BertForPreTraining, BertForQuestionAnswering, BertForSequenceClassification,
|
||||||
|
BertForTokenClassification)
|
||||||
|
|
||||||
class BertModelTester(object):
|
class BertModelTester(object):
|
||||||
|
|
||||||
def __init__(self,
|
def __init__(self,
|
||||||
@@ -55,9 +60,6 @@ class BertModelTest(unittest.TestCase):
|
|||||||
num_labels=3,
|
num_labels=3,
|
||||||
num_choices=4,
|
num_choices=4,
|
||||||
scope=None,
|
scope=None,
|
||||||
all_model_classes = (BertModel, BertForMaskedLM, BertForNextSentencePrediction,
|
|
||||||
BertForPreTraining, BertForQuestionAnswering, BertForSequenceClassification,
|
|
||||||
BertForTokenClassification),
|
|
||||||
):
|
):
|
||||||
self.parent = parent
|
self.parent = parent
|
||||||
self.batch_size = batch_size
|
self.batch_size = batch_size
|
||||||
@@ -81,7 +83,6 @@ class BertModelTest(unittest.TestCase):
|
|||||||
self.num_labels = num_labels
|
self.num_labels = num_labels
|
||||||
self.num_choices = num_choices
|
self.num_choices = num_choices
|
||||||
self.scope = scope
|
self.scope = scope
|
||||||
self.all_model_classes = all_model_classes
|
|
||||||
|
|
||||||
def prepare_config_and_inputs(self):
|
def prepare_config_and_inputs(self):
|
||||||
input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)
|
input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)
|
||||||
@@ -253,16 +254,51 @@ class BertModelTest(unittest.TestCase):
|
|||||||
self.check_loss_output(result)
|
self.check_loss_output(result)
|
||||||
|
|
||||||
|
|
||||||
def create_and_check_bert_commons(self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels):
|
def prepare_config_and_inputs_for_common(self):
|
||||||
|
config_and_inputs = self.prepare_config_and_inputs()
|
||||||
|
(config, input_ids, token_type_ids, input_mask,
|
||||||
|
sequence_labels, token_labels, choice_labels) = config_and_inputs
|
||||||
inputs_dict = {'input_ids': input_ids, 'token_type_ids': token_type_ids, 'attention_mask': input_mask}
|
inputs_dict = {'input_ids': input_ids, 'token_type_ids': token_type_ids, 'attention_mask': input_mask}
|
||||||
create_and_check_commons(self, config, inputs_dict)
|
return config, inputs_dict
|
||||||
|
|
||||||
def test_default(self):
|
def setUp(self):
|
||||||
self.run_tester(BertModelTest.BertModelTester(self))
|
self.model_tester = BertModelTest.BertModelTester(self)
|
||||||
|
self.config_tester = ConfigTester(self, config_class=BertConfig, hidden_size=37)
|
||||||
|
|
||||||
def test_config(self):
|
def test_config(self):
|
||||||
config_tester = ConfigTester(self, config_class=BertConfig, hidden_size=37)
|
self.config_tester.run_common_tests()
|
||||||
config_tester.run_common_tests()
|
|
||||||
|
def test_bert_model(self):
|
||||||
|
config_and_inputs = self.model_tester.prepare_config_and_inputs()
|
||||||
|
self.model_tester.create_and_check_bert_model(*config_and_inputs)
|
||||||
|
|
||||||
|
def test_for_masked_lm(self):
|
||||||
|
config_and_inputs = self.model_tester.prepare_config_and_inputs()
|
||||||
|
self.model_tester.create_and_check_bert_for_masked_lm(*config_and_inputs)
|
||||||
|
|
||||||
|
def test_for_multiple_choice(self):
|
||||||
|
config_and_inputs = self.model_tester.prepare_config_and_inputs()
|
||||||
|
self.model_tester.create_and_check_bert_for_multiple_choice(*config_and_inputs)
|
||||||
|
|
||||||
|
def test_for_next_sequence_prediction(self):
|
||||||
|
config_and_inputs = self.model_tester.prepare_config_and_inputs()
|
||||||
|
self.model_tester.create_and_check_bert_for_next_sequence_prediction(*config_and_inputs)
|
||||||
|
|
||||||
|
def test_for_pretraining(self):
|
||||||
|
config_and_inputs = self.model_tester.prepare_config_and_inputs()
|
||||||
|
self.model_tester.create_and_check_bert_for_pretraining(*config_and_inputs)
|
||||||
|
|
||||||
|
def test_for_question_answering(self):
|
||||||
|
config_and_inputs = self.model_tester.prepare_config_and_inputs()
|
||||||
|
self.model_tester.create_and_check_bert_for_question_answering(*config_and_inputs)
|
||||||
|
|
||||||
|
def test_for_sequence_classification(self):
|
||||||
|
config_and_inputs = self.model_tester.prepare_config_and_inputs()
|
||||||
|
self.model_tester.create_and_check_bert_for_sequence_classification(*config_and_inputs)
|
||||||
|
|
||||||
|
def test_for_token_classification(self):
|
||||||
|
config_and_inputs = self.model_tester.prepare_config_and_inputs()
|
||||||
|
self.model_tester.create_and_check_bert_for_token_classification(*config_and_inputs)
|
||||||
|
|
||||||
@pytest.mark.slow
|
@pytest.mark.slow
|
||||||
def test_model_from_pretrained(self):
|
def test_model_from_pretrained(self):
|
||||||
@@ -272,33 +308,5 @@ class BertModelTest(unittest.TestCase):
|
|||||||
shutil.rmtree(cache_dir)
|
shutil.rmtree(cache_dir)
|
||||||
self.assertIsNotNone(model)
|
self.assertIsNotNone(model)
|
||||||
|
|
||||||
def run_tester(self, tester):
|
|
||||||
config_and_inputs = tester.prepare_config_and_inputs()
|
|
||||||
tester.create_and_check_bert_model(*config_and_inputs)
|
|
||||||
|
|
||||||
config_and_inputs = tester.prepare_config_and_inputs()
|
|
||||||
tester.create_and_check_bert_for_masked_lm(*config_and_inputs)
|
|
||||||
|
|
||||||
config_and_inputs = tester.prepare_config_and_inputs()
|
|
||||||
tester.create_and_check_bert_for_multiple_choice(*config_and_inputs)
|
|
||||||
|
|
||||||
config_and_inputs = tester.prepare_config_and_inputs()
|
|
||||||
tester.create_and_check_bert_for_next_sequence_prediction(*config_and_inputs)
|
|
||||||
|
|
||||||
config_and_inputs = tester.prepare_config_and_inputs()
|
|
||||||
tester.create_and_check_bert_for_pretraining(*config_and_inputs)
|
|
||||||
|
|
||||||
config_and_inputs = tester.prepare_config_and_inputs()
|
|
||||||
tester.create_and_check_bert_for_question_answering(*config_and_inputs)
|
|
||||||
|
|
||||||
config_and_inputs = tester.prepare_config_and_inputs()
|
|
||||||
tester.create_and_check_bert_for_sequence_classification(*config_and_inputs)
|
|
||||||
|
|
||||||
config_and_inputs = tester.prepare_config_and_inputs()
|
|
||||||
tester.create_and_check_bert_for_token_classification(*config_and_inputs)
|
|
||||||
|
|
||||||
config_and_inputs = tester.prepare_config_and_inputs()
|
|
||||||
tester.create_and_check_bert_commons(*config_and_inputs)
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
unittest.main()
|
unittest.main()
|
||||||
|
|||||||
@@ -39,207 +39,471 @@ def _config_zero_init(config):
|
|||||||
setattr(configs_no_init, key, 0.0)
|
setattr(configs_no_init, key, 0.0)
|
||||||
return configs_no_init
|
return configs_no_init
|
||||||
|
|
||||||
def _create_and_check_torchscript_output_attentions(tester, model_classes, config, inputs_dict):
|
class CommonTestCases:
|
||||||
config.output_attentions = True
|
|
||||||
_create_and_check_torchscript(tester, model_classes, config, inputs_dict)
|
|
||||||
|
|
||||||
def _create_and_check_torchscript_output_hidden_state(tester, model_classes, config, inputs_dict):
|
class CommonModelTester(unittest.TestCase):
|
||||||
config.output_hidden_states = True
|
|
||||||
_create_and_check_torchscript(tester, model_classes, config, inputs_dict)
|
|
||||||
|
|
||||||
def _create_and_check_torchscript(tester, model_classes, config, inputs_dict):
|
model_tester = None
|
||||||
configs_no_init = _config_zero_init(config) # To be sure we have no Nan
|
all_model_classes = ()
|
||||||
configs_no_init.torchscript = True
|
test_torchscript = True
|
||||||
for model_class in model_classes:
|
test_pruning = True
|
||||||
model = model_class(config=configs_no_init)
|
test_resize_embeddings = True
|
||||||
model.eval()
|
|
||||||
inputs = inputs_dict['input_ids'] # Let's keep only input_ids
|
|
||||||
|
|
||||||
try:
|
def test_initialization(self):
|
||||||
torch.jit.trace(model, inputs)
|
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
|
||||||
except RuntimeError:
|
|
||||||
tester.parent.fail("Couldn't trace module.")
|
|
||||||
|
|
||||||
try:
|
configs_no_init = _config_zero_init(config)
|
||||||
traced_gpt2 = torch.jit.trace(model, inputs)
|
for model_class in self.all_model_classes:
|
||||||
torch.jit.save(traced_gpt2, "traced_model.pt")
|
model = model_class(config=configs_no_init)
|
||||||
except RuntimeError:
|
for name, param in model.named_parameters():
|
||||||
tester.parent.fail("Couldn't save module.")
|
if param.requires_grad:
|
||||||
|
self.assertIn(param.data.mean().item(), [0.0, 1.0],
|
||||||
|
msg="Parameter {} of model {} seems not properly initialized".format(name, model_class))
|
||||||
|
|
||||||
try:
|
def test_attention_outputs(self):
|
||||||
loaded_model = torch.jit.load("traced_model.pt")
|
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
|
||||||
os.remove("traced_model.pt")
|
|
||||||
except ValueError:
|
|
||||||
tester.parent.fail("Couldn't load module.")
|
|
||||||
|
|
||||||
model.eval()
|
for model_class in self.all_model_classes:
|
||||||
loaded_model.eval()
|
config.output_attentions = True
|
||||||
|
config.output_hidden_states = False
|
||||||
|
model = model_class(config)
|
||||||
|
model.eval()
|
||||||
|
outputs = model(**inputs_dict)
|
||||||
|
attentions = outputs[-1]
|
||||||
|
self.assertEqual(model.config.output_attentions, True)
|
||||||
|
self.assertEqual(model.config.output_hidden_states, False)
|
||||||
|
self.assertEqual(len(attentions), self.model_tester.num_hidden_layers)
|
||||||
|
self.assertListEqual(
|
||||||
|
list(attentions[0].shape[-3:]),
|
||||||
|
[self.model_tester.num_attention_heads,
|
||||||
|
self.model_tester.seq_length,
|
||||||
|
self.model_tester.key_len if hasattr(self.model_tester, 'key_len') else self.model_tester.seq_length])
|
||||||
|
out_len = len(outputs)
|
||||||
|
|
||||||
model_params = model.parameters()
|
# Check attention is always last and order is fine
|
||||||
loaded_model_params = loaded_model.parameters()
|
config.output_attentions = True
|
||||||
|
config.output_hidden_states = True
|
||||||
|
model = model_class(config)
|
||||||
|
model.eval()
|
||||||
|
outputs = model(**inputs_dict)
|
||||||
|
self.assertEqual(out_len+1, len(outputs))
|
||||||
|
self.assertEqual(model.config.output_attentions, True)
|
||||||
|
self.assertEqual(model.config.output_hidden_states, True)
|
||||||
|
|
||||||
models_equal = True
|
attentions = outputs[-1]
|
||||||
for p1, p2 in zip(model_params, loaded_model_params):
|
self.assertEqual(len(attentions), self.model_tester.num_hidden_layers)
|
||||||
if p1.data.ne(p2.data).sum() > 0:
|
self.assertListEqual(
|
||||||
models_equal = False
|
list(attentions[0].shape[-3:]),
|
||||||
|
[self.model_tester.num_attention_heads,
|
||||||
|
self.model_tester.seq_length,
|
||||||
|
self.model_tester.key_len if hasattr(self.model_tester, 'key_len') else self.model_tester.seq_length])
|
||||||
|
|
||||||
tester.parent.assertTrue(models_equal)
|
def test_torchscript(self):
|
||||||
|
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
|
||||||
|
|
||||||
def _create_and_check_initialization(tester, model_classes, config, inputs_dict):
|
self._create_and_check_torchscript(config, inputs_dict)
|
||||||
configs_no_init = _config_zero_init(config)
|
|
||||||
for model_class in model_classes:
|
|
||||||
model = model_class(config=configs_no_init)
|
|
||||||
for name, param in model.named_parameters():
|
|
||||||
if param.requires_grad:
|
|
||||||
tester.parent.assertIn(param.data.mean().item(), [0.0, 1.0],
|
|
||||||
msg="Parameter {} of model {} seems not properly initialized".format(name, model_class))
|
|
||||||
|
|
||||||
def _create_and_check_for_headmasking(tester, model_classes, config, inputs_dict):
|
def test_torchscript_output_attentions(self):
|
||||||
configs_no_init = _config_zero_init(config) # To be sure we have no Nan
|
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
|
||||||
for model_class in model_classes:
|
|
||||||
config.output_attentions = True
|
|
||||||
config.output_hidden_states = True
|
|
||||||
model = model_class(config=configs_no_init)
|
|
||||||
model.eval()
|
|
||||||
|
|
||||||
# Prepare head_mask
|
config.output_attentions = True
|
||||||
# Set require_grad after having prepared the tensor to avoid error (leaf variable has been moved into the graph interior)
|
self._create_and_check_torchscript(config, inputs_dict)
|
||||||
head_mask = torch.ones(tester.num_hidden_layers, tester.num_attention_heads)
|
|
||||||
head_mask[0, 0] = 0
|
|
||||||
head_mask[-1, :-1] = 0
|
|
||||||
head_mask.requires_grad_(requires_grad=True)
|
|
||||||
inputs = inputs_dict.copy()
|
|
||||||
inputs['head_mask'] = head_mask
|
|
||||||
|
|
||||||
outputs = model(**inputs)
|
def test_torchscript_output_hidden_state(self):
|
||||||
|
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
|
||||||
|
|
||||||
# Test that we can get a gradient back for importance score computation
|
config.output_hidden_states = True
|
||||||
output = sum(t.sum() for t in outputs[0])
|
self._create_and_check_torchscript(config, inputs_dict)
|
||||||
output = output.sum()
|
|
||||||
output.backward()
|
|
||||||
multihead_outputs = head_mask.grad
|
|
||||||
|
|
||||||
attentions = outputs[-1]
|
def _create_and_check_torchscript(self, config, inputs_dict):
|
||||||
hidden_states = outputs[-2]
|
if not self.test_torchscript:
|
||||||
|
return
|
||||||
|
|
||||||
# Remove Nan
|
configs_no_init = _config_zero_init(config) # To be sure we have no Nan
|
||||||
|
configs_no_init.torchscript = True
|
||||||
|
for model_class in self.all_model_classes:
|
||||||
|
model = model_class(config=configs_no_init)
|
||||||
|
model.eval()
|
||||||
|
inputs = inputs_dict['input_ids'] # Let's keep only input_ids
|
||||||
|
|
||||||
tester.parent.assertIsNotNone(multihead_outputs)
|
try:
|
||||||
tester.parent.assertEqual(len(multihead_outputs), tester.num_hidden_layers)
|
torch.jit.trace(model, inputs)
|
||||||
tester.parent.assertAlmostEqual(
|
except RuntimeError:
|
||||||
attentions[0][..., 0, :, :].flatten().sum().item(), 0.0)
|
self.fail("Couldn't trace module.")
|
||||||
tester.parent.assertNotEqual(
|
|
||||||
attentions[0][..., -1, :, :].flatten().sum().item(), 0.0)
|
try:
|
||||||
tester.parent.assertNotEqual(
|
traced_gpt2 = torch.jit.trace(model, inputs)
|
||||||
attentions[1][..., 0, :, :].flatten().sum().item(), 0.0)
|
torch.jit.save(traced_gpt2, "traced_model.pt")
|
||||||
tester.parent.assertAlmostEqual(
|
except RuntimeError:
|
||||||
attentions[-1][..., -2, :, :].flatten().sum().item(), 0.0)
|
self.fail("Couldn't save module.")
|
||||||
tester.parent.assertNotEqual(
|
|
||||||
attentions[-1][..., -1, :, :].flatten().sum().item(), 0.0)
|
try:
|
||||||
|
loaded_model = torch.jit.load("traced_model.pt")
|
||||||
|
os.remove("traced_model.pt")
|
||||||
|
except ValueError:
|
||||||
|
self.fail("Couldn't load module.")
|
||||||
|
|
||||||
|
model.eval()
|
||||||
|
loaded_model.eval()
|
||||||
|
|
||||||
|
model_params = model.parameters()
|
||||||
|
loaded_model_params = loaded_model.parameters()
|
||||||
|
|
||||||
|
models_equal = True
|
||||||
|
for p1, p2 in zip(model_params, loaded_model_params):
|
||||||
|
if p1.data.ne(p2.data).sum() > 0:
|
||||||
|
models_equal = False
|
||||||
|
|
||||||
|
self.assertTrue(models_equal)
|
||||||
|
|
||||||
|
|
||||||
def _create_and_check_for_head_pruning(tester, model_classes, config, inputs_dict):
|
def test_headmasking(self):
|
||||||
for model_class in model_classes:
|
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
|
||||||
config.output_attentions = True
|
|
||||||
config.output_hidden_states = False
|
|
||||||
model = model_class(config=config)
|
|
||||||
model.eval()
|
|
||||||
heads_to_prune = {0: list(range(1, tester.num_attention_heads)),
|
|
||||||
-1: [0]}
|
|
||||||
model.prune_heads(heads_to_prune)
|
|
||||||
outputs = model(**inputs_dict)
|
|
||||||
|
|
||||||
attentions = outputs[-1]
|
config.output_attentions = True
|
||||||
|
config.output_hidden_states = True
|
||||||
|
configs_no_init = _config_zero_init(config) # To be sure we have no Nan
|
||||||
|
for model_class in self.all_model_classes:
|
||||||
|
model = model_class(config=configs_no_init)
|
||||||
|
model.eval()
|
||||||
|
|
||||||
tester.parent.assertEqual(
|
# Prepare head_mask
|
||||||
attentions[0].shape[-3], 1)
|
# Set require_grad after having prepared the tensor to avoid error (leaf variable has been moved into the graph interior)
|
||||||
tester.parent.assertEqual(
|
head_mask = torch.ones(self.model_tester.num_hidden_layers, self.model_tester.num_attention_heads)
|
||||||
attentions[1].shape[-3], tester.num_attention_heads)
|
head_mask[0, 0] = 0
|
||||||
tester.parent.assertEqual(
|
head_mask[-1, :-1] = 0
|
||||||
attentions[-1].shape[-3], tester.num_attention_heads - 1)
|
head_mask.requires_grad_(requires_grad=True)
|
||||||
|
inputs = inputs_dict.copy()
|
||||||
|
inputs['head_mask'] = head_mask
|
||||||
|
|
||||||
|
outputs = model(**inputs)
|
||||||
|
|
||||||
|
# Test that we can get a gradient back for importance score computation
|
||||||
|
output = sum(t.sum() for t in outputs[0])
|
||||||
|
output = output.sum()
|
||||||
|
output.backward()
|
||||||
|
multihead_outputs = head_mask.grad
|
||||||
|
|
||||||
|
attentions = outputs[-1]
|
||||||
|
hidden_states = outputs[-2]
|
||||||
|
|
||||||
|
# Remove Nan
|
||||||
|
|
||||||
|
self.assertIsNotNone(multihead_outputs)
|
||||||
|
self.assertEqual(len(multihead_outputs), self.model_tester.num_hidden_layers)
|
||||||
|
self.assertAlmostEqual(
|
||||||
|
attentions[0][..., 0, :, :].flatten().sum().item(), 0.0)
|
||||||
|
self.assertNotEqual(
|
||||||
|
attentions[0][..., -1, :, :].flatten().sum().item(), 0.0)
|
||||||
|
self.assertNotEqual(
|
||||||
|
attentions[1][..., 0, :, :].flatten().sum().item(), 0.0)
|
||||||
|
self.assertAlmostEqual(
|
||||||
|
attentions[-1][..., -2, :, :].flatten().sum().item(), 0.0)
|
||||||
|
self.assertNotEqual(
|
||||||
|
attentions[-1][..., -1, :, :].flatten().sum().item(), 0.0)
|
||||||
|
|
||||||
|
|
||||||
def _create_and_check_for_attentions(tester, model_classes, config, inputs_dict):
|
def test_head_pruning(self):
|
||||||
for model_class in model_classes:
|
if not self.test_pruning:
|
||||||
config.output_attentions = True
|
return
|
||||||
config.output_hidden_states = False
|
|
||||||
model = model_class(config)
|
|
||||||
model.eval()
|
|
||||||
outputs = model(**inputs_dict)
|
|
||||||
attentions = outputs[-1]
|
|
||||||
tester.parent.assertEqual(model.config.output_attentions, True)
|
|
||||||
tester.parent.assertEqual(model.config.output_hidden_states, False)
|
|
||||||
tester.parent.assertEqual(len(attentions), tester.num_hidden_layers)
|
|
||||||
tester.parent.assertListEqual(
|
|
||||||
list(attentions[0].shape[-3:]),
|
|
||||||
[tester.num_attention_heads,
|
|
||||||
tester.seq_length,
|
|
||||||
tester.key_len if hasattr(tester, 'key_len') else tester.seq_length])
|
|
||||||
out_len = len(outputs)
|
|
||||||
|
|
||||||
# Check attention is always last and order is fine
|
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
|
||||||
config.output_attentions = True
|
|
||||||
config.output_hidden_states = True
|
|
||||||
model = model_class(config)
|
|
||||||
model.eval()
|
|
||||||
outputs = model(**inputs_dict)
|
|
||||||
tester.parent.assertEqual(out_len+1, len(outputs))
|
|
||||||
tester.parent.assertEqual(model.config.output_attentions, True)
|
|
||||||
tester.parent.assertEqual(model.config.output_hidden_states, True)
|
|
||||||
|
|
||||||
attentions = outputs[-1]
|
for model_class in self.all_model_classes:
|
||||||
tester.parent.assertEqual(len(attentions), tester.num_hidden_layers)
|
config.output_attentions = True
|
||||||
tester.parent.assertListEqual(
|
config.output_hidden_states = False
|
||||||
list(attentions[0].shape[-3:]),
|
model = model_class(config=config)
|
||||||
[tester.num_attention_heads,
|
model.eval()
|
||||||
tester.seq_length,
|
heads_to_prune = {0: list(range(1, self.model_tester.num_attention_heads)),
|
||||||
tester.key_len if hasattr(tester, 'key_len') else tester.seq_length])
|
-1: [0]}
|
||||||
|
model.prune_heads(heads_to_prune)
|
||||||
|
outputs = model(**inputs_dict)
|
||||||
|
|
||||||
def _create_and_check_for_hidden_states(tester, model_classes, config, inputs_dict):
|
attentions = outputs[-1]
|
||||||
for model_class in model_classes:
|
|
||||||
config.output_hidden_states = True
|
self.assertEqual(
|
||||||
config.output_attentions = False
|
attentions[0].shape[-3], 1)
|
||||||
model = model_class(config)
|
self.assertEqual(
|
||||||
model.eval()
|
attentions[1].shape[-3], self.model_tester.num_attention_heads)
|
||||||
outputs = model(**inputs_dict)
|
self.assertEqual(
|
||||||
hidden_states = outputs[-1]
|
attentions[-1].shape[-3], self.model_tester.num_attention_heads - 1)
|
||||||
tester.parent.assertEqual(model.config.output_attentions, False)
|
|
||||||
tester.parent.assertEqual(model.config.output_hidden_states, True)
|
|
||||||
tester.parent.assertEqual(len(hidden_states), tester.num_hidden_layers + 1)
|
|
||||||
tester.parent.assertListEqual(
|
|
||||||
list(hidden_states[0].shape[-2:]),
|
|
||||||
[tester.seq_length, tester.hidden_size])
|
|
||||||
|
|
||||||
|
|
||||||
def create_and_check_commons(tester, config, inputs_dict, test_pruning=True, test_torchscript=True):
|
def test_hidden_states_output(self):
|
||||||
_create_and_check_initialization(tester, tester.all_model_classes, config, inputs_dict)
|
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
|
||||||
_create_and_check_for_attentions(tester, tester.all_model_classes, config, inputs_dict)
|
|
||||||
_create_and_check_for_headmasking(tester, tester.all_model_classes, config, inputs_dict)
|
|
||||||
_create_and_check_for_hidden_states(tester, tester.all_model_classes, config, inputs_dict)
|
|
||||||
|
|
||||||
if test_torchscript:
|
for model_class in self.all_model_classes:
|
||||||
_create_and_check_torchscript(tester, tester.all_model_classes, config, inputs_dict)
|
config.output_hidden_states = True
|
||||||
_create_and_check_torchscript_output_attentions(tester, tester.all_model_classes, config, inputs_dict)
|
config.output_attentions = False
|
||||||
_create_and_check_torchscript_output_hidden_state(tester, tester.all_model_classes, config, inputs_dict)
|
model = model_class(config)
|
||||||
|
model.eval()
|
||||||
|
outputs = model(**inputs_dict)
|
||||||
|
hidden_states = outputs[-1]
|
||||||
|
self.assertEqual(model.config.output_attentions, False)
|
||||||
|
self.assertEqual(model.config.output_hidden_states, True)
|
||||||
|
self.assertEqual(len(hidden_states), self.model_tester.num_hidden_layers + 1)
|
||||||
|
self.assertListEqual(
|
||||||
|
list(hidden_states[0].shape[-2:]),
|
||||||
|
[self.model_tester.seq_length, self.model_tester.hidden_size])
|
||||||
|
|
||||||
if test_pruning:
|
def test_resize_tokens_embeddings(self):
|
||||||
_create_and_check_for_head_pruning(tester, tester.all_model_classes, config, inputs_dict)
|
original_config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
|
||||||
|
if not self.test_resize_embeddings:
|
||||||
|
return
|
||||||
|
|
||||||
|
for model_class in self.all_model_classes:
|
||||||
|
config = copy.deepcopy(original_config)
|
||||||
|
model = model_class(config)
|
||||||
|
|
||||||
|
model_vocab_size = config.vocab_size
|
||||||
|
# Retrieve the embeddings and clone theme
|
||||||
|
model_embed = model.resize_token_embeddings(model_vocab_size)
|
||||||
|
cloned_embeddings = model_embed.weight.clone()
|
||||||
|
|
||||||
|
# Check that resizing the token embeddings with a larger vocab size increases the model's vocab size
|
||||||
|
model_embed = model.resize_token_embeddings(model_vocab_size + 10)
|
||||||
|
self.assertEqual(model.config.vocab_size, model_vocab_size + 10)
|
||||||
|
# Check that it actually resizes the embeddings matrix
|
||||||
|
self.assertEqual(model_embed.weight.shape[0], cloned_embeddings.shape[0] + 10)
|
||||||
|
|
||||||
|
# Check that resizing the token embeddings with a smaller vocab size decreases the model's vocab size
|
||||||
|
model_embed = model.resize_token_embeddings(model_vocab_size - 15)
|
||||||
|
self.assertEqual(model.config.vocab_size, model_vocab_size - 15)
|
||||||
|
# Check that it actually resizes the embeddings matrix
|
||||||
|
self.assertEqual(model_embed.weight.shape[0], cloned_embeddings.shape[0] - 15)
|
||||||
|
|
||||||
|
# Check that adding and removing tokens has not modified the first part of the embedding matrix.
|
||||||
|
models_equal = True
|
||||||
|
for p1, p2 in zip(cloned_embeddings, model_embed.weight):
|
||||||
|
if p1.data.ne(p2.data).sum() > 0:
|
||||||
|
models_equal = False
|
||||||
|
|
||||||
|
self.assertTrue(models_equal)
|
||||||
|
|
||||||
|
def test_tie_model_weights(self):
|
||||||
|
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
|
||||||
|
|
||||||
|
def check_same_values(layer_1, layer_2):
|
||||||
|
equal = True
|
||||||
|
for p1, p2 in zip(layer_1.weight, layer_2.weight):
|
||||||
|
if p1.data.ne(p2.data).sum() > 0:
|
||||||
|
equal = False
|
||||||
|
return equal
|
||||||
|
|
||||||
|
for model_class in self.all_model_classes:
|
||||||
|
if not hasattr(model_class, 'tie_weights'):
|
||||||
|
continue
|
||||||
|
|
||||||
|
config.torchscript = True
|
||||||
|
model_not_tied = model_class(config)
|
||||||
|
params_not_tied = list(model_not_tied.parameters())
|
||||||
|
|
||||||
|
config_tied = copy.deepcopy(config)
|
||||||
|
config_tied.torchscript = False
|
||||||
|
model_tied = model_class(config_tied)
|
||||||
|
params_tied = list(model_tied.parameters())
|
||||||
|
|
||||||
|
# Check that the embedding layer and decoding layer are the same in size and in value
|
||||||
|
self.assertGreater(len(params_not_tied), len(params_tied))
|
||||||
|
# self.assertTrue(check_same_values(embeddings, decoding))
|
||||||
|
|
||||||
|
# # Check that after modification, they remain the same.
|
||||||
|
# embeddings.weight.data.div_(2)
|
||||||
|
# # Check that the embedding layer and decoding layer are the same in size and in value
|
||||||
|
# self.assertTrue(embeddings.weight.shape, decoding.weight.shape)
|
||||||
|
# self.assertTrue(check_same_values(embeddings, decoding))
|
||||||
|
|
||||||
|
# # Check that after modification, they remain the same.
|
||||||
|
# decoding.weight.data.div_(4)
|
||||||
|
# # Check that the embedding layer and decoding layer are the same in size and in value
|
||||||
|
# self.assertTrue(embeddings.weight.shape, decoding.weight.shape)
|
||||||
|
# self.assertTrue(check_same_values(embeddings, decoding))
|
||||||
|
|
||||||
|
# Check that after resize they remain tied.
|
||||||
|
model_tied.resize_token_embeddings(config.vocab_size + 10)
|
||||||
|
params_tied_2 = list(model_tied.parameters())
|
||||||
|
self.assertGreater(len(params_not_tied), len(params_tied))
|
||||||
|
self.assertEqual(len(params_tied_2), len(params_tied))
|
||||||
|
|
||||||
|
# decoding.weight.data.mul_(20)
|
||||||
|
# # Check that the embedding layer and decoding layer are the same in size and in value
|
||||||
|
# self.assertTrue(model.transformer.wte.weight.shape, model.lm_head.weight.shape)
|
||||||
|
# self.assertTrue(check_same_values(model.transformer.wte, model.lm_head))
|
||||||
|
|
||||||
|
|
||||||
def ids_tensor(shape, vocab_size, rng=None, name=None):
|
class GPTModelTester(CommonModelTester):
|
||||||
"""Creates a random int32 tensor of the shape within the vocab size."""
|
|
||||||
if rng is None:
|
|
||||||
rng = random.Random()
|
|
||||||
|
|
||||||
total_dims = 1
|
def __init__(self,
|
||||||
for dim in shape:
|
parent,
|
||||||
total_dims *= dim
|
batch_size=13,
|
||||||
|
seq_length=7,
|
||||||
|
is_training=True,
|
||||||
|
use_position_ids=True,
|
||||||
|
use_token_type_ids=True,
|
||||||
|
use_labels=True,
|
||||||
|
vocab_size=99,
|
||||||
|
n_positions=33,
|
||||||
|
hidden_size=32,
|
||||||
|
num_hidden_layers=5,
|
||||||
|
num_attention_heads=4,
|
||||||
|
n_choices=3,
|
||||||
|
type_sequence_label_size=2,
|
||||||
|
initializer_range=0.02,
|
||||||
|
num_labels=3,
|
||||||
|
scope=None,
|
||||||
|
config_class=None,
|
||||||
|
base_model_class=None,
|
||||||
|
lm_head_model_class=None,
|
||||||
|
double_head_model_class=None,
|
||||||
|
):
|
||||||
|
self.parent = parent
|
||||||
|
self.batch_size = batch_size
|
||||||
|
self.seq_length = seq_length
|
||||||
|
self.is_training = is_training
|
||||||
|
self.use_position_ids = use_position_ids
|
||||||
|
self.use_token_type_ids = use_token_type_ids
|
||||||
|
self.use_labels = use_labels
|
||||||
|
self.vocab_size = vocab_size
|
||||||
|
self.n_positions = n_positions
|
||||||
|
self.hidden_size = hidden_size
|
||||||
|
self.num_hidden_layers = num_hidden_layers
|
||||||
|
self.num_attention_heads = num_attention_heads
|
||||||
|
self.n_choices = n_choices
|
||||||
|
self.type_sequence_label_size = type_sequence_label_size
|
||||||
|
self.initializer_range = initializer_range
|
||||||
|
self.num_labels = num_labels
|
||||||
|
self.scope = scope
|
||||||
|
self.config_class = config_class
|
||||||
|
self.base_model_class = base_model_class
|
||||||
|
self.lm_head_model_class = lm_head_model_class
|
||||||
|
self.double_head_model_class = double_head_model_class
|
||||||
|
self.all_model_classes = (base_model_class, lm_head_model_class, double_head_model_class)
|
||||||
|
|
||||||
values = []
|
def prepare_config_and_inputs(self):
|
||||||
for _ in range(total_dims):
|
total_num_tokens = self.vocab_size
|
||||||
values.append(rng.randint(0, vocab_size - 1))
|
input_ids = ids_tensor([self.batch_size, self.n_choices, self.seq_length], total_num_tokens)
|
||||||
|
|
||||||
return torch.tensor(data=values, dtype=torch.long).view(shape).contiguous()
|
position_ids = None
|
||||||
|
if self.use_position_ids:
|
||||||
|
position_ids = ids_tensor([self.batch_size, self.n_choices, self.seq_length], self.n_positions)
|
||||||
|
|
||||||
|
token_type_ids = None
|
||||||
|
if self.use_token_type_ids:
|
||||||
|
total_voc = self.vocab_size
|
||||||
|
token_type_ids = ids_tensor([self.batch_size, self.n_choices, self.seq_length], total_voc)
|
||||||
|
|
||||||
|
mc_labels = None
|
||||||
|
lm_labels = None
|
||||||
|
mc_token_ids = None
|
||||||
|
if self.use_labels:
|
||||||
|
mc_labels = ids_tensor([self.batch_size], self.type_sequence_label_size)
|
||||||
|
lm_labels = ids_tensor([self.batch_size, self.n_choices, self.seq_length], self.num_labels)
|
||||||
|
mc_token_ids = ids_tensor([self.batch_size, self.n_choices], self.seq_length)
|
||||||
|
|
||||||
|
config = self.config_class(
|
||||||
|
vocab_size_or_config_json_file=self.vocab_size,
|
||||||
|
n_positions=self.n_positions,
|
||||||
|
n_embd=self.hidden_size,
|
||||||
|
n_layer=self.num_hidden_layers,
|
||||||
|
n_head=self.num_attention_heads,
|
||||||
|
initializer_range=self.initializer_range)
|
||||||
|
|
||||||
|
return (config, input_ids, token_type_ids, position_ids,
|
||||||
|
mc_labels, lm_labels, mc_token_ids)
|
||||||
|
|
||||||
|
def create_and_check_base_model(self, config, input_ids, token_type_ids, position_ids,
|
||||||
|
mc_labels, lm_labels, mc_token_ids):
|
||||||
|
model = self.base_model_class(config)
|
||||||
|
model.eval()
|
||||||
|
|
||||||
|
outputs = model(input_ids, position_ids, token_type_ids)
|
||||||
|
outputs = model(input_ids, position_ids)
|
||||||
|
outputs = model(input_ids)
|
||||||
|
|
||||||
|
hidden_state = outputs[0]
|
||||||
|
self.parent.assertListEqual(
|
||||||
|
list(hidden_state.size()),
|
||||||
|
[self.batch_size, self.n_choices, self.seq_length, self.hidden_size])
|
||||||
|
|
||||||
|
|
||||||
|
def create_and_check_lm_head(self, config, input_ids, token_type_ids, position_ids,
|
||||||
|
mc_labels, lm_labels, mc_token_ids):
|
||||||
|
model = self.lm_head_model_class(config)
|
||||||
|
model.eval()
|
||||||
|
outputs = model(input_ids, position_ids, token_type_ids, lm_labels)
|
||||||
|
loss, lm_logits = outputs[:2]
|
||||||
|
|
||||||
|
total_voc = self.vocab_size
|
||||||
|
self.parent.assertListEqual(
|
||||||
|
list(lm_logits.size()),
|
||||||
|
[self.batch_size, self.n_choices, self.seq_length, total_voc])
|
||||||
|
self.parent.assertListEqual(
|
||||||
|
list(loss.size()),
|
||||||
|
[])
|
||||||
|
|
||||||
|
def create_and_check_presents(self, config, input_ids, token_type_ids, position_ids,
|
||||||
|
mc_labels, lm_labels, mc_token_ids):
|
||||||
|
for model_class in self.all_model_classes:
|
||||||
|
model = model_class(config)
|
||||||
|
model.eval()
|
||||||
|
outputs = model(input_ids)
|
||||||
|
presents = outputs[-1]
|
||||||
|
self.parent.assertEqual(self.num_hidden_layers, len(presents))
|
||||||
|
self.parent.assertListEqual(
|
||||||
|
list(presents[0].size()),
|
||||||
|
[2, self.batch_size * self.n_choices, self.num_attention_heads,
|
||||||
|
self.seq_length, self.hidden_size // self.num_attention_heads])
|
||||||
|
|
||||||
|
def create_and_check_double_heads(self, config, input_ids, token_type_ids, position_ids,
|
||||||
|
mc_labels, lm_labels, mc_token_ids):
|
||||||
|
model = self.double_head_model_class(config)
|
||||||
|
model.eval()
|
||||||
|
outputs = model(input_ids, mc_token_ids, lm_labels=lm_labels, mc_labels=mc_labels,
|
||||||
|
token_type_ids=token_type_ids, position_ids=position_ids)
|
||||||
|
lm_loss, mc_loss, lm_logits, mc_logits = outputs[:4]
|
||||||
|
loss = [lm_loss, mc_loss]
|
||||||
|
|
||||||
|
total_voc = self.vocab_size
|
||||||
|
self.parent.assertListEqual(
|
||||||
|
list(lm_logits.size()),
|
||||||
|
[self.batch_size, self.n_choices, self.seq_length, total_voc])
|
||||||
|
self.parent.assertListEqual(
|
||||||
|
list(mc_logits.size()),
|
||||||
|
[self.batch_size, self.n_choices])
|
||||||
|
self.parent.assertListEqual(
|
||||||
|
[list(l.size()) for l in loss],
|
||||||
|
[[], []])
|
||||||
|
|
||||||
|
def create_and_check_model_from_pretrained(self):
|
||||||
|
cache_dir = "/tmp/pytorch_transformers_test/"
|
||||||
|
for model_name in list(self.base_model_class.pretrained_model_archive_map.keys())[:1]:
|
||||||
|
model = self.base_model_class.from_pretrained(model_name, cache_dir=cache_dir)
|
||||||
|
shutil.rmtree(cache_dir)
|
||||||
|
self.parent.assertIsNotNone(model)
|
||||||
|
|
||||||
|
def prepare_config_and_inputs_for_common(self):
|
||||||
|
config_and_inputs = self.prepare_config_and_inputs()
|
||||||
|
(config, input_ids, token_type_ids, position_ids,
|
||||||
|
mc_labels, lm_labels, mc_token_ids) = config_and_inputs
|
||||||
|
inputs_dict = {'input_ids': input_ids}
|
||||||
|
return config, inputs_dict
|
||||||
|
|
||||||
|
def run_common_tests(self, test_presents=False):
|
||||||
|
config_and_inputs = self.prepare_config_and_inputs()
|
||||||
|
self.create_and_check_base_model(*config_and_inputs)
|
||||||
|
|
||||||
|
config_and_inputs = self.prepare_config_and_inputs()
|
||||||
|
self.create_and_check_lm_head(*config_and_inputs)
|
||||||
|
|
||||||
|
config_and_inputs = self.prepare_config_and_inputs()
|
||||||
|
self.create_and_check_double_heads(*config_and_inputs)
|
||||||
|
|
||||||
|
if test_presents:
|
||||||
|
config_and_inputs = self.prepare_config_and_inputs()
|
||||||
|
self.create_and_check_presents(*config_and_inputs)
|
||||||
|
|
||||||
|
def run_slow_tests(self):
|
||||||
|
self.create_and_check_model_from_pretrained()
|
||||||
|
|
||||||
|
|
||||||
class ConfigTester(object):
|
class ConfigTester(object):
|
||||||
@@ -275,179 +539,22 @@ class ConfigTester(object):
|
|||||||
self.create_and_test_config_to_json_file()
|
self.create_and_test_config_to_json_file()
|
||||||
|
|
||||||
|
|
||||||
class GPTModelTester(object):
|
|
||||||
def __init__(self,
|
|
||||||
parent,
|
|
||||||
batch_size=13,
|
|
||||||
seq_length=7,
|
|
||||||
is_training=True,
|
|
||||||
use_position_ids=True,
|
|
||||||
use_token_type_ids=True,
|
|
||||||
use_labels=True,
|
|
||||||
vocab_size=99,
|
|
||||||
n_positions=33,
|
|
||||||
hidden_size=32,
|
|
||||||
num_hidden_layers=5,
|
|
||||||
num_attention_heads=4,
|
|
||||||
n_choices=3,
|
|
||||||
type_sequence_label_size=2,
|
|
||||||
initializer_range=0.02,
|
|
||||||
num_labels=3,
|
|
||||||
scope=None,
|
|
||||||
config_class=None,
|
|
||||||
base_model_class=None,
|
|
||||||
lm_head_model_class=None,
|
|
||||||
double_head_model_class=None,
|
|
||||||
):
|
|
||||||
self.parent = parent
|
|
||||||
self.batch_size = batch_size
|
|
||||||
self.seq_length = seq_length
|
|
||||||
self.is_training = is_training
|
|
||||||
self.use_position_ids = use_position_ids
|
|
||||||
self.use_token_type_ids = use_token_type_ids
|
|
||||||
self.use_labels = use_labels
|
|
||||||
self.vocab_size = vocab_size
|
|
||||||
self.n_positions = n_positions
|
|
||||||
self.hidden_size = hidden_size
|
|
||||||
self.num_hidden_layers = num_hidden_layers
|
|
||||||
self.num_attention_heads = num_attention_heads
|
|
||||||
self.n_choices = n_choices
|
|
||||||
self.type_sequence_label_size = type_sequence_label_size
|
|
||||||
self.initializer_range = initializer_range
|
|
||||||
self.num_labels = num_labels
|
|
||||||
self.scope = scope
|
|
||||||
self.config_class = config_class
|
|
||||||
self.base_model_class = base_model_class
|
|
||||||
self.lm_head_model_class = lm_head_model_class
|
|
||||||
self.double_head_model_class = double_head_model_class
|
|
||||||
self.all_model_classes = (base_model_class, lm_head_model_class, double_head_model_class)
|
|
||||||
|
|
||||||
def prepare_config_and_inputs(self):
|
|
||||||
total_num_tokens = self.vocab_size
|
|
||||||
input_ids = ids_tensor([self.batch_size, self.n_choices, self.seq_length], total_num_tokens)
|
|
||||||
|
|
||||||
position_ids = None
|
|
||||||
if self.use_position_ids:
|
|
||||||
position_ids = ids_tensor([self.batch_size, self.n_choices, self.seq_length], self.n_positions)
|
|
||||||
|
|
||||||
token_type_ids = None
|
|
||||||
if self.use_token_type_ids:
|
|
||||||
total_voc = self.vocab_size
|
|
||||||
token_type_ids = ids_tensor([self.batch_size, self.n_choices, self.seq_length], total_voc)
|
|
||||||
|
|
||||||
mc_labels = None
|
|
||||||
lm_labels = None
|
|
||||||
mc_token_ids = None
|
|
||||||
if self.use_labels:
|
|
||||||
mc_labels = ids_tensor([self.batch_size], self.type_sequence_label_size)
|
|
||||||
lm_labels = ids_tensor([self.batch_size, self.n_choices, self.seq_length], self.num_labels)
|
|
||||||
mc_token_ids = ids_tensor([self.batch_size, self.n_choices], self.seq_length)
|
|
||||||
|
|
||||||
config = self.config_class(
|
|
||||||
vocab_size_or_config_json_file=self.vocab_size,
|
|
||||||
n_positions=self.n_positions,
|
|
||||||
n_embd=self.hidden_size,
|
|
||||||
n_layer=self.num_hidden_layers,
|
|
||||||
n_head=self.num_attention_heads,
|
|
||||||
initializer_range=self.initializer_range)
|
|
||||||
|
|
||||||
return (config, input_ids, token_type_ids, position_ids,
|
|
||||||
mc_labels, lm_labels, mc_token_ids)
|
|
||||||
|
|
||||||
def create_and_check_base_model(self, config, input_ids, token_type_ids, position_ids,
|
|
||||||
mc_labels, lm_labels, mc_token_ids):
|
|
||||||
model = self.base_model_class(config)
|
|
||||||
model.eval()
|
|
||||||
|
|
||||||
outputs = model(input_ids, position_ids, token_type_ids)
|
|
||||||
outputs = model(input_ids, position_ids)
|
|
||||||
outputs = model(input_ids)
|
|
||||||
|
|
||||||
hidden_state = outputs[0]
|
|
||||||
self.parent.assertListEqual(
|
|
||||||
list(hidden_state.size()),
|
|
||||||
[self.batch_size, self.n_choices, self.seq_length, self.hidden_size])
|
|
||||||
|
|
||||||
|
|
||||||
def create_and_check_lm_head(self, config, input_ids, token_type_ids, position_ids,
|
def ids_tensor(shape, vocab_size, rng=None, name=None):
|
||||||
mc_labels, lm_labels, mc_token_ids):
|
"""Creates a random int32 tensor of the shape within the vocab size."""
|
||||||
model = self.lm_head_model_class(config)
|
if rng is None:
|
||||||
model.eval()
|
rng = random.Random()
|
||||||
outputs = model(input_ids, position_ids, token_type_ids, lm_labels)
|
|
||||||
loss, lm_logits = outputs[:2]
|
|
||||||
|
|
||||||
total_voc = self.vocab_size
|
total_dims = 1
|
||||||
self.parent.assertListEqual(
|
for dim in shape:
|
||||||
list(lm_logits.size()),
|
total_dims *= dim
|
||||||
[self.batch_size, self.n_choices, self.seq_length, total_voc])
|
|
||||||
self.parent.assertListEqual(
|
|
||||||
list(loss.size()),
|
|
||||||
[])
|
|
||||||
|
|
||||||
def create_and_check_presents(self, config, input_ids, token_type_ids, position_ids,
|
values = []
|
||||||
mc_labels, lm_labels, mc_token_ids):
|
for _ in range(total_dims):
|
||||||
for model_class in self.all_model_classes:
|
values.append(rng.randint(0, vocab_size - 1))
|
||||||
model = model_class(config)
|
|
||||||
model.eval()
|
|
||||||
outputs = model(input_ids)
|
|
||||||
presents = outputs[-1]
|
|
||||||
self.parent.assertEqual(self.num_hidden_layers, len(presents))
|
|
||||||
self.parent.assertListEqual(
|
|
||||||
list(presents[0].size()),
|
|
||||||
[2, self.batch_size * self.n_choices, self.num_attention_heads,
|
|
||||||
self.seq_length, self.hidden_size // self.num_attention_heads])
|
|
||||||
|
|
||||||
def create_and_check_double_heads(self, config, input_ids, token_type_ids, position_ids,
|
return torch.tensor(data=values, dtype=torch.long).view(shape).contiguous()
|
||||||
mc_labels, lm_labels, mc_token_ids):
|
|
||||||
model = self.double_head_model_class(config)
|
|
||||||
model.eval()
|
|
||||||
outputs = model(input_ids, mc_token_ids, lm_labels=lm_labels, mc_labels=mc_labels,
|
|
||||||
token_type_ids=token_type_ids, position_ids=position_ids)
|
|
||||||
lm_loss, mc_loss, lm_logits, mc_logits = outputs[:4]
|
|
||||||
loss = [lm_loss, mc_loss]
|
|
||||||
|
|
||||||
total_voc = self.vocab_size
|
|
||||||
self.parent.assertListEqual(
|
|
||||||
list(lm_logits.size()),
|
|
||||||
[self.batch_size, self.n_choices, self.seq_length, total_voc])
|
|
||||||
self.parent.assertListEqual(
|
|
||||||
list(mc_logits.size()),
|
|
||||||
[self.batch_size, self.n_choices])
|
|
||||||
self.parent.assertListEqual(
|
|
||||||
[list(l.size()) for l in loss],
|
|
||||||
[[], []])
|
|
||||||
|
|
||||||
def create_and_check_model_from_pretrained(self):
|
|
||||||
cache_dir = "/tmp/pytorch_transformers_test/"
|
|
||||||
for model_name in list(self.base_model_class.pretrained_model_archive_map.keys())[:1]:
|
|
||||||
model = self.base_model_class.from_pretrained(model_name, cache_dir=cache_dir)
|
|
||||||
shutil.rmtree(cache_dir)
|
|
||||||
self.parent.assertIsNotNone(model)
|
|
||||||
|
|
||||||
def create_and_check_commons(self, config, input_ids, token_type_ids, position_ids,
|
|
||||||
mc_labels, lm_labels, mc_token_ids):
|
|
||||||
inputs_dict = {'input_ids': input_ids}
|
|
||||||
create_and_check_commons(self, config, inputs_dict)
|
|
||||||
|
|
||||||
def run_common_tests(self, test_presents=False):
|
|
||||||
config_and_inputs = self.prepare_config_and_inputs()
|
|
||||||
self.create_and_check_base_model(*config_and_inputs)
|
|
||||||
|
|
||||||
config_and_inputs = self.prepare_config_and_inputs()
|
|
||||||
self.create_and_check_lm_head(*config_and_inputs)
|
|
||||||
|
|
||||||
config_and_inputs = self.prepare_config_and_inputs()
|
|
||||||
self.create_and_check_double_heads(*config_and_inputs)
|
|
||||||
|
|
||||||
if test_presents:
|
|
||||||
config_and_inputs = self.prepare_config_and_inputs()
|
|
||||||
self.create_and_check_presents(*config_and_inputs)
|
|
||||||
|
|
||||||
config_and_inputs = self.prepare_config_and_inputs()
|
|
||||||
self.create_and_check_commons(*config_and_inputs)
|
|
||||||
|
|
||||||
def run_slow_tests(self):
|
|
||||||
self.create_and_check_model_from_pretrained()
|
|
||||||
|
|
||||||
|
|
||||||
class ModelUtilsTest(unittest.TestCase):
|
class ModelUtilsTest(unittest.TestCase):
|
||||||
@@ -471,79 +578,6 @@ class ModelUtilsTest(unittest.TestCase):
|
|||||||
self.assertEqual(model.config.output_hidden_states, True)
|
self.assertEqual(model.config.output_hidden_states, True)
|
||||||
self.assertEqual(model.config, config)
|
self.assertEqual(model.config, config)
|
||||||
|
|
||||||
def test_resize_tokens_embeddings(self):
|
|
||||||
logging.basicConfig(level=logging.INFO)
|
|
||||||
|
|
||||||
|
|
||||||
for model_name in list(BERT_PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]:
|
|
||||||
config = BertConfig.from_pretrained(model_name)
|
|
||||||
model = BertModel.from_pretrained(model_name)
|
|
||||||
|
|
||||||
model_vocab_size = config.vocab_size
|
|
||||||
# Retrieve the embeddings and clone theme
|
|
||||||
cloned_embeddings = model.embeddings.word_embeddings.weight.clone()
|
|
||||||
|
|
||||||
# Check that resizing the token embeddings with a larger vocab size increases the model's vocab size
|
|
||||||
model.resize_token_embeddings(model_vocab_size + 10)
|
|
||||||
self.assertEqual(model.config.vocab_size, model_vocab_size + 10)
|
|
||||||
# Check that it actually resizes the embeddings matrix
|
|
||||||
self.assertEqual(model.embeddings.word_embeddings.weight.shape[0], cloned_embeddings.shape[0] + 10)
|
|
||||||
|
|
||||||
# Check that resizing the token embeddings with a smaller vocab size decreases the model's vocab size
|
|
||||||
model.resize_token_embeddings(model_vocab_size)
|
|
||||||
self.assertEqual(model.config.vocab_size, model_vocab_size)
|
|
||||||
# Check that it actually resizes the embeddings matrix
|
|
||||||
self.assertEqual(model.embeddings.word_embeddings.weight.shape[0], cloned_embeddings.shape[0])
|
|
||||||
|
|
||||||
# Check that adding and removing tokens has not modified the first part of the embedding matrix.
|
|
||||||
models_equal = True
|
|
||||||
for p1, p2 in zip(cloned_embeddings, model.embeddings.word_embeddings.weight):
|
|
||||||
if p1.data.ne(p2.data).sum() > 0:
|
|
||||||
models_equal = False
|
|
||||||
|
|
||||||
self.assertTrue(models_equal)
|
|
||||||
|
|
||||||
def test_tie_model_weights(self):
|
|
||||||
logging.basicConfig(level=logging.INFO)
|
|
||||||
|
|
||||||
def check_same_values(layer_1, layer_2):
|
|
||||||
equal = True
|
|
||||||
for p1, p2 in zip(layer_1.weight, layer_2.weight):
|
|
||||||
if p1.data.ne(p2.data).sum() > 0:
|
|
||||||
equal = False
|
|
||||||
return equal
|
|
||||||
|
|
||||||
for model_name in list(GPT2_PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]:
|
|
||||||
config = GPT2Config.from_pretrained(model_name)
|
|
||||||
model = GPT2LMHeadModel.from_pretrained(model_name)
|
|
||||||
|
|
||||||
# Get the embeddings and decoding layer
|
|
||||||
embeddings = model.transformer.wte
|
|
||||||
decoding = model.lm_head
|
|
||||||
|
|
||||||
# Check that the embedding layer and decoding layer are the same in size and in value
|
|
||||||
self.assertTrue(embeddings.weight.shape, decoding.weight.shape)
|
|
||||||
self.assertTrue(check_same_values(embeddings, decoding))
|
|
||||||
|
|
||||||
# Check that after modification, they remain the same.
|
|
||||||
embeddings.weight.data.div_(2)
|
|
||||||
# Check that the embedding layer and decoding layer are the same in size and in value
|
|
||||||
self.assertTrue(embeddings.weight.shape, decoding.weight.shape)
|
|
||||||
self.assertTrue(check_same_values(embeddings, decoding))
|
|
||||||
|
|
||||||
# Check that after modification, they remain the same.
|
|
||||||
decoding.weight.data.div_(4)
|
|
||||||
# Check that the embedding layer and decoding layer are the same in size and in value
|
|
||||||
self.assertTrue(embeddings.weight.shape, decoding.weight.shape)
|
|
||||||
self.assertTrue(check_same_values(embeddings, decoding))
|
|
||||||
|
|
||||||
# Check that after resize they remain tied.
|
|
||||||
model.resize_token_embeddings(config.vocab_size + 10)
|
|
||||||
decoding.weight.data.mul_(20)
|
|
||||||
# Check that the embedding layer and decoding layer are the same in size and in value
|
|
||||||
self.assertTrue(model.transformer.wte.weight.shape, model.lm_head.weight.shape)
|
|
||||||
self.assertTrue(check_same_values(model.transformer.wte, model.lm_head))
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
unittest.main()
|
unittest.main()
|
||||||
|
|||||||
@@ -16,19 +16,14 @@ from __future__ import absolute_import
|
|||||||
from __future__ import division
|
from __future__ import division
|
||||||
from __future__ import print_function
|
from __future__ import print_function
|
||||||
|
|
||||||
import os
|
|
||||||
import unittest
|
import unittest
|
||||||
import json
|
|
||||||
import random
|
|
||||||
import shutil
|
|
||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
import torch
|
|
||||||
|
|
||||||
from pytorch_transformers import (GPT2Config, GPT2Model,
|
from pytorch_transformers import (GPT2Config, GPT2Model,
|
||||||
GPT2LMHeadModel, GPT2DoubleHeadsModel)
|
GPT2LMHeadModel, GPT2DoubleHeadsModel)
|
||||||
|
|
||||||
from .modeling_common_test import (create_and_check_commons, ConfigTester, GPTModelTester)
|
from .modeling_common_test import CommonTestCases, ConfigTester
|
||||||
|
|
||||||
class GPT2ModelTest(unittest.TestCase):
|
class GPT2ModelTest(unittest.TestCase):
|
||||||
|
|
||||||
@@ -37,14 +32,14 @@ class GPT2ModelTest(unittest.TestCase):
|
|||||||
config_tester.run_common_tests()
|
config_tester.run_common_tests()
|
||||||
|
|
||||||
def test_model(self):
|
def test_model(self):
|
||||||
model_tester = GPTModelTester(self, config_class=GPT2Config, base_model_class=GPT2Model,
|
model_tester = CommonTestCases.GPTModelTester(self, config_class=GPT2Config, base_model_class=GPT2Model,
|
||||||
lm_head_model_class=GPT2LMHeadModel,
|
lm_head_model_class=GPT2LMHeadModel,
|
||||||
double_head_model_class=GPT2DoubleHeadsModel)
|
double_head_model_class=GPT2DoubleHeadsModel)
|
||||||
model_tester.run_common_tests(test_presents=True)
|
model_tester.run_common_tests(test_presents=True)
|
||||||
|
|
||||||
@pytest.mark.slow
|
@pytest.mark.slow
|
||||||
def test_pretrained(self):
|
def test_pretrained(self):
|
||||||
model_tester = GPTModelTester(self, config_class=GPT2Config, base_model_class=GPT2Model,
|
model_tester = CommonTestCases.GPTModelTester(self, config_class=GPT2Config, base_model_class=GPT2Model,
|
||||||
lm_head_model_class=GPT2LMHeadModel,
|
lm_head_model_class=GPT2LMHeadModel,
|
||||||
double_head_model_class=GPT2DoubleHeadsModel)
|
double_head_model_class=GPT2DoubleHeadsModel)
|
||||||
model_tester.run_slow_tests()
|
model_tester.run_slow_tests()
|
||||||
|
|||||||
@@ -19,12 +19,11 @@ from __future__ import print_function
|
|||||||
import unittest
|
import unittest
|
||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
import torch
|
|
||||||
|
|
||||||
from pytorch_transformers import (OpenAIGPTConfig, OpenAIGPTModel,
|
from pytorch_transformers import (OpenAIGPTConfig, OpenAIGPTModel,
|
||||||
OpenAIGPTLMHeadModel, OpenAIGPTDoubleHeadsModel)
|
OpenAIGPTLMHeadModel, OpenAIGPTDoubleHeadsModel)
|
||||||
|
|
||||||
from .modeling_common_test import (create_and_check_commons, ConfigTester, GPTModelTester)
|
from .modeling_common_test import CommonTestCases, ConfigTester
|
||||||
|
|
||||||
class OpenAIModelTest(unittest.TestCase):
|
class OpenAIModelTest(unittest.TestCase):
|
||||||
|
|
||||||
@@ -33,14 +32,14 @@ class OpenAIModelTest(unittest.TestCase):
|
|||||||
config_tester.run_common_tests()
|
config_tester.run_common_tests()
|
||||||
|
|
||||||
def test_model(self):
|
def test_model(self):
|
||||||
model_tester = GPTModelTester(self, config_class=OpenAIGPTConfig, base_model_class=OpenAIGPTModel,
|
model_tester = CommonTestCases.GPTModelTester(self, config_class=OpenAIGPTConfig, base_model_class=OpenAIGPTModel,
|
||||||
lm_head_model_class=OpenAIGPTLMHeadModel,
|
lm_head_model_class=OpenAIGPTLMHeadModel,
|
||||||
double_head_model_class=OpenAIGPTDoubleHeadsModel)
|
double_head_model_class=OpenAIGPTDoubleHeadsModel)
|
||||||
model_tester.run_common_tests(test_presents=False)
|
model_tester.run_common_tests(test_presents=False)
|
||||||
|
|
||||||
@pytest.mark.slow
|
@pytest.mark.slow
|
||||||
def test_pretrained(self):
|
def test_pretrained(self):
|
||||||
model_tester = GPTModelTester(self, config_class=OpenAIGPTConfig, base_model_class=OpenAIGPTModel,
|
model_tester = CommonTestCases.GPTModelTester(self, config_class=OpenAIGPTConfig, base_model_class=OpenAIGPTModel,
|
||||||
lm_head_model_class=OpenAIGPTLMHeadModel,
|
lm_head_model_class=OpenAIGPTLMHeadModel,
|
||||||
double_head_model_class=OpenAIGPTDoubleHeadsModel)
|
double_head_model_class=OpenAIGPTDoubleHeadsModel)
|
||||||
model_tester.run_slow_tests()
|
model_tester.run_slow_tests()
|
||||||
|
|||||||
@@ -28,9 +28,15 @@ import torch
|
|||||||
from pytorch_transformers import (TransfoXLConfig, TransfoXLModel, TransfoXLLMHeadModel)
|
from pytorch_transformers import (TransfoXLConfig, TransfoXLModel, TransfoXLLMHeadModel)
|
||||||
from pytorch_transformers.modeling_transfo_xl import TRANSFO_XL_PRETRAINED_MODEL_ARCHIVE_MAP
|
from pytorch_transformers.modeling_transfo_xl import TRANSFO_XL_PRETRAINED_MODEL_ARCHIVE_MAP
|
||||||
|
|
||||||
from .modeling_common_test import ConfigTester, create_and_check_commons, ids_tensor
|
from .modeling_common_test import ConfigTester, CommonTestCases, ids_tensor
|
||||||
|
|
||||||
|
class TransfoXLModelTest(CommonTestCases.CommonModelTester):
|
||||||
|
|
||||||
|
all_model_classes = (TransfoXLModel, TransfoXLLMHeadModel)
|
||||||
|
test_pruning = False
|
||||||
|
test_torchscript = False
|
||||||
|
test_resize_embeddings = False
|
||||||
|
|
||||||
class TransfoXLModelTest(unittest.TestCase):
|
|
||||||
class TransfoXLModelTester(object):
|
class TransfoXLModelTester(object):
|
||||||
|
|
||||||
def __init__(self,
|
def __init__(self,
|
||||||
@@ -52,7 +58,6 @@ class TransfoXLModelTest(unittest.TestCase):
|
|||||||
num_hidden_layers=5,
|
num_hidden_layers=5,
|
||||||
scope=None,
|
scope=None,
|
||||||
seed=1,
|
seed=1,
|
||||||
all_model_classes=(TransfoXLModel, TransfoXLLMHeadModel),
|
|
||||||
):
|
):
|
||||||
self.parent = parent
|
self.parent = parent
|
||||||
self.batch_size = batch_size
|
self.batch_size = batch_size
|
||||||
@@ -73,7 +78,6 @@ class TransfoXLModelTest(unittest.TestCase):
|
|||||||
self.num_hidden_layers = num_hidden_layers
|
self.num_hidden_layers = num_hidden_layers
|
||||||
self.scope = scope
|
self.scope = scope
|
||||||
self.seed = seed
|
self.seed = seed
|
||||||
self.all_model_classes = all_model_classes
|
|
||||||
|
|
||||||
def prepare_config_and_inputs(self):
|
def prepare_config_and_inputs(self):
|
||||||
input_ids_1 = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)
|
input_ids_1 = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)
|
||||||
@@ -171,16 +175,31 @@ class TransfoXLModelTest(unittest.TestCase):
|
|||||||
list(list(mem.size()) for mem in result["mems_2"]),
|
list(list(mem.size()) for mem in result["mems_2"]),
|
||||||
[[self.mem_len, self.batch_size, self.hidden_size]] * self.num_hidden_layers)
|
[[self.mem_len, self.batch_size, self.hidden_size]] * self.num_hidden_layers)
|
||||||
|
|
||||||
def create_and_check_transfo_xl_commons(self, config, input_ids_1, input_ids_2, lm_labels):
|
def prepare_config_and_inputs_for_common(self):
|
||||||
|
config_and_inputs = self.prepare_config_and_inputs()
|
||||||
|
(config, input_ids_1, input_ids_2, lm_labels) = config_and_inputs
|
||||||
inputs_dict = {'input_ids': input_ids_1}
|
inputs_dict = {'input_ids': input_ids_1}
|
||||||
create_and_check_commons(self, config, inputs_dict, test_pruning=False, test_torchscript=False)
|
return config, inputs_dict
|
||||||
|
|
||||||
def test_default(self):
|
|
||||||
self.run_tester(TransfoXLModelTest.TransfoXLModelTester(self))
|
def setUp(self):
|
||||||
|
self.model_tester = TransfoXLModelTest.TransfoXLModelTester(self)
|
||||||
|
self.config_tester = ConfigTester(self, config_class=TransfoXLConfig, d_embed=37)
|
||||||
|
|
||||||
def test_config(self):
|
def test_config(self):
|
||||||
config_tester = ConfigTester(self, config_class=TransfoXLConfig, d_embed=37)
|
self.config_tester.run_common_tests()
|
||||||
config_tester.run_common_tests()
|
|
||||||
|
def test_transfo_xl_model(self):
|
||||||
|
self.model_tester.set_seed()
|
||||||
|
config_and_inputs = self.model_tester.prepare_config_and_inputs()
|
||||||
|
output_result = self.model_tester.create_transfo_xl_model(*config_and_inputs)
|
||||||
|
self.model_tester.check_transfo_xl_model_output(output_result)
|
||||||
|
|
||||||
|
def test_transfo_xl_lm_head(self):
|
||||||
|
self.model_tester.set_seed()
|
||||||
|
config_and_inputs = self.model_tester.prepare_config_and_inputs()
|
||||||
|
output_result = self.model_tester.create_transfo_xl_lm_head(*config_and_inputs)
|
||||||
|
self.model_tester.check_transfo_xl_lm_head_output(output_result)
|
||||||
|
|
||||||
@pytest.mark.slow
|
@pytest.mark.slow
|
||||||
def test_model_from_pretrained(self):
|
def test_model_from_pretrained(self):
|
||||||
@@ -190,23 +209,6 @@ class TransfoXLModelTest(unittest.TestCase):
|
|||||||
shutil.rmtree(cache_dir)
|
shutil.rmtree(cache_dir)
|
||||||
self.assertIsNotNone(model)
|
self.assertIsNotNone(model)
|
||||||
|
|
||||||
def run_tester(self, tester):
|
|
||||||
config_and_inputs = tester.prepare_config_and_inputs()
|
|
||||||
|
|
||||||
tester.set_seed()
|
|
||||||
config_and_inputs = tester.prepare_config_and_inputs()
|
|
||||||
output_result = tester.create_transfo_xl_model(*config_and_inputs)
|
|
||||||
tester.check_transfo_xl_model_output(output_result)
|
|
||||||
|
|
||||||
tester.set_seed()
|
|
||||||
config_and_inputs = tester.prepare_config_and_inputs()
|
|
||||||
output_result = tester.create_transfo_xl_lm_head(*config_and_inputs)
|
|
||||||
tester.check_transfo_xl_lm_head_output(output_result)
|
|
||||||
|
|
||||||
tester.set_seed()
|
|
||||||
config_and_inputs = tester.prepare_config_and_inputs()
|
|
||||||
tester.create_and_check_transfo_xl_commons(*config_and_inputs)
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
unittest.main()
|
unittest.main()
|
||||||
|
|||||||
@@ -23,10 +23,15 @@ import pytest
|
|||||||
from pytorch_transformers import (XLMConfig, XLMModel, XLMWithLMHeadModel, XLMForQuestionAnswering, XLMForSequenceClassification)
|
from pytorch_transformers import (XLMConfig, XLMModel, XLMWithLMHeadModel, XLMForQuestionAnswering, XLMForSequenceClassification)
|
||||||
from pytorch_transformers.modeling_xlm import XLM_PRETRAINED_MODEL_ARCHIVE_MAP
|
from pytorch_transformers.modeling_xlm import XLM_PRETRAINED_MODEL_ARCHIVE_MAP
|
||||||
|
|
||||||
from .modeling_common_test import (create_and_check_commons, ConfigTester, ids_tensor)
|
from .modeling_common_test import (CommonTestCases, ConfigTester, ids_tensor)
|
||||||
|
|
||||||
|
|
||||||
class XLMModelTest(unittest.TestCase):
|
class XLMModelTest(CommonTestCases.CommonModelTester):
|
||||||
|
|
||||||
|
all_model_classes = (XLMModel, XLMWithLMHeadModel,
|
||||||
|
XLMForQuestionAnswering, XLMForSequenceClassification)
|
||||||
|
# , XLMForSequenceClassification, XLMForTokenClassification),
|
||||||
|
|
||||||
class XLMModelTester(object):
|
class XLMModelTester(object):
|
||||||
|
|
||||||
def __init__(self,
|
def __init__(self,
|
||||||
@@ -58,8 +63,6 @@ class XLMModelTest(unittest.TestCase):
|
|||||||
summary_type="last",
|
summary_type="last",
|
||||||
use_proj=True,
|
use_proj=True,
|
||||||
scope=None,
|
scope=None,
|
||||||
all_model_classes = (XLMModel, XLMWithLMHeadModel,
|
|
||||||
XLMForQuestionAnswering, XLMForSequenceClassification), # , XLMForSequenceClassification, XLMForTokenClassification),
|
|
||||||
):
|
):
|
||||||
self.parent = parent
|
self.parent = parent
|
||||||
self.batch_size = batch_size
|
self.batch_size = batch_size
|
||||||
@@ -90,7 +93,6 @@ class XLMModelTest(unittest.TestCase):
|
|||||||
self.num_labels = num_labels
|
self.num_labels = num_labels
|
||||||
self.num_choices = num_choices
|
self.num_choices = num_choices
|
||||||
self.scope = scope
|
self.scope = scope
|
||||||
self.all_model_classes = all_model_classes
|
|
||||||
|
|
||||||
def prepare_config_and_inputs(self):
|
def prepare_config_and_inputs(self):
|
||||||
input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)
|
input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)
|
||||||
@@ -237,28 +239,23 @@ class XLMModelTest(unittest.TestCase):
|
|||||||
[self.batch_size, self.type_sequence_label_size])
|
[self.batch_size, self.type_sequence_label_size])
|
||||||
|
|
||||||
|
|
||||||
def create_and_check_xlm_commons(self, config, input_ids, token_type_ids, input_lengths, sequence_labels, token_labels, is_impossible_labels, input_mask):
|
def prepare_config_and_inputs_for_common(self):
|
||||||
|
config_and_inputs = self.prepare_config_and_inputs()
|
||||||
|
(config, input_ids, token_type_ids, input_lengths,
|
||||||
|
sequence_labels, token_labels, is_impossible_labels, input_mask) = config_and_inputs
|
||||||
inputs_dict = {'input_ids': input_ids, 'token_type_ids': token_type_ids, 'lengths': input_lengths}
|
inputs_dict = {'input_ids': input_ids, 'token_type_ids': token_type_ids, 'lengths': input_lengths}
|
||||||
create_and_check_commons(self, config, inputs_dict)
|
return config, inputs_dict
|
||||||
|
|
||||||
def test_default(self):
|
def setUp(self):
|
||||||
self.run_tester(XLMModelTest.XLMModelTester(self))
|
self.model_tester = XLMModelTest.XLMModelTester(self)
|
||||||
|
self.config_tester = ConfigTester(self, config_class=XLMConfig, emb_dim=37)
|
||||||
|
|
||||||
def test_config(self):
|
def test_config(self):
|
||||||
config_tester = ConfigTester(self, config_class=XLMConfig, emb_dim=37)
|
self.config_tester.run_common_tests()
|
||||||
config_tester.run_common_tests()
|
|
||||||
|
|
||||||
@pytest.mark.slow
|
def test_xlm_model(self):
|
||||||
def test_model_from_pretrained(self):
|
config_and_inputs = self.model_tester.prepare_config_and_inputs()
|
||||||
cache_dir = "/tmp/pytorch_transformers_test/"
|
self.model_tester.create_and_check_xlm_model(*config_and_inputs)
|
||||||
for model_name in list(XLM_PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]:
|
|
||||||
model = XLMModel.from_pretrained(model_name, cache_dir=cache_dir)
|
|
||||||
shutil.rmtree(cache_dir)
|
|
||||||
self.assertIsNotNone(model)
|
|
||||||
|
|
||||||
def run_tester(self, tester):
|
|
||||||
config_and_inputs = tester.prepare_config_and_inputs()
|
|
||||||
tester.create_and_check_xlm_model(*config_and_inputs)
|
|
||||||
|
|
||||||
# config_and_inputs = tester.prepare_config_and_inputs()
|
# config_and_inputs = tester.prepare_config_and_inputs()
|
||||||
# tester.create_and_check_xlm_for_masked_lm(*config_and_inputs)
|
# tester.create_and_check_xlm_for_masked_lm(*config_and_inputs)
|
||||||
@@ -275,8 +272,14 @@ class XLMModelTest(unittest.TestCase):
|
|||||||
# config_and_inputs = tester.prepare_config_and_inputs()
|
# config_and_inputs = tester.prepare_config_and_inputs()
|
||||||
# tester.create_and_check_xlm_for_token_classification(*config_and_inputs)
|
# tester.create_and_check_xlm_for_token_classification(*config_and_inputs)
|
||||||
|
|
||||||
config_and_inputs = tester.prepare_config_and_inputs()
|
@pytest.mark.slow
|
||||||
tester.create_and_check_xlm_commons(*config_and_inputs)
|
def test_model_from_pretrained(self):
|
||||||
|
cache_dir = "/tmp/pytorch_transformers_test/"
|
||||||
|
for model_name in list(XLM_PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]:
|
||||||
|
model = XLMModel.from_pretrained(model_name, cache_dir=cache_dir)
|
||||||
|
shutil.rmtree(cache_dir)
|
||||||
|
self.assertIsNotNone(model)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
unittest.main()
|
unittest.main()
|
||||||
|
|||||||
@@ -28,9 +28,14 @@ import torch
|
|||||||
from pytorch_transformers import (XLNetConfig, XLNetModel, XLNetLMHeadModel, XLNetForSequenceClassification, XLNetForQuestionAnswering)
|
from pytorch_transformers import (XLNetConfig, XLNetModel, XLNetLMHeadModel, XLNetForSequenceClassification, XLNetForQuestionAnswering)
|
||||||
from pytorch_transformers.modeling_xlnet import XLNET_PRETRAINED_MODEL_ARCHIVE_MAP
|
from pytorch_transformers.modeling_xlnet import XLNET_PRETRAINED_MODEL_ARCHIVE_MAP
|
||||||
|
|
||||||
from .modeling_common_test import ConfigTester, create_and_check_commons, ids_tensor
|
from .modeling_common_test import ConfigTester, CommonTestCases, ids_tensor
|
||||||
|
|
||||||
|
class XLNetModelTest(CommonTestCases.CommonModelTester):
|
||||||
|
|
||||||
|
all_model_classes=(XLNetModel, XLNetLMHeadModel,
|
||||||
|
XLNetForSequenceClassification, XLNetForQuestionAnswering)
|
||||||
|
test_pruning = False
|
||||||
|
|
||||||
class XLNetModelTest(unittest.TestCase):
|
|
||||||
class XLNetModelTester(object):
|
class XLNetModelTester(object):
|
||||||
|
|
||||||
def __init__(self,
|
def __init__(self,
|
||||||
@@ -56,8 +61,6 @@ class XLNetModelTest(unittest.TestCase):
|
|||||||
initializer_range=0.05,
|
initializer_range=0.05,
|
||||||
seed=1,
|
seed=1,
|
||||||
type_vocab_size=2,
|
type_vocab_size=2,
|
||||||
all_model_classes=(XLNetModel, XLNetLMHeadModel,
|
|
||||||
XLNetForSequenceClassification, XLNetForQuestionAnswering),
|
|
||||||
):
|
):
|
||||||
self.parent = parent
|
self.parent = parent
|
||||||
self.batch_size = batch_size
|
self.batch_size = batch_size
|
||||||
@@ -82,7 +85,6 @@ class XLNetModelTest(unittest.TestCase):
|
|||||||
self.seed = seed
|
self.seed = seed
|
||||||
self.type_vocab_size = type_vocab_size
|
self.type_vocab_size = type_vocab_size
|
||||||
self.type_sequence_label_size = type_sequence_label_size
|
self.type_sequence_label_size = type_sequence_label_size
|
||||||
self.all_model_classes = all_model_classes
|
|
||||||
|
|
||||||
def prepare_config_and_inputs(self):
|
def prepare_config_and_inputs(self):
|
||||||
input_ids_1 = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)
|
input_ids_1 = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)
|
||||||
@@ -264,17 +266,41 @@ class XLNetModelTest(unittest.TestCase):
|
|||||||
list(list(mem.size()) for mem in result["mems_1"]),
|
list(list(mem.size()) for mem in result["mems_1"]),
|
||||||
[[self.seq_length, self.batch_size, self.hidden_size]] * self.num_hidden_layers)
|
[[self.seq_length, self.batch_size, self.hidden_size]] * self.num_hidden_layers)
|
||||||
|
|
||||||
def create_and_check_xlnet_commons(self, config, input_ids_1, input_ids_2, input_ids_q, perm_mask, input_mask,
|
def prepare_config_and_inputs_for_common(self):
|
||||||
target_mapping, inp_q, segment_ids, lm_labels, sequence_labels, is_impossible_labels):
|
config_and_inputs = self.prepare_config_and_inputs()
|
||||||
|
(config, input_ids_1, input_ids_2, input_ids_q, perm_mask, input_mask,
|
||||||
|
target_mapping, inp_q, segment_ids, lm_labels,
|
||||||
|
sequence_labels, is_impossible_labels) = config_and_inputs
|
||||||
inputs_dict = {'input_ids': input_ids_1}
|
inputs_dict = {'input_ids': input_ids_1}
|
||||||
create_and_check_commons(self, config, inputs_dict, test_pruning=False)
|
return config, inputs_dict
|
||||||
|
|
||||||
def test_default(self):
|
|
||||||
self.run_tester(XLNetModelTest.XLNetModelTester(self))
|
def setUp(self):
|
||||||
|
self.model_tester = XLNetModelTest.XLNetModelTester(self)
|
||||||
|
self.config_tester = ConfigTester(self, config_class=XLNetConfig, d_inner=37)
|
||||||
|
|
||||||
def test_config(self):
|
def test_config(self):
|
||||||
config_tester = ConfigTester(self, config_class=XLNetConfig, d_inner=37)
|
self.config_tester.run_common_tests()
|
||||||
config_tester.run_common_tests()
|
|
||||||
|
def test_xlnet_base_model(self):
|
||||||
|
self.model_tester.set_seed()
|
||||||
|
config_and_inputs = self.model_tester.prepare_config_and_inputs()
|
||||||
|
self.model_tester.create_and_check_xlnet_base_model(*config_and_inputs)
|
||||||
|
|
||||||
|
def test_xlnet_lm_head(self):
|
||||||
|
self.model_tester.set_seed()
|
||||||
|
config_and_inputs = self.model_tester.prepare_config_and_inputs()
|
||||||
|
self.model_tester.create_and_check_xlnet_lm_head(*config_and_inputs)
|
||||||
|
|
||||||
|
def test_xlnet_sequence_classif(self):
|
||||||
|
self.model_tester.set_seed()
|
||||||
|
config_and_inputs = self.model_tester.prepare_config_and_inputs()
|
||||||
|
self.model_tester.create_and_check_xlnet_sequence_classif(*config_and_inputs)
|
||||||
|
|
||||||
|
def test_xlnet_qa(self):
|
||||||
|
self.model_tester.set_seed()
|
||||||
|
config_and_inputs = self.model_tester.prepare_config_and_inputs()
|
||||||
|
self.model_tester.create_and_check_xlnet_qa(*config_and_inputs)
|
||||||
|
|
||||||
@pytest.mark.slow
|
@pytest.mark.slow
|
||||||
def test_model_from_pretrained(self):
|
def test_model_from_pretrained(self):
|
||||||
@@ -284,27 +310,6 @@ class XLNetModelTest(unittest.TestCase):
|
|||||||
shutil.rmtree(cache_dir)
|
shutil.rmtree(cache_dir)
|
||||||
self.assertIsNotNone(model)
|
self.assertIsNotNone(model)
|
||||||
|
|
||||||
def run_tester(self, tester):
|
|
||||||
tester.set_seed()
|
|
||||||
config_and_inputs = tester.prepare_config_and_inputs()
|
|
||||||
tester.create_and_check_xlnet_base_model(*config_and_inputs)
|
|
||||||
|
|
||||||
tester.set_seed()
|
|
||||||
config_and_inputs = tester.prepare_config_and_inputs()
|
|
||||||
tester.create_and_check_xlnet_lm_head(*config_and_inputs)
|
|
||||||
|
|
||||||
tester.set_seed()
|
|
||||||
config_and_inputs = tester.prepare_config_and_inputs()
|
|
||||||
tester.create_and_check_xlnet_sequence_classif(*config_and_inputs)
|
|
||||||
|
|
||||||
tester.set_seed()
|
|
||||||
config_and_inputs = tester.prepare_config_and_inputs()
|
|
||||||
tester.create_and_check_xlnet_qa(*config_and_inputs)
|
|
||||||
|
|
||||||
tester.set_seed()
|
|
||||||
config_and_inputs = tester.prepare_config_and_inputs()
|
|
||||||
tester.create_and_check_xlnet_commons(*config_and_inputs)
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
unittest.main()
|
unittest.main()
|
||||||
|
|||||||
Reference in New Issue
Block a user