From 4e10acb3e59f5ef52f383e5a82987e672f17b1fd Mon Sep 17 00:00:00 2001 From: Sylvain Gugger <35901082+sgugger@users.noreply.github.com> Date: Wed, 10 Jun 2020 13:19:53 -0400 Subject: [PATCH] Add more models to common tests (#4910) --- src/transformers/modeling_distilbert.py | 2 +- src/transformers/modeling_electra.py | 2 +- src/transformers/modeling_longformer.py | 7 ++++++- src/transformers/modeling_roberta.py | 4 +++- tests/test_modeling_distilbert.py | 8 +++++++- tests/test_modeling_electra.py | 10 +++++++++- tests/test_modeling_longformer.py | 14 +++++++++++++- tests/test_modeling_roberta.py | 17 +++++++++++++++-- tests/test_modeling_xlnet.py | 4 ++++ 9 files changed, 59 insertions(+), 9 deletions(-) diff --git a/src/transformers/modeling_distilbert.py b/src/transformers/modeling_distilbert.py index 1713164498..522b5bef45 100644 --- a/src/transformers/modeling_distilbert.py +++ b/src/transformers/modeling_distilbert.py @@ -848,7 +848,7 @@ class DistilBertForTokenClassification(DistilBertPreTrainedModel): sequence_output = self.dropout(sequence_output) logits = self.classifier(sequence_output) - outputs = (logits,) + outputs[2:] # add hidden states and attention if they are here + outputs = (logits,) + outputs[1:] # add hidden states and attention if they are here if labels is not None: loss_fct = CrossEntropyLoss() # Only keep active parts of the loss diff --git a/src/transformers/modeling_electra.py b/src/transformers/modeling_electra.py index 48b78eb9c7..e85a57cddd 100644 --- a/src/transformers/modeling_electra.py +++ b/src/transformers/modeling_electra.py @@ -435,7 +435,7 @@ class ElectraForSequenceClassification(ElectraPreTrainedModel): sequence_output = discriminator_hidden_states[0] logits = self.classifier(sequence_output) - outputs = (logits,) + discriminator_hidden_states[2:] # add hidden states and attention if they are here + outputs = (logits,) + discriminator_hidden_states[1:] # add hidden states and attention if they are here if labels is not None: if self.num_labels == 1: diff --git a/src/transformers/modeling_longformer.py b/src/transformers/modeling_longformer.py index 3b3456b676..aaf33b078d 100644 --- a/src/transformers/modeling_longformer.py +++ b/src/transformers/modeling_longformer.py @@ -797,6 +797,8 @@ class LongformerForSequenceClassification(BertPreTrainedModel): self.longformer = LongformerModel(config) self.classifier = LongformerClassificationHead(config) + self.init_weights() + @add_start_docstrings_to_callable(LONGFORMER_INPUTS_DOCSTRING.format("(batch_size, sequence_length)")) def forward( self, @@ -861,6 +863,7 @@ class LongformerForSequenceClassification(BertPreTrainedModel): token_type_ids=token_type_ids, position_ids=position_ids, inputs_embeds=inputs_embeds, + output_attentions=output_attentions, ) sequence_output = outputs[0] logits = self.classifier(sequence_output) @@ -919,7 +922,7 @@ class LongformerForQuestionAnswering(BertPreTrainedModel): @add_start_docstrings_to_callable(LONGFORMER_INPUTS_DOCSTRING.format("(batch_size, sequence_length)")) def forward( self, - input_ids, + input_ids=None, attention_mask=None, global_attention_mask=None, token_type_ids=None, @@ -1099,6 +1102,7 @@ class LongformerForTokenClassification(BertPreTrainedModel): token_type_ids=token_type_ids, position_ids=position_ids, inputs_embeds=inputs_embeds, + output_attentions=output_attentions, ) sequence_output = outputs[0] @@ -1228,6 +1232,7 @@ class LongformerForMultipleChoice(BertPreTrainedModel): token_type_ids=flat_token_type_ids, attention_mask=flat_attention_mask, global_attention_mask=flat_global_attention_mask, + output_attentions=output_attentions, ) pooled_output = outputs[1] diff --git a/src/transformers/modeling_roberta.py b/src/transformers/modeling_roberta.py index 8b4bb0ce31..91807c13ae 100644 --- a/src/transformers/modeling_roberta.py +++ b/src/transformers/modeling_roberta.py @@ -300,6 +300,8 @@ class RobertaForSequenceClassification(BertPreTrainedModel): self.roberta = RobertaModel(config) self.classifier = RobertaClassificationHead(config) + self.init_weights() + @add_start_docstrings_to_callable(ROBERTA_INPUTS_DOCSTRING.format("(batch_size, sequence_length)")) def forward( self, @@ -618,7 +620,7 @@ class RobertaForQuestionAnswering(BertPreTrainedModel): @add_start_docstrings_to_callable(ROBERTA_INPUTS_DOCSTRING.format("(batch_size, sequence_length)")) def forward( self, - input_ids, + input_ids=None, attention_mask=None, token_type_ids=None, position_ids=None, diff --git a/tests/test_modeling_distilbert.py b/tests/test_modeling_distilbert.py index a90288495b..a5b9024ac6 100644 --- a/tests/test_modeling_distilbert.py +++ b/tests/test_modeling_distilbert.py @@ -38,7 +38,13 @@ if is_torch_available(): class DistilBertModelTest(ModelTesterMixin, unittest.TestCase): all_model_classes = ( - (DistilBertModel, DistilBertForMaskedLM, DistilBertForQuestionAnswering, DistilBertForSequenceClassification) + ( + DistilBertModel, + DistilBertForMaskedLM, + DistilBertForQuestionAnswering, + DistilBertForSequenceClassification, + DistilBertForTokenClassification, + ) if is_torch_available() else None ) diff --git a/tests/test_modeling_electra.py b/tests/test_modeling_electra.py index 22254a81a1..9c0a676d26 100644 --- a/tests/test_modeling_electra.py +++ b/tests/test_modeling_electra.py @@ -39,7 +39,15 @@ if is_torch_available(): class ElectraModelTest(ModelTesterMixin, unittest.TestCase): all_model_classes = ( - (ElectraModel, ElectraForMaskedLM, ElectraForTokenClassification,) if is_torch_available() else () + ( + ElectraModel, + ElectraForPreTraining, + ElectraForMaskedLM, + ElectraForTokenClassification, + ElectraForSequenceClassification, + ) + if is_torch_available() + else () ) class ElectraModelTester(object): diff --git a/tests/test_modeling_longformer.py b/tests/test_modeling_longformer.py index 7f4f993ea2..d1c7beca94 100644 --- a/tests/test_modeling_longformer.py +++ b/tests/test_modeling_longformer.py @@ -296,7 +296,19 @@ class LongformerModelTest(ModelTesterMixin, unittest.TestCase): test_headmasking = False # head masking is not supported test_torchscript = False - all_model_classes = (LongformerModel, LongformerForMaskedLM,) if is_torch_available() else () + all_model_classes = ( + ( + LongformerModel, + LongformerForMaskedLM, + # TODO: make tests pass for those models + # LongformerForSequenceClassification, + # LongformerForQuestionAnswering, + # LongformerForTokenClassification, + # LongformerForMultipleChoice, + ) + if is_torch_available() + else () + ) def setUp(self): self.model_tester = LongformerModelTester(self) diff --git a/tests/test_modeling_roberta.py b/tests/test_modeling_roberta.py index ac9e9396dd..dbfb6e6ba4 100644 --- a/tests/test_modeling_roberta.py +++ b/tests/test_modeling_roberta.py @@ -29,10 +29,12 @@ if is_torch_available(): RobertaConfig, RobertaModel, RobertaForMaskedLM, + RobertaForMultipleChoice, + RobertaForQuestionAnswering, RobertaForSequenceClassification, RobertaForTokenClassification, ) - from transformers.modeling_roberta import RobertaEmbeddings, RobertaForMultipleChoice, RobertaForQuestionAnswering + from transformers.modeling_roberta import RobertaEmbeddings from transformers.modeling_roberta import ROBERTA_PRETRAINED_MODEL_ARCHIVE_LIST from transformers.modeling_utils import create_position_ids_from_input_ids @@ -40,7 +42,18 @@ if is_torch_available(): @require_torch class RobertaModelTest(ModelTesterMixin, unittest.TestCase): - all_model_classes = (RobertaForMaskedLM, RobertaModel) if is_torch_available() else () + all_model_classes = ( + ( + RobertaForMaskedLM, + RobertaModel, + RobertaForSequenceClassification, + RobertaForTokenClassification, + RobertaForMultipleChoice, + RobertaForQuestionAnswering, + ) + if is_torch_available() + else () + ) class RobertaModelTester(object): def __init__( diff --git a/tests/test_modeling_xlnet.py b/tests/test_modeling_xlnet.py index 1e53393916..fccee2d6a4 100644 --- a/tests/test_modeling_xlnet.py +++ b/tests/test_modeling_xlnet.py @@ -31,6 +31,7 @@ if is_torch_available(): XLNetConfig, XLNetModel, XLNetLMHeadModel, + XLNetForMultipleChoice, XLNetForSequenceClassification, XLNetForTokenClassification, XLNetForQuestionAnswering, @@ -48,6 +49,7 @@ class XLNetModelTest(ModelTesterMixin, unittest.TestCase): XLNetForTokenClassification, XLNetForSequenceClassification, XLNetForQuestionAnswering, + XLNetForMultipleChoice, ) if is_torch_available() else () @@ -84,6 +86,7 @@ class XLNetModelTest(ModelTesterMixin, unittest.TestCase): bos_token_id=1, eos_token_id=2, pad_token_id=5, + num_choices=4, ): self.parent = parent self.batch_size = batch_size @@ -110,6 +113,7 @@ class XLNetModelTest(ModelTesterMixin, unittest.TestCase): self.bos_token_id = bos_token_id self.pad_token_id = pad_token_id self.eos_token_id = eos_token_id + self.num_choices = num_choices def prepare_config_and_inputs(self): input_ids_1 = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)