From c3d9ac7607ce68c0d20b744ded71e0be2435b732 Mon Sep 17 00:00:00 2001 From: Lysandre Debut Date: Wed, 21 Jul 2021 10:13:11 +0200 Subject: [PATCH] Expose get_config() on ModelTesters (#12812) * Expose get_config() on ModelTesters * Typo --- ...ng_{{cookiecutter.lowercase_modelname}}.py | 11 +- tests/test_modeling_albert.py | 12 +- tests/test_modeling_bart.py | 13 +- tests/test_modeling_bert.py | 15 +- tests/test_modeling_bert_generation.py | 13 +- tests/test_modeling_big_bird.py | 12 +- tests/test_modeling_bigbird_pegasus.py | 13 +- tests/test_modeling_blenderbot.py | 15 +- tests/test_modeling_blenderbot_small.py | 20 +- tests/test_modeling_canine.py | 12 +- tests/test_modeling_clip.py | 28 +- tests/test_modeling_convbert.py | 12 +- tests/test_modeling_ctrl.py | 36 +- tests/test_modeling_deberta.py | 350 +++++++++--------- tests/test_modeling_deberta_v2.py | 350 +++++++++--------- tests/test_modeling_deit.py | 11 +- tests/test_modeling_detr.py | 12 +- tests/test_modeling_distilbert.py | 287 +++++++------- tests/test_modeling_dpr.py | 13 +- tests/test_modeling_electra.py | 30 +- tests/test_modeling_flaubert.py | 33 +- tests/test_modeling_fsmt.py | 18 +- tests/test_modeling_funnel.py | 30 +- tests/test_modeling_gpt2.py | 44 +-- tests/test_modeling_gpt_neo.py | 34 +- tests/test_modeling_hubert.py | 13 +- tests/test_modeling_ibert.py | 12 +- tests/test_modeling_layoutlm.py | 12 +- tests/test_modeling_led.py | 13 +- tests/test_modeling_longformer.py | 12 +- tests/test_modeling_luke.py | 39 +- tests/test_modeling_lxmert.py | 36 +- tests/test_modeling_m2m_100.py | 14 +- tests/test_modeling_marian.py | 14 +- tests/test_modeling_mbart.py | 13 +- tests/test_modeling_megatron_bert.py | 12 +- tests/test_modeling_mobilebert.py | 12 +- tests/test_modeling_mpnet.py | 10 +- tests/test_modeling_pegasus.py | 14 +- tests/test_modeling_prophetnet.py | 27 +- tests/test_modeling_reformer.py | 206 +++++------ tests/test_modeling_roberta.py | 12 +- tests/test_modeling_roformer.py | 12 +- 
tests/test_modeling_speech_to_text.py | 28 +- tests/test_modeling_squeezebert.py | 325 ++++++++-------- tests/test_modeling_t5.py | 27 +- tests/test_modeling_tapas.py | 36 +- tests/test_modeling_transfo_xl.py | 13 +- tests/test_modeling_visual_bert.py | 10 +- tests/test_modeling_vit.py | 12 +- tests/test_modeling_wav2vec2.py | 12 +- tests/test_modeling_xlm.py | 33 +- tests/test_modeling_xlnet.py | 39 +- 53 files changed, 1249 insertions(+), 1193 deletions(-) diff --git a/templates/adding_a_new_model/cookiecutter-template-{{cookiecutter.modelname}}/test_modeling_{{cookiecutter.lowercase_modelname}}.py b/templates/adding_a_new_model/cookiecutter-template-{{cookiecutter.modelname}}/test_modeling_{{cookiecutter.lowercase_modelname}}.py index c9d3738164..af52030f53 100644 --- a/templates/adding_a_new_model/cookiecutter-template-{{cookiecutter.modelname}}/test_modeling_{{cookiecutter.lowercase_modelname}}.py +++ b/templates/adding_a_new_model/cookiecutter-template-{{cookiecutter.modelname}}/test_modeling_{{cookiecutter.lowercase_modelname}}.py @@ -22,6 +22,7 @@ from tests.test_modeling_common import floats_tensor from transformers import is_torch_available from transformers.testing_utils import require_torch, slow, torch_device +from transformers import {{cookiecutter.camelcase_modelname}}Config from .test_configuration_common import ConfigTester from .test_modeling_common import ModelTesterMixin, ids_tensor, random_attention_mask @@ -30,7 +31,6 @@ if is_torch_available(): import torch from transformers import ( - {{cookiecutter.camelcase_modelname}}Config, {{cookiecutter.camelcase_modelname}}ForCausalLM, {{cookiecutter.camelcase_modelname}}ForMaskedLM, {{cookiecutter.camelcase_modelname}}ForMultipleChoice, @@ -112,7 +112,12 @@ class {{cookiecutter.camelcase_modelname}}ModelTester: token_labels = ids_tensor([self.batch_size, self.seq_length], self.num_labels) choice_labels = ids_tensor([self.batch_size], self.num_choices) - config = 
{{cookiecutter.camelcase_modelname}}Config( + config = self.get_config() + + return config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels + + def get_config(self): + return {{cookiecutter.camelcase_modelname}}Config( vocab_size=self.vocab_size, hidden_size=self.hidden_size, num_hidden_layers=self.num_hidden_layers, @@ -127,8 +132,6 @@ class {{cookiecutter.camelcase_modelname}}ModelTester: initializer_range=self.initializer_range, ) - return config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels - def prepare_config_and_inputs_for_decoder(self): ( config, diff --git a/tests/test_modeling_albert.py b/tests/test_modeling_albert.py index 06e60d6925..5be455b630 100644 --- a/tests/test_modeling_albert.py +++ b/tests/test_modeling_albert.py @@ -16,7 +16,7 @@ import unittest -from transformers import is_torch_available +from transformers import AlbertConfig, is_torch_available from transformers.models.auto import get_values from transformers.testing_utils import require_torch, slow, torch_device @@ -29,7 +29,6 @@ if is_torch_available(): from transformers import ( MODEL_FOR_PRETRAINING_MAPPING, - AlbertConfig, AlbertForMaskedLM, AlbertForMultipleChoice, AlbertForPreTraining, @@ -90,7 +89,12 @@ class AlbertModelTester: token_labels = ids_tensor([self.batch_size, self.seq_length], self.num_labels) choice_labels = ids_tensor([self.batch_size], self.num_choices) - config = AlbertConfig( + config = self.get_config() + + return config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels + + def get_config(self): + return AlbertConfig( vocab_size=self.vocab_size, hidden_size=self.hidden_size, num_hidden_layers=self.num_hidden_layers, @@ -105,8 +109,6 @@ class AlbertModelTester: num_hidden_groups=self.num_hidden_groups, ) - return config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels - def create_and_check_model( self, config, input_ids, 
token_type_ids, input_mask, sequence_labels, token_labels, choice_labels ): diff --git a/tests/test_modeling_bart.py b/tests/test_modeling_bart.py index 20f33f0dda..c80a39fc12 100644 --- a/tests/test_modeling_bart.py +++ b/tests/test_modeling_bart.py @@ -21,7 +21,7 @@ import unittest import timeout_decorator # noqa -from transformers import is_torch_available +from transformers import BartConfig, is_torch_available from transformers.file_utils import cached_property from transformers.testing_utils import require_sentencepiece, require_tokenizers, require_torch, slow, torch_device @@ -35,7 +35,6 @@ if is_torch_available(): from transformers import ( AutoModelForSequenceClassification, - BartConfig, BartForCausalLM, BartForConditionalGeneration, BartForQuestionAnswering, @@ -78,7 +77,6 @@ def prepare_bart_inputs_dict( } -@require_torch class BartModelTester: def __init__( self, @@ -127,7 +125,12 @@ class BartModelTester: decoder_input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size) - config = BartConfig( + config = self.get_config() + inputs_dict = prepare_bart_inputs_dict(config, input_ids, decoder_input_ids) + return config, inputs_dict + + def get_config(self): + return BartConfig( vocab_size=self.vocab_size, d_model=self.hidden_size, encoder_layers=self.num_hidden_layers, @@ -143,8 +146,6 @@ class BartModelTester: bos_token_id=self.bos_token_id, pad_token_id=self.pad_token_id, ) - inputs_dict = prepare_bart_inputs_dict(config, input_ids, decoder_input_ids) - return config, inputs_dict def prepare_config_and_inputs_for_common(self): config, inputs_dict = self.prepare_config_and_inputs() diff --git a/tests/test_modeling_bert.py b/tests/test_modeling_bert.py index c87c97a543..c60d198978 100755 --- a/tests/test_modeling_bert.py +++ b/tests/test_modeling_bert.py @@ -16,7 +16,7 @@ import unittest -from transformers import is_torch_available +from transformers import BertConfig, is_torch_available from transformers.models.auto import get_values 
from transformers.testing_utils import require_torch, slow, torch_device @@ -30,7 +30,6 @@ if is_torch_available(): from transformers import ( MODEL_FOR_PRETRAINING_MAPPING, - BertConfig, BertForMaskedLM, BertForMultipleChoice, BertForNextSentencePrediction, @@ -112,7 +111,15 @@ class BertModelTester: token_labels = ids_tensor([self.batch_size, self.seq_length], self.num_labels) choice_labels = ids_tensor([self.batch_size], self.num_choices) - config = BertConfig( + config = self.get_config() + + return config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels + + def get_config(self): + """ + Returns a tiny configuration by default. + """ + return BertConfig( vocab_size=self.vocab_size, hidden_size=self.hidden_size, num_hidden_layers=self.num_hidden_layers, @@ -127,8 +134,6 @@ class BertModelTester: initializer_range=self.initializer_range, ) - return config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels - def prepare_config_and_inputs_for_decoder(self): ( config, diff --git a/tests/test_modeling_bert_generation.py b/tests/test_modeling_bert_generation.py index 0ca0d81f40..c43e87d7fb 100755 --- a/tests/test_modeling_bert_generation.py +++ b/tests/test_modeling_bert_generation.py @@ -16,7 +16,7 @@ import unittest -from transformers import is_torch_available +from transformers import BertGenerationConfig, is_torch_available from transformers.testing_utils import require_torch, slow, torch_device from .test_configuration_common import ConfigTester @@ -27,7 +27,7 @@ from .test_modeling_common import ModelTesterMixin, floats_tensor, ids_tensor, r if is_torch_available(): import torch - from transformers import BertGenerationConfig, BertGenerationDecoder, BertGenerationEncoder + from transformers import BertGenerationDecoder, BertGenerationEncoder class BertGenerationEncoderTester: @@ -79,7 +79,12 @@ class BertGenerationEncoderTester: if self.use_labels: token_labels = ids_tensor([self.batch_size, 
self.seq_length], self.vocab_size) - config = BertGenerationConfig( + config = self.get_config() + + return config, input_ids, input_mask, token_labels + + def get_config(self): + return BertGenerationConfig( vocab_size=self.vocab_size, hidden_size=self.hidden_size, num_hidden_layers=self.num_hidden_layers, @@ -93,8 +98,6 @@ class BertGenerationEncoderTester: initializer_range=self.initializer_range, ) - return config, input_ids, input_mask, token_labels - def prepare_config_and_inputs_for_decoder(self): ( config, diff --git a/tests/test_modeling_big_bird.py b/tests/test_modeling_big_bird.py index ba7d12fe2d..3a4a0b870c 100644 --- a/tests/test_modeling_big_bird.py +++ b/tests/test_modeling_big_bird.py @@ -18,7 +18,7 @@ import unittest from tests.test_modeling_common import floats_tensor -from transformers import is_torch_available +from transformers import BigBirdConfig, is_torch_available from transformers.models.auto import get_values from transformers.models.big_bird.tokenization_big_bird import BigBirdTokenizer from transformers.testing_utils import require_torch, slow, torch_device @@ -32,7 +32,6 @@ if is_torch_available(): from transformers import ( MODEL_FOR_PRETRAINING_MAPPING, - BigBirdConfig, BigBirdForCausalLM, BigBirdForMaskedLM, BigBirdForMultipleChoice, @@ -126,7 +125,12 @@ class BigBirdModelTester: token_labels = ids_tensor([self.batch_size, self.seq_length], self.num_labels) choice_labels = ids_tensor([self.batch_size], self.num_choices) - config = BigBirdConfig( + config = self.get_config() + + return config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels + + def get_config(self): + return BigBirdConfig( vocab_size=self.vocab_size, hidden_size=self.hidden_size, num_hidden_layers=self.num_hidden_layers, @@ -147,8 +151,6 @@ class BigBirdModelTester: position_embedding_type=self.position_embedding_type, ) - return config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels - def 
prepare_config_and_inputs_for_decoder(self): ( config, diff --git a/tests/test_modeling_bigbird_pegasus.py b/tests/test_modeling_bigbird_pegasus.py index 4965cbaa24..7f50ddd6f5 100644 --- a/tests/test_modeling_bigbird_pegasus.py +++ b/tests/test_modeling_bigbird_pegasus.py @@ -19,7 +19,7 @@ import copy import tempfile import unittest -from transformers import is_torch_available +from transformers import BigBirdPegasusConfig, is_torch_available from transformers.testing_utils import require_sentencepiece, require_tokenizers, require_torch, slow, torch_device from .test_configuration_common import ConfigTester @@ -31,7 +31,6 @@ if is_torch_available(): import torch from transformers import ( - BigBirdPegasusConfig, BigBirdPegasusForCausalLM, BigBirdPegasusForConditionalGeneration, BigBirdPegasusForQuestionAnswering, @@ -69,7 +68,6 @@ def prepare_bigbird_pegasus_inputs_dict( return input_dict -@require_torch class BigBirdPegasusModelTester: def __init__( self, @@ -129,7 +127,12 @@ class BigBirdPegasusModelTester: decoder_input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size) - config = BigBirdPegasusConfig( + config = self.get_config() + inputs_dict = prepare_bigbird_pegasus_inputs_dict(config, input_ids, decoder_input_ids) + return config, inputs_dict + + def get_config(self): + return BigBirdPegasusConfig( vocab_size=self.vocab_size, d_model=self.hidden_size, encoder_layers=self.num_hidden_layers, @@ -150,8 +153,6 @@ class BigBirdPegasusModelTester: num_random_blocks=self.num_random_blocks, scale_embedding=self.scale_embedding, ) - inputs_dict = prepare_bigbird_pegasus_inputs_dict(config, input_ids, decoder_input_ids) - return config, inputs_dict def prepare_config_and_inputs_for_common(self): config, inputs_dict = self.prepare_config_and_inputs() diff --git a/tests/test_modeling_blenderbot.py b/tests/test_modeling_blenderbot.py index dfaa3cdc0a..33d5064924 100644 --- a/tests/test_modeling_blenderbot.py +++ b/tests/test_modeling_blenderbot.py @@ 
-17,7 +17,7 @@ import tempfile import unittest -from transformers import is_torch_available +from transformers import BlenderbotConfig, is_torch_available from transformers.file_utils import cached_property from transformers.testing_utils import require_sentencepiece, require_tokenizers, require_torch, slow, torch_device @@ -29,7 +29,7 @@ from .test_modeling_common import ModelTesterMixin, ids_tensor if is_torch_available(): import torch - from transformers import BlenderbotConfig, BlenderbotForConditionalGeneration, BlenderbotModel, BlenderbotTokenizer + from transformers import BlenderbotForConditionalGeneration, BlenderbotModel, BlenderbotTokenizer from transformers.models.blenderbot.modeling_blenderbot import ( BlenderbotDecoder, BlenderbotEncoder, @@ -68,7 +68,6 @@ def prepare_blenderbot_inputs_dict( } -@require_torch class BlenderbotModelTester: def __init__( self, @@ -109,7 +108,6 @@ class BlenderbotModelTester: self.bos_token_id = bos_token_id def prepare_config_and_inputs(self): - input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size) input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size).clamp( 3, ) @@ -117,7 +115,12 @@ class BlenderbotModelTester: decoder_input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size) - config = BlenderbotConfig( + config = self.get_config() + inputs_dict = prepare_blenderbot_inputs_dict(config, input_ids, decoder_input_ids) + return config, inputs_dict + + def get_config(self): + return BlenderbotConfig( vocab_size=self.vocab_size, d_model=self.hidden_size, encoder_layers=self.num_hidden_layers, @@ -133,8 +136,6 @@ class BlenderbotModelTester: bos_token_id=self.bos_token_id, pad_token_id=self.pad_token_id, ) - inputs_dict = prepare_blenderbot_inputs_dict(config, input_ids, decoder_input_ids) - return config, inputs_dict def prepare_config_and_inputs_for_common(self): config, inputs_dict = self.prepare_config_and_inputs() diff --git 
a/tests/test_modeling_blenderbot_small.py b/tests/test_modeling_blenderbot_small.py index f5dc8c4207..a7e1818404 100644 --- a/tests/test_modeling_blenderbot_small.py +++ b/tests/test_modeling_blenderbot_small.py @@ -17,7 +17,7 @@ import tempfile import unittest -from transformers import is_torch_available +from transformers import BlenderbotSmallConfig, is_torch_available from transformers.file_utils import cached_property from transformers.testing_utils import require_torch, slow, torch_device @@ -29,12 +29,7 @@ from .test_modeling_common import ModelTesterMixin, ids_tensor if is_torch_available(): import torch - from transformers import ( - BlenderbotSmallConfig, - BlenderbotSmallForConditionalGeneration, - BlenderbotSmallModel, - BlenderbotSmallTokenizer, - ) + from transformers import BlenderbotSmallForConditionalGeneration, BlenderbotSmallModel, BlenderbotSmallTokenizer from transformers.models.blenderbot_small.modeling_blenderbot_small import ( BlenderbotSmallDecoder, BlenderbotSmallEncoder, @@ -73,7 +68,6 @@ def prepare_blenderbot_small_inputs_dict( } -@require_torch class BlenderbotSmallModelTester: def __init__( self, @@ -114,7 +108,6 @@ class BlenderbotSmallModelTester: self.bos_token_id = bos_token_id def prepare_config_and_inputs(self): - input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size) input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size).clamp( 3, ) @@ -122,7 +115,12 @@ class BlenderbotSmallModelTester: decoder_input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size) - config = BlenderbotSmallConfig( + config = self.get_config() + inputs_dict = prepare_blenderbot_small_inputs_dict(config, input_ids, decoder_input_ids) + return config, inputs_dict + + def get_config(self): + return BlenderbotSmallConfig( vocab_size=self.vocab_size, d_model=self.hidden_size, encoder_layers=self.num_hidden_layers, @@ -138,8 +136,6 @@ class BlenderbotSmallModelTester: bos_token_id=self.bos_token_id, 
pad_token_id=self.pad_token_id, ) - inputs_dict = prepare_blenderbot_small_inputs_dict(config, input_ids, decoder_input_ids) - return config, inputs_dict def prepare_config_and_inputs_for_common(self): config, inputs_dict = self.prepare_config_and_inputs() diff --git a/tests/test_modeling_canine.py b/tests/test_modeling_canine.py index 9e92550069..adef5abae8 100644 --- a/tests/test_modeling_canine.py +++ b/tests/test_modeling_canine.py @@ -18,7 +18,7 @@ import unittest from typing import List, Tuple -from transformers import is_torch_available +from transformers import CanineConfig, is_torch_available from transformers.testing_utils import require_torch, slow, torch_device from .test_configuration_common import ConfigTester @@ -29,7 +29,6 @@ if is_torch_available(): import torch from transformers import ( - CanineConfig, CanineForMultipleChoice, CanineForQuestionAnswering, CanineForSequenceClassification, @@ -106,7 +105,12 @@ class CanineModelTester: token_labels = ids_tensor([self.batch_size, self.seq_length], self.num_labels) choice_labels = ids_tensor([self.batch_size], self.num_choices) - config = CanineConfig( + config = self.get_config() + + return config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels + + def get_config(self): + return CanineConfig( hidden_size=self.hidden_size, num_hidden_layers=self.num_hidden_layers, num_attention_heads=self.num_attention_heads, @@ -120,8 +124,6 @@ class CanineModelTester: initializer_range=self.initializer_range, ) - return config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels - def create_and_check_model( self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels ): diff --git a/tests/test_modeling_clip.py b/tests/test_modeling_clip.py index afcc5903c6..78f076bf39 100644 --- a/tests/test_modeling_clip.py +++ b/tests/test_modeling_clip.py @@ -21,6 +21,7 @@ import tempfile import unittest import requests +from 
transformers import CLIPConfig, CLIPTextConfig, CLIPVisionConfig from transformers.file_utils import is_torch_available, is_vision_available from transformers.testing_utils import require_torch, require_vision, slow, torch_device @@ -32,7 +33,7 @@ if is_torch_available(): import torch from torch import nn - from transformers import CLIPConfig, CLIPModel, CLIPTextConfig, CLIPTextModel, CLIPVisionConfig, CLIPVisionModel + from transformers import CLIPModel, CLIPTextModel, CLIPVisionModel from transformers.models.clip.modeling_clip import CLIP_PRETRAINED_MODEL_ARCHIVE_LIST @@ -77,7 +78,12 @@ class CLIPVisionModelTester: def prepare_config_and_inputs(self): pixel_values = floats_tensor([self.batch_size, self.num_channels, self.image_size, self.image_size]) - config = CLIPVisionConfig( + config = self.get_config() + + return config, pixel_values + + def get_config(self): + return CLIPVisionConfig( image_size=self.image_size, patch_size=self.patch_size, num_channels=self.num_channels, @@ -90,8 +96,6 @@ class CLIPVisionModelTester: initializer_range=self.initializer_range, ) - return config, pixel_values - def create_and_check_model(self, config, pixel_values): model = CLIPVisionModel(config=config) model.to(torch_device) @@ -323,7 +327,12 @@ class CLIPTextModelTester: if self.use_input_mask: input_mask = random_attention_mask([self.batch_size, self.seq_length]) - config = CLIPTextConfig( + config = self.get_config() + + return config, input_ids, input_mask + + def get_config(self): + return CLIPTextConfig( vocab_size=self.vocab_size, hidden_size=self.hidden_size, num_hidden_layers=self.num_hidden_layers, @@ -335,8 +344,6 @@ class CLIPTextModelTester: initializer_range=self.initializer_range, ) - return config, input_ids, input_mask - def create_and_check_model(self, config, input_ids, input_mask): model = CLIPTextModel(config=config) model.to(torch_device) @@ -409,10 +416,15 @@ class CLIPModelTester: text_config, input_ids, attention_mask = 
self.text_model_tester.prepare_config_and_inputs() vision_config, pixel_values = self.vision_model_tester.prepare_config_and_inputs() - config = CLIPConfig.from_text_vision_configs(text_config, vision_config, projection_dim=64) + config = self.get_config() return config, input_ids, attention_mask, pixel_values + def get_config(self): + return CLIPConfig.from_text_vision_configs( + self.text_model_tester.get_config(), self.vision_model_tester.get_config(), projection_dim=64 + ) + def create_and_check_model(self, config, input_ids, attention_mask, pixel_values): model = CLIPModel(config).to(torch_device).eval() result = model(input_ids, pixel_values, attention_mask) diff --git a/tests/test_modeling_convbert.py b/tests/test_modeling_convbert.py index ebe7188755..21013f83b5 100644 --- a/tests/test_modeling_convbert.py +++ b/tests/test_modeling_convbert.py @@ -18,7 +18,7 @@ import unittest from tests.test_modeling_common import floats_tensor -from transformers import is_torch_available +from transformers import ConvBertConfig, is_torch_available from transformers.models.auto import get_values from transformers.testing_utils import require_torch, slow, torch_device @@ -31,7 +31,6 @@ if is_torch_available(): from transformers import ( MODEL_FOR_QUESTION_ANSWERING_MAPPING, - ConvBertConfig, ConvBertForMaskedLM, ConvBertForMultipleChoice, ConvBertForQuestionAnswering, @@ -110,7 +109,12 @@ class ConvBertModelTester: token_labels = ids_tensor([self.batch_size, self.seq_length], self.num_labels) choice_labels = ids_tensor([self.batch_size], self.num_choices) - config = ConvBertConfig( + config = self.get_config() + + return config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels + + def get_config(self): + return ConvBertConfig( vocab_size=self.vocab_size, hidden_size=self.hidden_size, num_hidden_layers=self.num_hidden_layers, @@ -125,8 +129,6 @@ class ConvBertModelTester: initializer_range=self.initializer_range, ) - return config, 
input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels - def prepare_config_and_inputs_for_decoder(self): ( config, diff --git a/tests/test_modeling_ctrl.py b/tests/test_modeling_ctrl.py index d225462356..c35e3cc025 100644 --- a/tests/test_modeling_ctrl.py +++ b/tests/test_modeling_ctrl.py @@ -15,7 +15,7 @@ import unittest -from transformers import is_torch_available +from transformers import CTRLConfig, is_torch_available from transformers.testing_utils import require_torch, slow, torch_device from .test_configuration_common import ConfigTester @@ -28,7 +28,6 @@ if is_torch_available(): from transformers import ( CTRL_PRETRAINED_MODEL_ARCHIVE_LIST, - CTRLConfig, CTRLForSequenceClassification, CTRLLMHeadModel, CTRLModel, @@ -88,21 +87,7 @@ class CTRLModelTester: token_labels = ids_tensor([self.batch_size, self.seq_length], self.num_labels) choice_labels = ids_tensor([self.batch_size], self.num_choices) - config = CTRLConfig( - vocab_size=self.vocab_size, - n_embd=self.hidden_size, - n_layer=self.num_hidden_layers, - n_head=self.num_attention_heads, - # intermediate_size=self.intermediate_size, - # hidden_act=self.hidden_act, - # hidden_dropout_prob=self.hidden_dropout_prob, - # attention_probs_dropout_prob=self.attention_probs_dropout_prob, - n_positions=self.max_position_embeddings, - n_ctx=self.max_position_embeddings, - # type_vocab_size=self.type_vocab_size, - # initializer_range=self.initializer_range, - pad_token_id=self.pad_token_id, - ) + config = self.get_config() head_mask = ids_tensor([self.num_hidden_layers, self.num_attention_heads], 2) @@ -118,6 +103,23 @@ class CTRLModelTester: choice_labels, ) + def get_config(self): + return CTRLConfig( + vocab_size=self.vocab_size, + n_embd=self.hidden_size, + n_layer=self.num_hidden_layers, + n_head=self.num_attention_heads, + # intermediate_size=self.intermediate_size, + # hidden_act=self.hidden_act, + # hidden_dropout_prob=self.hidden_dropout_prob, + # 
attention_probs_dropout_prob=self.attention_probs_dropout_prob, + n_positions=self.max_position_embeddings, + n_ctx=self.max_position_embeddings, + # type_vocab_size=self.type_vocab_size, + # initializer_range=self.initializer_range, + pad_token_id=self.pad_token_id, + ) + def create_and_check_ctrl_model(self, config, input_ids, input_mask, head_mask, token_type_ids, *args): model = CTRLModel(config=config) model.to(torch_device) diff --git a/tests/test_modeling_deberta.py b/tests/test_modeling_deberta.py index 1c66617b88..cb11b3a521 100644 --- a/tests/test_modeling_deberta.py +++ b/tests/test_modeling_deberta.py @@ -12,10 +12,9 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - import unittest -from transformers import is_torch_available +from transformers import DebertaConfig, is_torch_available from transformers.testing_utils import require_sentencepiece, require_tokenizers, require_torch, slow, torch_device from .test_configuration_common import ConfigTester @@ -26,7 +25,6 @@ if is_torch_available(): import torch from transformers import ( - DebertaConfig, DebertaForMaskedLM, DebertaForQuestionAnswering, DebertaForSequenceClassification, @@ -36,6 +34,179 @@ if is_torch_available(): from transformers.models.deberta.modeling_deberta import DEBERTA_PRETRAINED_MODEL_ARCHIVE_LIST +class DebertaModelTester(object): + def __init__( + self, + parent, + batch_size=13, + seq_length=7, + is_training=True, + use_input_mask=True, + use_token_type_ids=True, + use_labels=True, + vocab_size=99, + hidden_size=32, + num_hidden_layers=5, + num_attention_heads=4, + intermediate_size=37, + hidden_act="gelu", + hidden_dropout_prob=0.1, + attention_probs_dropout_prob=0.1, + max_position_embeddings=512, + type_vocab_size=16, + type_sequence_label_size=2, + initializer_range=0.02, + relative_attention=False, + position_biased_input=True, + 
pos_att_type="None", + num_labels=3, + num_choices=4, + scope=None, + ): + self.parent = parent + self.batch_size = batch_size + self.seq_length = seq_length + self.is_training = is_training + self.use_input_mask = use_input_mask + self.use_token_type_ids = use_token_type_ids + self.use_labels = use_labels + self.vocab_size = vocab_size + self.hidden_size = hidden_size + self.num_hidden_layers = num_hidden_layers + self.num_attention_heads = num_attention_heads + self.intermediate_size = intermediate_size + self.hidden_act = hidden_act + self.hidden_dropout_prob = hidden_dropout_prob + self.attention_probs_dropout_prob = attention_probs_dropout_prob + self.max_position_embeddings = max_position_embeddings + self.type_vocab_size = type_vocab_size + self.type_sequence_label_size = type_sequence_label_size + self.initializer_range = initializer_range + self.num_labels = num_labels + self.num_choices = num_choices + self.relative_attention = relative_attention + self.position_biased_input = position_biased_input + self.pos_att_type = pos_att_type + self.scope = scope + + def prepare_config_and_inputs(self): + input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size) + + input_mask = None + if self.use_input_mask: + input_mask = ids_tensor([self.batch_size, self.seq_length], vocab_size=2) + + token_type_ids = None + if self.use_token_type_ids: + token_type_ids = ids_tensor([self.batch_size, self.seq_length], self.type_vocab_size) + + sequence_labels = None + token_labels = None + choice_labels = None + if self.use_labels: + sequence_labels = ids_tensor([self.batch_size], self.type_sequence_label_size) + token_labels = ids_tensor([self.batch_size, self.seq_length], self.num_labels) + choice_labels = ids_tensor([self.batch_size], self.num_choices) + + config = self.get_config() + + return config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels + + def get_config(self): + return DebertaConfig( + 
vocab_size=self.vocab_size, + hidden_size=self.hidden_size, + num_hidden_layers=self.num_hidden_layers, + num_attention_heads=self.num_attention_heads, + intermediate_size=self.intermediate_size, + hidden_act=self.hidden_act, + hidden_dropout_prob=self.hidden_dropout_prob, + attention_probs_dropout_prob=self.attention_probs_dropout_prob, + max_position_embeddings=self.max_position_embeddings, + type_vocab_size=self.type_vocab_size, + initializer_range=self.initializer_range, + relative_attention=self.relative_attention, + position_biased_input=self.position_biased_input, + pos_att_type=self.pos_att_type, + ) + + def check_loss_output(self, result): + self.parent.assertListEqual(list(result.loss.size()), []) + + def create_and_check_deberta_model( + self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels + ): + model = DebertaModel(config=config) + model.to(torch_device) + model.eval() + sequence_output = model(input_ids, attention_mask=input_mask, token_type_ids=token_type_ids)[0] + sequence_output = model(input_ids, token_type_ids=token_type_ids)[0] + sequence_output = model(input_ids)[0] + + self.parent.assertListEqual(list(sequence_output.size()), [self.batch_size, self.seq_length, self.hidden_size]) + + def create_and_check_deberta_for_masked_lm( + self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels + ): + model = DebertaForMaskedLM(config=config) + model.to(torch_device) + model.eval() + result = model(input_ids, attention_mask=input_mask, token_type_ids=token_type_ids, labels=token_labels) + + self.parent.assertEqual(result.logits.shape, (self.batch_size, self.seq_length, self.vocab_size)) + + def create_and_check_deberta_for_sequence_classification( + self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels + ): + config.num_labels = self.num_labels + model = DebertaForSequenceClassification(config) + model.to(torch_device) + 
model.eval() + result = model(input_ids, attention_mask=input_mask, token_type_ids=token_type_ids, labels=sequence_labels) + self.parent.assertListEqual(list(result.logits.size()), [self.batch_size, self.num_labels]) + self.check_loss_output(result) + + def create_and_check_deberta_for_token_classification( + self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels + ): + config.num_labels = self.num_labels + model = DebertaForTokenClassification(config=config) + model.to(torch_device) + model.eval() + result = model(input_ids, attention_mask=input_mask, token_type_ids=token_type_ids, labels=token_labels) + self.parent.assertEqual(result.logits.shape, (self.batch_size, self.seq_length, self.num_labels)) + + def create_and_check_deberta_for_question_answering( + self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels + ): + model = DebertaForQuestionAnswering(config=config) + model.to(torch_device) + model.eval() + result = model( + input_ids, + attention_mask=input_mask, + token_type_ids=token_type_ids, + start_positions=sequence_labels, + end_positions=sequence_labels, + ) + self.parent.assertEqual(result.start_logits.shape, (self.batch_size, self.seq_length)) + self.parent.assertEqual(result.end_logits.shape, (self.batch_size, self.seq_length)) + + def prepare_config_and_inputs_for_common(self): + config_and_inputs = self.prepare_config_and_inputs() + ( + config, + input_ids, + token_type_ids, + input_mask, + sequence_labels, + token_labels, + choice_labels, + ) = config_and_inputs + inputs_dict = {"input_ids": input_ids, "token_type_ids": token_type_ids, "attention_mask": input_mask} + return config, inputs_dict + + @require_torch class DebertaModelTest(ModelTesterMixin, unittest.TestCase): @@ -56,179 +227,8 @@ class DebertaModelTest(ModelTesterMixin, unittest.TestCase): test_head_masking = False is_encoder_decoder = False - class DebertaModelTester(object): - def __init__( - 
self, - parent, - batch_size=13, - seq_length=7, - is_training=True, - use_input_mask=True, - use_token_type_ids=True, - use_labels=True, - vocab_size=99, - hidden_size=32, - num_hidden_layers=5, - num_attention_heads=4, - intermediate_size=37, - hidden_act="gelu", - hidden_dropout_prob=0.1, - attention_probs_dropout_prob=0.1, - max_position_embeddings=512, - type_vocab_size=16, - type_sequence_label_size=2, - initializer_range=0.02, - relative_attention=False, - position_biased_input=True, - pos_att_type="None", - num_labels=3, - num_choices=4, - scope=None, - ): - self.parent = parent - self.batch_size = batch_size - self.seq_length = seq_length - self.is_training = is_training - self.use_input_mask = use_input_mask - self.use_token_type_ids = use_token_type_ids - self.use_labels = use_labels - self.vocab_size = vocab_size - self.hidden_size = hidden_size - self.num_hidden_layers = num_hidden_layers - self.num_attention_heads = num_attention_heads - self.intermediate_size = intermediate_size - self.hidden_act = hidden_act - self.hidden_dropout_prob = hidden_dropout_prob - self.attention_probs_dropout_prob = attention_probs_dropout_prob - self.max_position_embeddings = max_position_embeddings - self.type_vocab_size = type_vocab_size - self.type_sequence_label_size = type_sequence_label_size - self.initializer_range = initializer_range - self.num_labels = num_labels - self.num_choices = num_choices - self.relative_attention = relative_attention - self.position_biased_input = position_biased_input - self.pos_att_type = pos_att_type - self.scope = scope - - def prepare_config_and_inputs(self): - input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size) - - input_mask = None - if self.use_input_mask: - input_mask = ids_tensor([self.batch_size, self.seq_length], vocab_size=2) - - token_type_ids = None - if self.use_token_type_ids: - token_type_ids = ids_tensor([self.batch_size, self.seq_length], self.type_vocab_size) - - sequence_labels = None - 
token_labels = None - choice_labels = None - if self.use_labels: - sequence_labels = ids_tensor([self.batch_size], self.type_sequence_label_size) - token_labels = ids_tensor([self.batch_size, self.seq_length], self.num_labels) - choice_labels = ids_tensor([self.batch_size], self.num_choices) - - config = DebertaConfig( - vocab_size=self.vocab_size, - hidden_size=self.hidden_size, - num_hidden_layers=self.num_hidden_layers, - num_attention_heads=self.num_attention_heads, - intermediate_size=self.intermediate_size, - hidden_act=self.hidden_act, - hidden_dropout_prob=self.hidden_dropout_prob, - attention_probs_dropout_prob=self.attention_probs_dropout_prob, - max_position_embeddings=self.max_position_embeddings, - type_vocab_size=self.type_vocab_size, - initializer_range=self.initializer_range, - relative_attention=self.relative_attention, - position_biased_input=self.position_biased_input, - pos_att_type=self.pos_att_type, - ) - - return config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels - - def check_loss_output(self, result): - self.parent.assertListEqual(list(result.loss.size()), []) - - def create_and_check_deberta_model( - self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels - ): - model = DebertaModel(config=config) - model.to(torch_device) - model.eval() - sequence_output = model(input_ids, attention_mask=input_mask, token_type_ids=token_type_ids)[0] - sequence_output = model(input_ids, token_type_ids=token_type_ids)[0] - sequence_output = model(input_ids)[0] - - self.parent.assertListEqual( - list(sequence_output.size()), [self.batch_size, self.seq_length, self.hidden_size] - ) - - def create_and_check_deberta_for_masked_lm( - self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels - ): - model = DebertaForMaskedLM(config=config) - model.to(torch_device) - model.eval() - result = model(input_ids, attention_mask=input_mask, 
token_type_ids=token_type_ids, labels=token_labels) - - self.parent.assertEqual(result.logits.shape, (self.batch_size, self.seq_length, self.vocab_size)) - - def create_and_check_deberta_for_sequence_classification( - self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels - ): - config.num_labels = self.num_labels - model = DebertaForSequenceClassification(config) - model.to(torch_device) - model.eval() - result = model(input_ids, attention_mask=input_mask, token_type_ids=token_type_ids, labels=sequence_labels) - self.parent.assertListEqual(list(result.logits.size()), [self.batch_size, self.num_labels]) - self.check_loss_output(result) - - def create_and_check_deberta_for_token_classification( - self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels - ): - config.num_labels = self.num_labels - model = DebertaForTokenClassification(config=config) - model.to(torch_device) - model.eval() - result = model(input_ids, attention_mask=input_mask, token_type_ids=token_type_ids, labels=token_labels) - self.parent.assertEqual(result.logits.shape, (self.batch_size, self.seq_length, self.num_labels)) - - def create_and_check_deberta_for_question_answering( - self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels - ): - model = DebertaForQuestionAnswering(config=config) - model.to(torch_device) - model.eval() - result = model( - input_ids, - attention_mask=input_mask, - token_type_ids=token_type_ids, - start_positions=sequence_labels, - end_positions=sequence_labels, - ) - self.parent.assertEqual(result.start_logits.shape, (self.batch_size, self.seq_length)) - self.parent.assertEqual(result.end_logits.shape, (self.batch_size, self.seq_length)) - - def prepare_config_and_inputs_for_common(self): - config_and_inputs = self.prepare_config_and_inputs() - ( - config, - input_ids, - token_type_ids, - input_mask, - sequence_labels, - token_labels, - 
choice_labels, - ) = config_and_inputs - inputs_dict = {"input_ids": input_ids, "token_type_ids": token_type_ids, "attention_mask": input_mask} - return config, inputs_dict - def setUp(self): - self.model_tester = DebertaModelTest.DebertaModelTester(self) + self.model_tester = DebertaModelTester(self) self.config_tester = ConfigTester(self, config_class=DebertaConfig, hidden_size=37) def test_config(self): diff --git a/tests/test_modeling_deberta_v2.py b/tests/test_modeling_deberta_v2.py index 718682edb3..5e022cc5d7 100644 --- a/tests/test_modeling_deberta_v2.py +++ b/tests/test_modeling_deberta_v2.py @@ -12,10 +12,9 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - import unittest -from transformers import is_torch_available +from transformers import DebertaV2Config, is_torch_available from transformers.testing_utils import require_sentencepiece, require_tokenizers, require_torch, slow, torch_device from .test_configuration_common import ConfigTester @@ -26,7 +25,6 @@ if is_torch_available(): import torch from transformers import ( - DebertaV2Config, DebertaV2ForMaskedLM, DebertaV2ForQuestionAnswering, DebertaV2ForSequenceClassification, @@ -36,6 +34,179 @@ if is_torch_available(): from transformers.models.deberta_v2.modeling_deberta_v2 import DEBERTA_V2_PRETRAINED_MODEL_ARCHIVE_LIST +class DebertaV2ModelTester(object): + def __init__( + self, + parent, + batch_size=13, + seq_length=7, + is_training=True, + use_input_mask=True, + use_token_type_ids=True, + use_labels=True, + vocab_size=99, + hidden_size=32, + num_hidden_layers=5, + num_attention_heads=4, + intermediate_size=37, + hidden_act="gelu", + hidden_dropout_prob=0.1, + attention_probs_dropout_prob=0.1, + max_position_embeddings=512, + type_vocab_size=16, + type_sequence_label_size=2, + initializer_range=0.02, + relative_attention=False, + position_biased_input=True, + 
pos_att_type="None", + num_labels=3, + num_choices=4, + scope=None, + ): + self.parent = parent + self.batch_size = batch_size + self.seq_length = seq_length + self.is_training = is_training + self.use_input_mask = use_input_mask + self.use_token_type_ids = use_token_type_ids + self.use_labels = use_labels + self.vocab_size = vocab_size + self.hidden_size = hidden_size + self.num_hidden_layers = num_hidden_layers + self.num_attention_heads = num_attention_heads + self.intermediate_size = intermediate_size + self.hidden_act = hidden_act + self.hidden_dropout_prob = hidden_dropout_prob + self.attention_probs_dropout_prob = attention_probs_dropout_prob + self.max_position_embeddings = max_position_embeddings + self.type_vocab_size = type_vocab_size + self.type_sequence_label_size = type_sequence_label_size + self.initializer_range = initializer_range + self.num_labels = num_labels + self.num_choices = num_choices + self.relative_attention = relative_attention + self.position_biased_input = position_biased_input + self.pos_att_type = pos_att_type + self.scope = scope + + def prepare_config_and_inputs(self): + input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size) + + input_mask = None + if self.use_input_mask: + input_mask = ids_tensor([self.batch_size, self.seq_length], vocab_size=2) + + token_type_ids = None + if self.use_token_type_ids: + token_type_ids = ids_tensor([self.batch_size, self.seq_length], self.type_vocab_size) + + sequence_labels = None + token_labels = None + choice_labels = None + if self.use_labels: + sequence_labels = ids_tensor([self.batch_size], self.type_sequence_label_size) + token_labels = ids_tensor([self.batch_size, self.seq_length], self.num_labels) + choice_labels = ids_tensor([self.batch_size], self.num_choices) + + config = self.get_config() + + return config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels + + def get_config(self): + return DebertaV2Config( + 
vocab_size=self.vocab_size, + hidden_size=self.hidden_size, + num_hidden_layers=self.num_hidden_layers, + num_attention_heads=self.num_attention_heads, + intermediate_size=self.intermediate_size, + hidden_act=self.hidden_act, + hidden_dropout_prob=self.hidden_dropout_prob, + attention_probs_dropout_prob=self.attention_probs_dropout_prob, + max_position_embeddings=self.max_position_embeddings, + type_vocab_size=self.type_vocab_size, + initializer_range=self.initializer_range, + relative_attention=self.relative_attention, + position_biased_input=self.position_biased_input, + pos_att_type=self.pos_att_type, + ) + + def check_loss_output(self, result): + self.parent.assertListEqual(list(result.loss.size()), []) + + def create_and_check_deberta_model( + self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels + ): + model = DebertaV2Model(config=config) + model.to(torch_device) + model.eval() + sequence_output = model(input_ids, attention_mask=input_mask, token_type_ids=token_type_ids)[0] + sequence_output = model(input_ids, token_type_ids=token_type_ids)[0] + sequence_output = model(input_ids)[0] + + self.parent.assertListEqual(list(sequence_output.size()), [self.batch_size, self.seq_length, self.hidden_size]) + + def create_and_check_deberta_for_masked_lm( + self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels + ): + model = DebertaV2ForMaskedLM(config=config) + model.to(torch_device) + model.eval() + result = model(input_ids, attention_mask=input_mask, token_type_ids=token_type_ids, labels=token_labels) + + self.parent.assertEqual(result.logits.shape, (self.batch_size, self.seq_length, self.vocab_size)) + + def create_and_check_deberta_for_sequence_classification( + self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels + ): + config.num_labels = self.num_labels + model = DebertaV2ForSequenceClassification(config) + model.to(torch_device) + 
model.eval() + result = model(input_ids, attention_mask=input_mask, token_type_ids=token_type_ids, labels=sequence_labels) + self.parent.assertListEqual(list(result.logits.size()), [self.batch_size, self.num_labels]) + self.check_loss_output(result) + + def create_and_check_deberta_for_token_classification( + self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels + ): + config.num_labels = self.num_labels + model = DebertaV2ForTokenClassification(config=config) + model.to(torch_device) + model.eval() + result = model(input_ids, attention_mask=input_mask, token_type_ids=token_type_ids, labels=token_labels) + self.parent.assertEqual(result.logits.shape, (self.batch_size, self.seq_length, self.num_labels)) + + def create_and_check_deberta_for_question_answering( + self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels + ): + model = DebertaV2ForQuestionAnswering(config=config) + model.to(torch_device) + model.eval() + result = model( + input_ids, + attention_mask=input_mask, + token_type_ids=token_type_ids, + start_positions=sequence_labels, + end_positions=sequence_labels, + ) + self.parent.assertEqual(result.start_logits.shape, (self.batch_size, self.seq_length)) + self.parent.assertEqual(result.end_logits.shape, (self.batch_size, self.seq_length)) + + def prepare_config_and_inputs_for_common(self): + config_and_inputs = self.prepare_config_and_inputs() + ( + config, + input_ids, + token_type_ids, + input_mask, + sequence_labels, + token_labels, + choice_labels, + ) = config_and_inputs + inputs_dict = {"input_ids": input_ids, "token_type_ids": token_type_ids, "attention_mask": input_mask} + return config, inputs_dict + + @require_torch class DebertaV2ModelTest(ModelTesterMixin, unittest.TestCase): @@ -56,179 +227,8 @@ class DebertaV2ModelTest(ModelTesterMixin, unittest.TestCase): test_head_masking = False is_encoder_decoder = False - class DebertaV2ModelTester(object): - def 
__init__( - self, - parent, - batch_size=13, - seq_length=7, - is_training=True, - use_input_mask=True, - use_token_type_ids=True, - use_labels=True, - vocab_size=99, - hidden_size=32, - num_hidden_layers=5, - num_attention_heads=4, - intermediate_size=37, - hidden_act="gelu", - hidden_dropout_prob=0.1, - attention_probs_dropout_prob=0.1, - max_position_embeddings=512, - type_vocab_size=16, - type_sequence_label_size=2, - initializer_range=0.02, - relative_attention=False, - position_biased_input=True, - pos_att_type="None", - num_labels=3, - num_choices=4, - scope=None, - ): - self.parent = parent - self.batch_size = batch_size - self.seq_length = seq_length - self.is_training = is_training - self.use_input_mask = use_input_mask - self.use_token_type_ids = use_token_type_ids - self.use_labels = use_labels - self.vocab_size = vocab_size - self.hidden_size = hidden_size - self.num_hidden_layers = num_hidden_layers - self.num_attention_heads = num_attention_heads - self.intermediate_size = intermediate_size - self.hidden_act = hidden_act - self.hidden_dropout_prob = hidden_dropout_prob - self.attention_probs_dropout_prob = attention_probs_dropout_prob - self.max_position_embeddings = max_position_embeddings - self.type_vocab_size = type_vocab_size - self.type_sequence_label_size = type_sequence_label_size - self.initializer_range = initializer_range - self.num_labels = num_labels - self.num_choices = num_choices - self.relative_attention = relative_attention - self.position_biased_input = position_biased_input - self.pos_att_type = pos_att_type - self.scope = scope - - def prepare_config_and_inputs(self): - input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size) - - input_mask = None - if self.use_input_mask: - input_mask = ids_tensor([self.batch_size, self.seq_length], vocab_size=2) - - token_type_ids = None - if self.use_token_type_ids: - token_type_ids = ids_tensor([self.batch_size, self.seq_length], self.type_vocab_size) - - sequence_labels = 
None - token_labels = None - choice_labels = None - if self.use_labels: - sequence_labels = ids_tensor([self.batch_size], self.type_sequence_label_size) - token_labels = ids_tensor([self.batch_size, self.seq_length], self.num_labels) - choice_labels = ids_tensor([self.batch_size], self.num_choices) - - config = DebertaV2Config( - vocab_size=self.vocab_size, - hidden_size=self.hidden_size, - num_hidden_layers=self.num_hidden_layers, - num_attention_heads=self.num_attention_heads, - intermediate_size=self.intermediate_size, - hidden_act=self.hidden_act, - hidden_dropout_prob=self.hidden_dropout_prob, - attention_probs_dropout_prob=self.attention_probs_dropout_prob, - max_position_embeddings=self.max_position_embeddings, - type_vocab_size=self.type_vocab_size, - initializer_range=self.initializer_range, - relative_attention=self.relative_attention, - position_biased_input=self.position_biased_input, - pos_att_type=self.pos_att_type, - ) - - return config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels - - def check_loss_output(self, result): - self.parent.assertListEqual(list(result.loss.size()), []) - - def create_and_check_deberta_model( - self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels - ): - model = DebertaV2Model(config=config) - model.to(torch_device) - model.eval() - sequence_output = model(input_ids, attention_mask=input_mask, token_type_ids=token_type_ids)[0] - sequence_output = model(input_ids, token_type_ids=token_type_ids)[0] - sequence_output = model(input_ids)[0] - - self.parent.assertListEqual( - list(sequence_output.size()), [self.batch_size, self.seq_length, self.hidden_size] - ) - - def create_and_check_deberta_for_masked_lm( - self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels - ): - model = DebertaV2ForMaskedLM(config=config) - model.to(torch_device) - model.eval() - result = model(input_ids, 
attention_mask=input_mask, token_type_ids=token_type_ids, labels=token_labels) - - self.parent.assertEqual(result.logits.shape, (self.batch_size, self.seq_length, self.vocab_size)) - - def create_and_check_deberta_for_sequence_classification( - self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels - ): - config.num_labels = self.num_labels - model = DebertaV2ForSequenceClassification(config) - model.to(torch_device) - model.eval() - result = model(input_ids, attention_mask=input_mask, token_type_ids=token_type_ids, labels=sequence_labels) - self.parent.assertListEqual(list(result.logits.size()), [self.batch_size, self.num_labels]) - self.check_loss_output(result) - - def create_and_check_deberta_for_token_classification( - self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels - ): - config.num_labels = self.num_labels - model = DebertaV2ForTokenClassification(config=config) - model.to(torch_device) - model.eval() - result = model(input_ids, attention_mask=input_mask, token_type_ids=token_type_ids, labels=token_labels) - self.parent.assertEqual(result.logits.shape, (self.batch_size, self.seq_length, self.num_labels)) - - def create_and_check_deberta_for_question_answering( - self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels - ): - model = DebertaV2ForQuestionAnswering(config=config) - model.to(torch_device) - model.eval() - result = model( - input_ids, - attention_mask=input_mask, - token_type_ids=token_type_ids, - start_positions=sequence_labels, - end_positions=sequence_labels, - ) - self.parent.assertEqual(result.start_logits.shape, (self.batch_size, self.seq_length)) - self.parent.assertEqual(result.end_logits.shape, (self.batch_size, self.seq_length)) - - def prepare_config_and_inputs_for_common(self): - config_and_inputs = self.prepare_config_and_inputs() - ( - config, - input_ids, - token_type_ids, - input_mask, - 
sequence_labels, - token_labels, - choice_labels, - ) = config_and_inputs - inputs_dict = {"input_ids": input_ids, "token_type_ids": token_type_ids, "attention_mask": input_mask} - return config, inputs_dict - def setUp(self): - self.model_tester = DebertaV2ModelTest.DebertaV2ModelTester(self) + self.model_tester = DebertaV2ModelTester(self) self.config_tester = ConfigTester(self, config_class=DebertaV2Config, hidden_size=37) def test_config(self): diff --git a/tests/test_modeling_deit.py b/tests/test_modeling_deit.py index 0eb24f84cf..c689a90af7 100644 --- a/tests/test_modeling_deit.py +++ b/tests/test_modeling_deit.py @@ -18,6 +18,7 @@ import inspect import unittest +from transformers import DeiTConfig from transformers.file_utils import cached_property, is_torch_available, is_vision_available from transformers.testing_utils import require_torch, require_vision, slow, torch_device @@ -31,7 +32,6 @@ if is_torch_available(): from transformers import ( MODEL_MAPPING, - DeiTConfig, DeiTForImageClassification, DeiTForImageClassificationWithTeacher, DeiTModel, @@ -92,7 +92,12 @@ class DeiTModelTester: if self.use_labels: labels = ids_tensor([self.batch_size], self.type_sequence_label_size) - config = DeiTConfig( + config = self.get_config() + + return config, pixel_values, labels + + def get_config(self): + return DeiTConfig( image_size=self.image_size, patch_size=self.patch_size, num_channels=self.num_channels, @@ -107,8 +112,6 @@ class DeiTModelTester: initializer_range=self.initializer_range, ) - return config, pixel_values, labels - def create_and_check_model(self, config, pixel_values, labels): model = DeiTModel(config=config) model.to(torch_device) diff --git a/tests/test_modeling_detr.py b/tests/test_modeling_detr.py index bfa03f977f..5fba425f25 100644 --- a/tests/test_modeling_detr.py +++ b/tests/test_modeling_detr.py @@ -19,7 +19,7 @@ import inspect import math import unittest -from transformers import is_timm_available, is_vision_available +from transformers 
import DetrConfig, is_timm_available, is_vision_available from transformers.file_utils import cached_property from transformers.testing_utils import require_timm, require_vision, slow, torch_device @@ -31,7 +31,7 @@ from .test_modeling_common import ModelTesterMixin, _config_zero_init, floats_te if is_timm_available(): import torch - from transformers import DetrConfig, DetrForObjectDetection, DetrForSegmentation, DetrModel + from transformers import DetrForObjectDetection, DetrForSegmentation, DetrModel if is_vision_available(): @@ -40,7 +40,6 @@ if is_vision_available(): from transformers import DetrFeatureExtractor -@require_timm class DetrModelTester: def __init__( self, @@ -102,7 +101,11 @@ class DetrModelTester: target["masks"] = torch.rand(self.n_targets, self.min_size, self.max_size, device=torch_device) labels.append(target) - config = DetrConfig( + config = self.get_config() + return config, pixel_values, pixel_mask, labels + + def get_config(self): + return DetrConfig( d_model=self.hidden_size, encoder_layers=self.num_hidden_layers, decoder_layers=self.num_hidden_layers, @@ -115,7 +118,6 @@ class DetrModelTester: num_queries=self.num_queries, num_labels=self.num_labels, ) - return config, pixel_values, pixel_mask, labels def prepare_config_and_inputs_for_common(self): config, pixel_values, pixel_mask, labels = self.prepare_config_and_inputs() diff --git a/tests/test_modeling_distilbert.py b/tests/test_modeling_distilbert.py index 269cadf957..64a6a9cce9 100644 --- a/tests/test_modeling_distilbert.py +++ b/tests/test_modeling_distilbert.py @@ -16,7 +16,7 @@ import unittest -from transformers import is_torch_available +from transformers import DistilBertConfig, is_torch_available from transformers.testing_utils import require_torch, slow, torch_device from .test_configuration_common import ConfigTester @@ -28,7 +28,6 @@ if is_torch_available(): from transformers import ( DISTILBERT_PRETRAINED_MODEL_ARCHIVE_LIST, - DistilBertConfig, DistilBertForMaskedLM, 
DistilBertForMultipleChoice, DistilBertForQuestionAnswering, @@ -37,160 +36,162 @@ if is_torch_available(): DistilBertModel, ) - class DistilBertModelTester(object): - def __init__( - self, - parent, - batch_size=13, - seq_length=7, - is_training=True, - use_input_mask=True, - use_token_type_ids=False, - use_labels=True, - vocab_size=99, - hidden_size=32, - num_hidden_layers=5, - num_attention_heads=4, - intermediate_size=37, - hidden_act="gelu", - hidden_dropout_prob=0.1, - attention_probs_dropout_prob=0.1, - max_position_embeddings=512, - type_vocab_size=16, - type_sequence_label_size=2, - initializer_range=0.02, - num_labels=3, - num_choices=4, - scope=None, - ): - self.parent = parent - self.batch_size = batch_size - self.seq_length = seq_length - self.is_training = is_training - self.use_input_mask = use_input_mask - self.use_token_type_ids = use_token_type_ids - self.use_labels = use_labels - self.vocab_size = vocab_size - self.hidden_size = hidden_size - self.num_hidden_layers = num_hidden_layers - self.num_attention_heads = num_attention_heads - self.intermediate_size = intermediate_size - self.hidden_act = hidden_act - self.hidden_dropout_prob = hidden_dropout_prob - self.attention_probs_dropout_prob = attention_probs_dropout_prob - self.max_position_embeddings = max_position_embeddings - self.type_vocab_size = type_vocab_size - self.type_sequence_label_size = type_sequence_label_size - self.initializer_range = initializer_range - self.num_labels = num_labels - self.num_choices = num_choices - self.scope = scope - def prepare_config_and_inputs(self): - input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size) +class DistilBertModelTester(object): + def __init__( + self, + parent, + batch_size=13, + seq_length=7, + is_training=True, + use_input_mask=True, + use_token_type_ids=False, + use_labels=True, + vocab_size=99, + hidden_size=32, + num_hidden_layers=5, + num_attention_heads=4, + intermediate_size=37, + hidden_act="gelu", + 
hidden_dropout_prob=0.1, + attention_probs_dropout_prob=0.1, + max_position_embeddings=512, + type_vocab_size=16, + type_sequence_label_size=2, + initializer_range=0.02, + num_labels=3, + num_choices=4, + scope=None, + ): + self.parent = parent + self.batch_size = batch_size + self.seq_length = seq_length + self.is_training = is_training + self.use_input_mask = use_input_mask + self.use_token_type_ids = use_token_type_ids + self.use_labels = use_labels + self.vocab_size = vocab_size + self.hidden_size = hidden_size + self.num_hidden_layers = num_hidden_layers + self.num_attention_heads = num_attention_heads + self.intermediate_size = intermediate_size + self.hidden_act = hidden_act + self.hidden_dropout_prob = hidden_dropout_prob + self.attention_probs_dropout_prob = attention_probs_dropout_prob + self.max_position_embeddings = max_position_embeddings + self.type_vocab_size = type_vocab_size + self.type_sequence_label_size = type_sequence_label_size + self.initializer_range = initializer_range + self.num_labels = num_labels + self.num_choices = num_choices + self.scope = scope - input_mask = None - if self.use_input_mask: - input_mask = random_attention_mask([self.batch_size, self.seq_length]) + def prepare_config_and_inputs(self): + input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size) - sequence_labels = None - token_labels = None - choice_labels = None - if self.use_labels: - sequence_labels = ids_tensor([self.batch_size], self.type_sequence_label_size) - token_labels = ids_tensor([self.batch_size, self.seq_length], self.num_labels) - choice_labels = ids_tensor([self.batch_size], self.num_choices) + input_mask = None + if self.use_input_mask: + input_mask = random_attention_mask([self.batch_size, self.seq_length]) - config = DistilBertConfig( - vocab_size=self.vocab_size, - dim=self.hidden_size, - n_layers=self.num_hidden_layers, - n_heads=self.num_attention_heads, - hidden_dim=self.intermediate_size, - hidden_act=self.hidden_act, - 
dropout=self.hidden_dropout_prob, - attention_dropout=self.attention_probs_dropout_prob, - max_position_embeddings=self.max_position_embeddings, - initializer_range=self.initializer_range, - ) + sequence_labels = None + token_labels = None + choice_labels = None + if self.use_labels: + sequence_labels = ids_tensor([self.batch_size], self.type_sequence_label_size) + token_labels = ids_tensor([self.batch_size, self.seq_length], self.num_labels) + choice_labels = ids_tensor([self.batch_size], self.num_choices) - return config, input_ids, input_mask, sequence_labels, token_labels, choice_labels + config = self.get_config() - def create_and_check_distilbert_model( - self, config, input_ids, input_mask, sequence_labels, token_labels, choice_labels - ): - model = DistilBertModel(config=config) - model.to(torch_device) - model.eval() - result = model(input_ids, input_mask) - result = model(input_ids) - self.parent.assertEqual( - result.last_hidden_state.shape, (self.batch_size, self.seq_length, self.hidden_size) - ) + return config, input_ids, input_mask, sequence_labels, token_labels, choice_labels - def create_and_check_distilbert_for_masked_lm( - self, config, input_ids, input_mask, sequence_labels, token_labels, choice_labels - ): - model = DistilBertForMaskedLM(config=config) - model.to(torch_device) - model.eval() - result = model(input_ids, attention_mask=input_mask, labels=token_labels) - self.parent.assertEqual(result.logits.shape, (self.batch_size, self.seq_length, self.vocab_size)) + def get_config(self): + return DistilBertConfig( + vocab_size=self.vocab_size, + dim=self.hidden_size, + n_layers=self.num_hidden_layers, + n_heads=self.num_attention_heads, + hidden_dim=self.intermediate_size, + hidden_act=self.hidden_act, + dropout=self.hidden_dropout_prob, + attention_dropout=self.attention_probs_dropout_prob, + max_position_embeddings=self.max_position_embeddings, + initializer_range=self.initializer_range, + ) - def 
create_and_check_distilbert_for_question_answering( - self, config, input_ids, input_mask, sequence_labels, token_labels, choice_labels - ): - model = DistilBertForQuestionAnswering(config=config) - model.to(torch_device) - model.eval() - result = model( - input_ids, attention_mask=input_mask, start_positions=sequence_labels, end_positions=sequence_labels - ) - self.parent.assertEqual(result.start_logits.shape, (self.batch_size, self.seq_length)) - self.parent.assertEqual(result.end_logits.shape, (self.batch_size, self.seq_length)) + def create_and_check_distilbert_model( + self, config, input_ids, input_mask, sequence_labels, token_labels, choice_labels + ): + model = DistilBertModel(config=config) + model.to(torch_device) + model.eval() + result = model(input_ids, input_mask) + result = model(input_ids) + self.parent.assertEqual(result.last_hidden_state.shape, (self.batch_size, self.seq_length, self.hidden_size)) - def create_and_check_distilbert_for_sequence_classification( - self, config, input_ids, input_mask, sequence_labels, token_labels, choice_labels - ): - config.num_labels = self.num_labels - model = DistilBertForSequenceClassification(config) - model.to(torch_device) - model.eval() - result = model(input_ids, attention_mask=input_mask, labels=sequence_labels) - self.parent.assertEqual(result.logits.shape, (self.batch_size, self.num_labels)) + def create_and_check_distilbert_for_masked_lm( + self, config, input_ids, input_mask, sequence_labels, token_labels, choice_labels + ): + model = DistilBertForMaskedLM(config=config) + model.to(torch_device) + model.eval() + result = model(input_ids, attention_mask=input_mask, labels=token_labels) + self.parent.assertEqual(result.logits.shape, (self.batch_size, self.seq_length, self.vocab_size)) - def create_and_check_distilbert_for_token_classification( - self, config, input_ids, input_mask, sequence_labels, token_labels, choice_labels - ): - config.num_labels = self.num_labels - model = 
DistilBertForTokenClassification(config=config) - model.to(torch_device) - model.eval() + def create_and_check_distilbert_for_question_answering( + self, config, input_ids, input_mask, sequence_labels, token_labels, choice_labels + ): + model = DistilBertForQuestionAnswering(config=config) + model.to(torch_device) + model.eval() + result = model( + input_ids, attention_mask=input_mask, start_positions=sequence_labels, end_positions=sequence_labels + ) + self.parent.assertEqual(result.start_logits.shape, (self.batch_size, self.seq_length)) + self.parent.assertEqual(result.end_logits.shape, (self.batch_size, self.seq_length)) - result = model(input_ids, attention_mask=input_mask, labels=token_labels) - self.parent.assertEqual(result.logits.shape, (self.batch_size, self.seq_length, self.num_labels)) + def create_and_check_distilbert_for_sequence_classification( + self, config, input_ids, input_mask, sequence_labels, token_labels, choice_labels + ): + config.num_labels = self.num_labels + model = DistilBertForSequenceClassification(config) + model.to(torch_device) + model.eval() + result = model(input_ids, attention_mask=input_mask, labels=sequence_labels) + self.parent.assertEqual(result.logits.shape, (self.batch_size, self.num_labels)) - def create_and_check_distilbert_for_multiple_choice( - self, config, input_ids, input_mask, sequence_labels, token_labels, choice_labels - ): - config.num_choices = self.num_choices - model = DistilBertForMultipleChoice(config=config) - model.to(torch_device) - model.eval() - multiple_choice_inputs_ids = input_ids.unsqueeze(1).expand(-1, self.num_choices, -1).contiguous() - multiple_choice_input_mask = input_mask.unsqueeze(1).expand(-1, self.num_choices, -1).contiguous() - result = model( - multiple_choice_inputs_ids, - attention_mask=multiple_choice_input_mask, - labels=choice_labels, - ) - self.parent.assertEqual(result.logits.shape, (self.batch_size, self.num_choices)) + def create_and_check_distilbert_for_token_classification( + 
self, config, input_ids, input_mask, sequence_labels, token_labels, choice_labels + ): + config.num_labels = self.num_labels + model = DistilBertForTokenClassification(config=config) + model.to(torch_device) + model.eval() - def prepare_config_and_inputs_for_common(self): - config_and_inputs = self.prepare_config_and_inputs() - (config, input_ids, input_mask, sequence_labels, token_labels, choice_labels) = config_and_inputs - inputs_dict = {"input_ids": input_ids, "attention_mask": input_mask} - return config, inputs_dict + result = model(input_ids, attention_mask=input_mask, labels=token_labels) + self.parent.assertEqual(result.logits.shape, (self.batch_size, self.seq_length, self.num_labels)) + + def create_and_check_distilbert_for_multiple_choice( + self, config, input_ids, input_mask, sequence_labels, token_labels, choice_labels + ): + config.num_choices = self.num_choices + model = DistilBertForMultipleChoice(config=config) + model.to(torch_device) + model.eval() + multiple_choice_inputs_ids = input_ids.unsqueeze(1).expand(-1, self.num_choices, -1).contiguous() + multiple_choice_input_mask = input_mask.unsqueeze(1).expand(-1, self.num_choices, -1).contiguous() + result = model( + multiple_choice_inputs_ids, + attention_mask=multiple_choice_input_mask, + labels=choice_labels, + ) + self.parent.assertEqual(result.logits.shape, (self.batch_size, self.num_choices)) + + def prepare_config_and_inputs_for_common(self): + config_and_inputs = self.prepare_config_and_inputs() + (config, input_ids, input_mask, sequence_labels, token_labels, choice_labels) = config_and_inputs + inputs_dict = {"input_ids": input_ids, "attention_mask": input_mask} + return config, inputs_dict @require_torch diff --git a/tests/test_modeling_dpr.py b/tests/test_modeling_dpr.py index 05c9844b4b..8c7d17b542 100644 --- a/tests/test_modeling_dpr.py +++ b/tests/test_modeling_dpr.py @@ -16,7 +16,7 @@ import unittest -from transformers import is_torch_available +from transformers import DPRConfig, 
is_torch_available from transformers.testing_utils import require_torch, slow, torch_device from .test_configuration_common import ConfigTester @@ -26,7 +26,7 @@ from .test_modeling_common import ModelTesterMixin, ids_tensor, random_attention if is_torch_available(): import torch - from transformers import DPRConfig, DPRContextEncoder, DPRQuestionEncoder, DPRReader, DPRReaderTokenizer + from transformers import DPRContextEncoder, DPRQuestionEncoder, DPRReader, DPRReaderTokenizer from transformers.models.dpr.modeling_dpr import ( DPR_CONTEXT_ENCODER_PRETRAINED_MODEL_ARCHIVE_LIST, DPR_QUESTION_ENCODER_PRETRAINED_MODEL_ARCHIVE_LIST, @@ -104,7 +104,12 @@ class DPRModelTester: token_labels = ids_tensor([self.batch_size, self.seq_length], self.num_labels) choice_labels = ids_tensor([self.batch_size], self.num_choices) - config = DPRConfig( + config = self.get_config() + + return config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels + + def get_config(self): + return DPRConfig( projection_dim=self.projection_dim, vocab_size=self.vocab_size, hidden_size=self.hidden_size, @@ -119,8 +124,6 @@ class DPRModelTester: initializer_range=self.initializer_range, ) - return config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels - def create_and_check_context_encoder( self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels ): diff --git a/tests/test_modeling_electra.py b/tests/test_modeling_electra.py index 8fcbb445a1..a19af17f52 100644 --- a/tests/test_modeling_electra.py +++ b/tests/test_modeling_electra.py @@ -16,7 +16,7 @@ import unittest -from transformers import is_torch_available +from transformers import ElectraConfig, is_torch_available from transformers.models.auto import get_values from transformers.testing_utils import require_torch, slow, torch_device @@ -29,7 +29,6 @@ if is_torch_available(): from transformers import ( MODEL_FOR_PRETRAINING_MAPPING, - 
ElectraConfig, ElectraForMaskedLM, ElectraForMultipleChoice, ElectraForPreTraining, @@ -89,7 +88,21 @@ class ElectraModelTester: choice_labels = ids_tensor([self.batch_size], self.num_choices) fake_token_labels = ids_tensor([self.batch_size, self.seq_length], 1) - config = ElectraConfig( + config = self.get_config() + + return ( + config, + input_ids, + token_type_ids, + input_mask, + sequence_labels, + token_labels, + choice_labels, + fake_token_labels, + ) + + def get_config(self): + return ElectraConfig( vocab_size=self.vocab_size, hidden_size=self.hidden_size, num_hidden_layers=self.num_hidden_layers, @@ -104,17 +117,6 @@ class ElectraModelTester: initializer_range=self.initializer_range, ) - return ( - config, - input_ids, - token_type_ids, - input_mask, - sequence_labels, - token_labels, - choice_labels, - fake_token_labels, - ) - def create_and_check_electra_model( self, config, diff --git a/tests/test_modeling_flaubert.py b/tests/test_modeling_flaubert.py index 5f5f2d6805..1d3daa2cab 100644 --- a/tests/test_modeling_flaubert.py +++ b/tests/test_modeling_flaubert.py @@ -13,10 +13,9 @@ # See the License for the specific language governing permissions and # limitations under the License. 
- import unittest -from transformers import is_torch_available +from transformers import FlaubertConfig, is_torch_available from transformers.testing_utils import require_torch, slow, torch_device from .test_configuration_common import ConfigTester @@ -27,7 +26,6 @@ if is_torch_available(): import torch from transformers import ( - FlaubertConfig, FlaubertForMultipleChoice, FlaubertForQuestionAnswering, FlaubertForQuestionAnsweringSimple, @@ -96,7 +94,22 @@ class FlaubertModelTester(object): is_impossible_labels = ids_tensor([self.batch_size], 2).float() choice_labels = ids_tensor([self.batch_size], self.num_choices) - config = FlaubertConfig( + config = self.get_config() + + return ( + config, + input_ids, + token_type_ids, + input_lengths, + sequence_labels, + token_labels, + is_impossible_labels, + choice_labels, + input_mask, + ) + + def get_config(self): + return FlaubertConfig( vocab_size=self.vocab_size, n_special=self.n_special, emb_dim=self.hidden_size, @@ -115,18 +128,6 @@ class FlaubertModelTester(object): use_proj=self.use_proj, ) - return ( - config, - input_ids, - token_type_ids, - input_lengths, - sequence_labels, - token_labels, - is_impossible_labels, - choice_labels, - input_mask, - ) - def create_and_check_flaubert_model( self, config, diff --git a/tests/test_modeling_fsmt.py b/tests/test_modeling_fsmt.py index 7c3ba4a1e8..8d8bb77142 100644 --- a/tests/test_modeling_fsmt.py +++ b/tests/test_modeling_fsmt.py @@ -19,7 +19,7 @@ import unittest import timeout_decorator # noqa from parameterized import parameterized -from transformers import is_torch_available +from transformers import FSMTConfig, is_torch_available from transformers.file_utils import cached_property from transformers.testing_utils import require_sentencepiece, require_tokenizers, require_torch, slow, torch_device @@ -32,7 +32,7 @@ if is_torch_available(): import torch from torch import nn - from transformers import FSMTConfig, FSMTForConditionalGeneration, FSMTModel, FSMTTokenizer + 
from transformers import FSMTForConditionalGeneration, FSMTModel, FSMTTokenizer from transformers.models.fsmt.modeling_fsmt import ( SinusoidalPositionalEmbedding, _prepare_fsmt_decoder_inputs, @@ -42,8 +42,7 @@ if is_torch_available(): from transformers.pipelines import TranslationPipeline -@require_torch -class ModelTester: +class FSMTModelTester: def __init__( self, parent, @@ -78,7 +77,12 @@ class ModelTester: ) input_ids[:, -1] = 2 # Eos Token - config = FSMTConfig( + config = self.get_config() + inputs_dict = prepare_fsmt_inputs_dict(config, input_ids) + return config, inputs_dict + + def get_config(self): + return FSMTConfig( vocab_size=self.src_vocab_size, # hack needed for common tests src_vocab_size=self.src_vocab_size, tgt_vocab_size=self.tgt_vocab_size, @@ -97,8 +101,6 @@ class ModelTester: bos_token_id=self.bos_token_id, pad_token_id=self.pad_token_id, ) - inputs_dict = prepare_fsmt_inputs_dict(config, input_ids) - return config, inputs_dict def prepare_config_and_inputs_for_common(self): config, inputs_dict = self.prepare_config_and_inputs() @@ -141,7 +143,7 @@ class FSMTModelTest(ModelTesterMixin, GenerationTesterMixin, unittest.TestCase): test_missing_keys = False def setUp(self): - self.model_tester = ModelTester(self) + self.model_tester = FSMTModelTester(self) self.langs = ["en", "ru"] config = { "langs": self.langs, diff --git a/tests/test_modeling_funnel.py b/tests/test_modeling_funnel.py index c7f8f7bf0e..0f720d3b42 100644 --- a/tests/test_modeling_funnel.py +++ b/tests/test_modeling_funnel.py @@ -16,7 +16,7 @@ import unittest -from transformers import FunnelTokenizer, is_torch_available +from transformers import FunnelConfig, FunnelTokenizer, is_torch_available from transformers.models.auto import get_values from transformers.testing_utils import require_sentencepiece, require_tokenizers, require_torch, slow, torch_device @@ -30,7 +30,6 @@ if is_torch_available(): from transformers import ( MODEL_FOR_PRETRAINING_MAPPING, FunnelBaseModel, - 
FunnelConfig, FunnelForMaskedLM, FunnelForMultipleChoice, FunnelForPreTraining, @@ -127,7 +126,21 @@ class FunnelModelTester: choice_labels = ids_tensor([self.batch_size], self.num_choices) fake_token_labels = ids_tensor([self.batch_size, self.seq_length], 1) - config = FunnelConfig( + config = self.get_config() + + return ( + config, + input_ids, + token_type_ids, + input_mask, + sequence_labels, + token_labels, + choice_labels, + fake_token_labels, + ) + + def get_config(self): + return FunnelConfig( vocab_size=self.vocab_size, block_sizes=self.block_sizes, num_decoder_layers=self.num_decoder_layers, @@ -143,17 +156,6 @@ class FunnelModelTester: type_vocab_size=self.type_vocab_size, ) - return ( - config, - input_ids, - token_type_ids, - input_mask, - sequence_labels, - token_labels, - choice_labels, - fake_token_labels, - ) - def create_and_check_model( self, config, diff --git a/tests/test_modeling_gpt2.py b/tests/test_modeling_gpt2.py index ff00231b4a..94abcbfecf 100644 --- a/tests/test_modeling_gpt2.py +++ b/tests/test_modeling_gpt2.py @@ -17,7 +17,7 @@ import datetime import unittest -from transformers import is_torch_available +from transformers import GPT2Config, is_torch_available from transformers.testing_utils import require_torch, slow, torch_device from .test_configuration_common import ConfigTester @@ -30,7 +30,6 @@ if is_torch_available(): from transformers import ( GPT2_PRETRAINED_MODEL_ARCHIVE_LIST, - GPT2Config, GPT2DoubleHeadsModel, GPT2ForSequenceClassification, GPT2LMHeadModel, @@ -119,25 +118,7 @@ class GPT2ModelTester: token_labels = ids_tensor([self.batch_size, self.seq_length], self.num_labels) choice_labels = ids_tensor([self.batch_size], self.num_choices) - config = GPT2Config( - vocab_size=self.vocab_size, - n_embd=self.hidden_size, - n_layer=self.num_hidden_layers, - n_head=self.num_attention_heads, - # intermediate_size=self.intermediate_size, - # hidden_act=self.hidden_act, - # hidden_dropout_prob=self.hidden_dropout_prob, - # 
attention_probs_dropout_prob=self.attention_probs_dropout_prob, - n_positions=self.max_position_embeddings, - n_ctx=self.max_position_embeddings, - # type_vocab_size=self.type_vocab_size, - # initializer_range=self.initializer_range, - use_cache=not gradient_checkpointing, - bos_token_id=self.bos_token_id, - eos_token_id=self.eos_token_id, - pad_token_id=self.pad_token_id, - gradient_checkpointing=gradient_checkpointing, - ) + config = self.get_config(gradient_checkpointing=gradient_checkpointing) head_mask = ids_tensor([self.num_hidden_layers, self.num_attention_heads], 2) @@ -153,6 +134,27 @@ class GPT2ModelTester: choice_labels, ) + def get_config(self, gradient_checkpointing=False): + return GPT2Config( + vocab_size=self.vocab_size, + n_embd=self.hidden_size, + n_layer=self.num_hidden_layers, + n_head=self.num_attention_heads, + intermediate_size=self.intermediate_size, + hidden_act=self.hidden_act, + hidden_dropout_prob=self.hidden_dropout_prob, + attention_probs_dropout_prob=self.attention_probs_dropout_prob, + n_positions=self.max_position_embeddings, + n_ctx=self.max_position_embeddings, + type_vocab_size=self.type_vocab_size, + initializer_range=self.initializer_range, + use_cache=not gradient_checkpointing, + bos_token_id=self.bos_token_id, + eos_token_id=self.eos_token_id, + pad_token_id=self.pad_token_id, + gradient_checkpointing=gradient_checkpointing, + ) + def prepare_config_and_inputs_for_decoder(self): ( config, diff --git a/tests/test_modeling_gpt_neo.py b/tests/test_modeling_gpt_neo.py index dab9f02c58..7a6b9e5514 100644 --- a/tests/test_modeling_gpt_neo.py +++ b/tests/test_modeling_gpt_neo.py @@ -17,7 +17,7 @@ import unittest -from transformers import is_torch_available +from transformers import GPTNeoConfig, is_torch_available from transformers.file_utils import cached_property from transformers.testing_utils import require_torch, slow, torch_device @@ -32,7 +32,6 @@ if is_torch_available(): from transformers import ( 
GPT_NEO_PRETRAINED_MODEL_ARCHIVE_LIST, GPT2Tokenizer, - GPTNeoConfig, GPTNeoForCausalLM, GPTNeoForSequenceClassification, GPTNeoModel, @@ -123,20 +122,7 @@ class GPTNeoModelTester: token_labels = ids_tensor([self.batch_size, self.seq_length], self.num_labels) choice_labels = ids_tensor([self.batch_size], self.num_choices) - config = GPTNeoConfig( - vocab_size=self.vocab_size, - hidden_size=self.hidden_size, - num_layers=self.num_hidden_layers, - num_heads=self.num_attention_heads, - max_position_embeddings=self.max_position_embeddings, - use_cache=not gradient_checkpointing, - bos_token_id=self.bos_token_id, - eos_token_id=self.eos_token_id, - pad_token_id=self.pad_token_id, - gradient_checkpointing=gradient_checkpointing, - window_size=self.window_size, - attention_types=self.attention_types, - ) + config = self.get_config(gradient_checkpointing=gradient_checkpointing) head_mask = ids_tensor([self.num_hidden_layers, self.num_attention_heads], 2) @@ -152,6 +138,22 @@ class GPTNeoModelTester: choice_labels, ) + def get_config(self, gradient_checkpointing=False): + return GPTNeoConfig( + vocab_size=self.vocab_size, + hidden_size=self.hidden_size, + num_layers=self.num_hidden_layers, + num_heads=self.num_attention_heads, + max_position_embeddings=self.max_position_embeddings, + use_cache=not gradient_checkpointing, + bos_token_id=self.bos_token_id, + eos_token_id=self.eos_token_id, + pad_token_id=self.pad_token_id, + gradient_checkpointing=gradient_checkpointing, + window_size=self.window_size, + attention_types=self.attention_types, + ) + def prepare_config_and_inputs_for_decoder(self): ( config, diff --git a/tests/test_modeling_hubert.py b/tests/test_modeling_hubert.py index 31ac299646..206f5be5a7 100644 --- a/tests/test_modeling_hubert.py +++ b/tests/test_modeling_hubert.py @@ -21,7 +21,7 @@ import unittest import pytest from tests.test_modeling_common import floats_tensor, ids_tensor, random_attention_mask -from transformers import is_torch_available +from transformers import
HubertConfig, is_torch_available from transformers.testing_utils import require_datasets, require_soundfile, require_torch, slow, torch_device from .test_configuration_common import ConfigTester @@ -31,7 +31,7 @@ from .test_modeling_common import ModelTesterMixin, _config_zero_init if is_torch_available(): import torch - from transformers import HubertConfig, HubertForCTC, HubertModel, Wav2Vec2Processor + from transformers import HubertForCTC, HubertModel, Wav2Vec2Processor from transformers.models.hubert.modeling_hubert import _compute_mask_indices @@ -98,7 +98,12 @@ class HubertModelTester: input_values = floats_tensor([self.batch_size, self.seq_length], self.vocab_size) attention_mask = random_attention_mask([self.batch_size, self.seq_length]) - config = HubertConfig( + config = self.get_config() + + return config, input_values, attention_mask + + def get_config(self): + return HubertConfig( hidden_size=self.hidden_size, feat_extract_norm=self.feat_extract_norm, feat_extract_dropout=self.feat_extract_dropout, @@ -119,8 +124,6 @@ class HubertModelTester: vocab_size=self.vocab_size, ) - return config, input_values, attention_mask - def create_and_check_model(self, config, input_values, attention_mask): model = HubertModel(config=config) model.to(torch_device) diff --git a/tests/test_modeling_ibert.py b/tests/test_modeling_ibert.py index d0b672193c..5143090069 100755 --- a/tests/test_modeling_ibert.py +++ b/tests/test_modeling_ibert.py @@ -17,7 +17,7 @@ import copy import unittest -from transformers import is_torch_available +from transformers import IBertConfig, is_torch_available from transformers.testing_utils import require_torch, slow, torch_device from .test_configuration_common import ConfigTester @@ -30,7 +30,6 @@ if is_torch_available(): from transformers import ( IBERT_PRETRAINED_MODEL_ARCHIVE_LIST, - IBertConfig, IBertForMaskedLM, IBertForMultipleChoice, IBertForQuestionAnswering, @@ -97,7 +96,12 @@ class IBertModelTester: token_labels = 
ids_tensor([self.batch_size, self.seq_length], self.num_labels) choice_labels = ids_tensor([self.batch_size], self.num_choices) - config = IBertConfig( + config = self.get_config() + + return config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels + + def get_config(self): + return IBertConfig( vocab_size=self.vocab_size, hidden_size=self.hidden_size, num_hidden_layers=self.num_hidden_layers, @@ -112,8 +116,6 @@ class IBertModelTester: quant_mode=True, ) - return config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels - def create_and_check_model( self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels ): diff --git a/tests/test_modeling_layoutlm.py b/tests/test_modeling_layoutlm.py index a62d13e8fc..67423fe21f 100644 --- a/tests/test_modeling_layoutlm.py +++ b/tests/test_modeling_layoutlm.py @@ -16,7 +16,7 @@ import unittest -from transformers import is_torch_available +from transformers import LayoutLMConfig, is_torch_available from transformers.testing_utils import require_torch, slow, torch_device from .test_configuration_common import ConfigTester @@ -27,7 +27,6 @@ if is_torch_available(): import torch from transformers import ( - LayoutLMConfig, LayoutLMForMaskedLM, LayoutLMForSequenceClassification, LayoutLMForTokenClassification, @@ -120,7 +119,12 @@ class LayoutLMModelTester: token_labels = ids_tensor([self.batch_size, self.seq_length], self.num_labels) choice_labels = ids_tensor([self.batch_size], self.num_choices) - config = LayoutLMConfig( + config = self.get_config() + + return config, input_ids, bbox, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels + + def get_config(self): + return LayoutLMConfig( vocab_size=self.vocab_size, hidden_size=self.hidden_size, num_hidden_layers=self.num_hidden_layers, @@ -134,8 +138,6 @@ class LayoutLMModelTester: initializer_range=self.initializer_range, ) - return config, input_ids, 
bbox, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels - def create_and_check_model( self, config, input_ids, bbox, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels ): diff --git a/tests/test_modeling_led.py b/tests/test_modeling_led.py index 58323886c7..bfad0388b1 100644 --- a/tests/test_modeling_led.py +++ b/tests/test_modeling_led.py @@ -19,7 +19,7 @@ import copy import tempfile import unittest -from transformers import is_torch_available +from transformers import LEDConfig, is_torch_available from transformers.file_utils import cached_property from transformers.models.auto import get_values from transformers.testing_utils import require_sentencepiece, require_tokenizers, require_torch, slow, torch_device @@ -34,7 +34,6 @@ if is_torch_available(): from transformers import ( MODEL_FOR_QUESTION_ANSWERING_MAPPING, - LEDConfig, LEDForConditionalGeneration, LEDForQuestionAnswering, LEDForSequenceClassification, @@ -75,7 +74,6 @@ def prepare_led_inputs_dict( } -@require_torch class LEDModelTester: def __init__( self, @@ -141,7 +139,12 @@ class LEDModelTester: decoder_input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size) - config = LEDConfig( + config = self.get_config() + inputs_dict = prepare_led_inputs_dict(config, input_ids, decoder_input_ids) + return config, inputs_dict + + def get_config(self): + return LEDConfig( vocab_size=self.vocab_size, d_model=self.hidden_size, encoder_layers=self.num_hidden_layers, @@ -158,8 +161,6 @@ class LEDModelTester: pad_token_id=self.pad_token_id, attention_window=self.attention_window, ) - inputs_dict = prepare_led_inputs_dict(config, input_ids, decoder_input_ids) - return config, inputs_dict def prepare_config_and_inputs_for_common(self): config, inputs_dict = self.prepare_config_and_inputs() diff --git a/tests/test_modeling_longformer.py b/tests/test_modeling_longformer.py index c5d5eee162..7bff80ed13 100644 --- a/tests/test_modeling_longformer.py +++ 
b/tests/test_modeling_longformer.py @@ -16,7 +16,7 @@ import unittest -from transformers import is_torch_available +from transformers import LongformerConfig, is_torch_available from transformers.testing_utils import require_sentencepiece, require_tokenizers, require_torch, slow, torch_device from .test_configuration_common import ConfigTester @@ -27,7 +27,6 @@ if is_torch_available(): import torch from transformers import ( - LongformerConfig, LongformerForMaskedLM, LongformerForMultipleChoice, LongformerForQuestionAnswering, @@ -100,7 +99,12 @@ class LongformerModelTester: token_labels = ids_tensor([self.batch_size, self.seq_length], self.num_labels) choice_labels = ids_tensor([self.batch_size], self.num_choices) - config = LongformerConfig( + config = self.get_config() + + return config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels + + def get_config(self): + return LongformerConfig( vocab_size=self.vocab_size, hidden_size=self.hidden_size, num_hidden_layers=self.num_hidden_layers, @@ -115,8 +119,6 @@ class LongformerModelTester: attention_window=self.attention_window, ) - return config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels - def create_and_check_attention_mask_determinism( self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels ): diff --git a/tests/test_modeling_luke.py b/tests/test_modeling_luke.py index 1343da5ce2..99ef4686ad 100644 --- a/tests/test_modeling_luke.py +++ b/tests/test_modeling_luke.py @@ -13,10 +13,9 @@ # See the License for the specific language governing permissions and # limitations under the License. """ Testing suite for the PyTorch LUKE model. 
""" - import unittest -from transformers import is_torch_available +from transformers import LukeConfig, is_torch_available from transformers.testing_utils import require_torch, slow, torch_device from .test_configuration_common import ConfigTester @@ -27,7 +26,6 @@ if is_torch_available(): import torch from transformers import ( - LukeConfig, LukeForEntityClassification, LukeForEntityPairClassification, LukeForEntitySpanClassification, @@ -154,7 +152,25 @@ class LukeModelTester: [self.batch_size, self.entity_length], self.num_entity_span_classification_labels ) - config = LukeConfig( + config = self.get_config() + + return ( + config, + input_ids, + attention_mask, + token_type_ids, + entity_ids, + entity_attention_mask, + entity_token_type_ids, + entity_position_ids, + sequence_labels, + entity_classification_labels, + entity_pair_classification_labels, + entity_span_classification_labels, + ) + + def get_config(self): + return LukeConfig( vocab_size=self.vocab_size, entity_vocab_size=self.entity_vocab_size, entity_emb_size=self.entity_emb_size, @@ -172,21 +188,6 @@ class LukeModelTester: use_entity_aware_attention=self.use_entity_aware_attention, ) - return ( - config, - input_ids, - attention_mask, - token_type_ids, - entity_ids, - entity_attention_mask, - entity_token_type_ids, - entity_position_ids, - sequence_labels, - entity_classification_labels, - entity_pair_classification_labels, - entity_span_classification_labels, - ) - def create_and_check_model( self, config, diff --git a/tests/test_modeling_lxmert.py b/tests/test_modeling_lxmert.py index 624d3e5e5c..e881f8ccfd 100644 --- a/tests/test_modeling_lxmert.py +++ b/tests/test_modeling_lxmert.py @@ -19,7 +19,7 @@ import unittest import numpy as np -from transformers import is_torch_available +from transformers import LxmertConfig, is_torch_available from transformers.models.auto import get_values from transformers.testing_utils import require_torch, slow, torch_device @@ -33,7 +33,6 @@ if 
is_torch_available(): from transformers import ( MODEL_FOR_PRETRAINING_MAPPING, MODEL_FOR_QUESTION_ANSWERING_MAPPING, - LxmertConfig, LxmertForPreTraining, LxmertForQuestionAnswering, LxmertModel, @@ -170,7 +169,24 @@ class LxmertModelTester: if self.task_matched: matched_label = ids_tensor([self.batch_size], self.num_labels) - config = LxmertConfig( + config = self.get_config() + + return ( + config, + input_ids, + visual_feats, + bounding_boxes, + token_type_ids, + input_mask, + obj_labels, + masked_lm_labels, + matched_label, + ans, + output_attentions, + ) + + def get_config(self): + return LxmertConfig( vocab_size=self.vocab_size, hidden_size=self.hidden_size, num_attention_heads=self.num_attention_heads, @@ -204,20 +220,6 @@ class LxmertModelTester: output_hidden_states=self.output_hidden_states, ) - return ( - config, - input_ids, - visual_feats, - bounding_boxes, - token_type_ids, - input_mask, - obj_labels, - masked_lm_labels, - matched_label, - ans, - output_attentions, - ) - def create_and_check_lxmert_model( self, config, diff --git a/tests/test_modeling_m2m_100.py b/tests/test_modeling_m2m_100.py index e39876e4ee..4625a25a74 100644 --- a/tests/test_modeling_m2m_100.py +++ b/tests/test_modeling_m2m_100.py @@ -19,7 +19,7 @@ import copy import tempfile import unittest -from transformers import is_torch_available +from transformers import M2M100Config, is_torch_available from transformers.file_utils import cached_property from transformers.testing_utils import require_sentencepiece, require_tokenizers, require_torch, slow, torch_device @@ -31,7 +31,7 @@ from .test_modeling_common import ModelTesterMixin, ids_tensor if is_torch_available(): import torch - from transformers import M2M100Config, M2M100ForConditionalGeneration, M2M100Model, M2M100Tokenizer + from transformers import M2M100ForConditionalGeneration, M2M100Model, M2M100Tokenizer from transformers.models.m2m_100.modeling_m2m_100 import M2M100Decoder, M2M100Encoder @@ -66,7 +66,6 @@ def 
prepare_m2m_100_inputs_dict( } -@require_torch class M2M100ModelTester: def __init__( self, @@ -125,7 +124,12 @@ class M2M100ModelTester: input_ids = input_ids.clamp(self.pad_token_id + 1) decoder_input_ids = decoder_input_ids.clamp(self.pad_token_id + 1) - config = M2M100Config( + config = self.get_config() + inputs_dict = prepare_m2m_100_inputs_dict(config, input_ids, decoder_input_ids) + return config, inputs_dict + + def get_config(self): + return M2M100Config( vocab_size=self.vocab_size, d_model=self.hidden_size, encoder_layers=self.num_hidden_layers, @@ -143,8 +147,6 @@ class M2M100ModelTester: bos_token_id=self.bos_token_id, pad_token_id=self.pad_token_id, ) - inputs_dict = prepare_m2m_100_inputs_dict(config, input_ids, decoder_input_ids) - return config, inputs_dict def prepare_config_and_inputs_for_common(self): config, inputs_dict = self.prepare_config_and_inputs() diff --git a/tests/test_modeling_marian.py b/tests/test_modeling_marian.py index 7b6cb15306..839a1d32e3 100644 --- a/tests/test_modeling_marian.py +++ b/tests/test_modeling_marian.py @@ -17,7 +17,7 @@ import tempfile import unittest -from transformers import is_torch_available +from transformers import MarianConfig, is_torch_available from transformers.file_utils import cached_property from transformers.hf_api import HfApi from transformers.testing_utils import require_sentencepiece, require_tokenizers, require_torch, slow, torch_device @@ -34,7 +34,6 @@ if is_torch_available(): AutoConfig, AutoModelWithLMHead, AutoTokenizer, - MarianConfig, MarianModel, MarianMTModel, TranslationPipeline, @@ -83,7 +82,6 @@ def prepare_marian_inputs_dict( } -@require_torch class MarianModelTester: def __init__( self, @@ -126,7 +124,6 @@ class MarianModelTester: self.decoder_start_token_id = decoder_start_token_id def prepare_config_and_inputs(self): - input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size) input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size).clamp( 3, 
) @@ -134,7 +131,12 @@ class MarianModelTester: decoder_input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size) - config = MarianConfig( + config = self.get_config() + inputs_dict = prepare_marian_inputs_dict(config, input_ids, decoder_input_ids) + return config, inputs_dict + + def get_config(self): + return MarianConfig( vocab_size=self.vocab_size, d_model=self.hidden_size, encoder_layers=self.num_hidden_layers, @@ -151,8 +153,6 @@ class MarianModelTester: pad_token_id=self.pad_token_id, decoder_start_token_id=self.decoder_start_token_id, ) - inputs_dict = prepare_marian_inputs_dict(config, input_ids, decoder_input_ids) - return config, inputs_dict def prepare_config_and_inputs_for_common(self): config, inputs_dict = self.prepare_config_and_inputs() diff --git a/tests/test_modeling_mbart.py b/tests/test_modeling_mbart.py index 6e3a579418..5d556bf82e 100644 --- a/tests/test_modeling_mbart.py +++ b/tests/test_modeling_mbart.py @@ -19,7 +19,7 @@ import copy import tempfile import unittest -from transformers import is_torch_available +from transformers import MBartConfig, is_torch_available from transformers.file_utils import cached_property from transformers.testing_utils import require_sentencepiece, require_tokenizers, require_torch, slow, torch_device @@ -34,7 +34,6 @@ if is_torch_available(): from transformers import ( AutoTokenizer, BatchEncoding, - MBartConfig, MBartForCausalLM, MBartForConditionalGeneration, MBartForQuestionAnswering, @@ -75,7 +74,6 @@ def prepare_mbart_inputs_dict( } -@require_torch class MBartModelTester: def __init__( self, @@ -124,7 +122,12 @@ class MBartModelTester: decoder_input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size) - config = MBartConfig( + config = self.get_config() + inputs_dict = prepare_mbart_inputs_dict(config, input_ids, decoder_input_ids) + return config, inputs_dict + + def get_config(self): + return MBartConfig( vocab_size=self.vocab_size, d_model=self.hidden_size, 
encoder_layers=self.num_hidden_layers, @@ -140,8 +143,6 @@ class MBartModelTester: bos_token_id=self.bos_token_id, pad_token_id=self.pad_token_id, ) - inputs_dict = prepare_mbart_inputs_dict(config, input_ids, decoder_input_ids) - return config, inputs_dict def prepare_config_and_inputs_for_common(self): config, inputs_dict = self.prepare_config_and_inputs() diff --git a/tests/test_modeling_megatron_bert.py b/tests/test_modeling_megatron_bert.py index 0cfc444e04..5a06d57a9e 100644 --- a/tests/test_modeling_megatron_bert.py +++ b/tests/test_modeling_megatron_bert.py @@ -19,7 +19,7 @@ import math import os import unittest -from transformers import is_torch_available +from transformers import MegatronBertConfig, is_torch_available from transformers.models.auto import get_values from transformers.testing_utils import require_sentencepiece, require_tokenizers, require_torch, slow, torch_device @@ -32,7 +32,6 @@ if is_torch_available(): from transformers import ( MODEL_FOR_PRETRAINING_MAPPING, - MegatronBertConfig, MegatronBertForCausalLM, MegatronBertForMaskedLM, MegatronBertForMultipleChoice, @@ -115,7 +114,12 @@ class MegatronBertModelTester: token_labels = ids_tensor([self.batch_size, self.seq_length], self.num_labels) choice_labels = ids_tensor([self.batch_size], self.num_choices) - config = MegatronBertConfig( + config = self.get_config() + + return config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels + + def get_config(self): + return MegatronBertConfig( vocab_size=self.vocab_size, hidden_size=self.hidden_size, num_hidden_layers=self.num_hidden_layers, @@ -131,8 +135,6 @@ class MegatronBertModelTester: initializer_range=self.initializer_range, ) - return config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels - def create_and_check_megatron_bert_model( self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels ): diff --git 
a/tests/test_modeling_mobilebert.py b/tests/test_modeling_mobilebert.py index 3ebc770252..ec90b9b1b7 100644 --- a/tests/test_modeling_mobilebert.py +++ b/tests/test_modeling_mobilebert.py @@ -16,7 +16,7 @@ import unittest -from transformers import is_torch_available +from transformers import MobileBertConfig, is_torch_available from transformers.models.auto import get_values from transformers.testing_utils import require_sentencepiece, require_tokenizers, require_torch, slow, torch_device @@ -29,7 +29,6 @@ if is_torch_available(): from transformers import ( MODEL_FOR_PRETRAINING_MAPPING, - MobileBertConfig, MobileBertForMaskedLM, MobileBertForMultipleChoice, MobileBertForNextSentencePrediction, @@ -111,7 +110,12 @@ class MobileBertModelTester: token_labels = ids_tensor([self.batch_size, self.seq_length], self.num_labels) choice_labels = ids_tensor([self.batch_size], self.num_choices) - config = MobileBertConfig( + config = self.get_config() + + return config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels + + def get_config(self): + return MobileBertConfig( vocab_size=self.vocab_size, hidden_size=self.hidden_size, num_hidden_layers=self.num_hidden_layers, @@ -127,8 +131,6 @@ class MobileBertModelTester: initializer_range=self.initializer_range, ) - return config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels - def create_and_check_mobilebert_model( self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels ): diff --git a/tests/test_modeling_mpnet.py b/tests/test_modeling_mpnet.py index 1d63824c45..7ce155226e 100644 --- a/tests/test_modeling_mpnet.py +++ b/tests/test_modeling_mpnet.py @@ -16,7 +16,7 @@ import unittest -from transformers import is_torch_available +from transformers import MPNetConfig, is_torch_available from transformers.testing_utils import require_torch, slow, torch_device from .test_configuration_common import ConfigTester @@ -27,7 
+27,6 @@ if is_torch_available(): import torch from transformers import ( - MPNetConfig, MPNetForMaskedLM, MPNetForMultipleChoice, MPNetForQuestionAnswering, @@ -104,7 +103,11 @@ class MPNetModelTester: token_labels = ids_tensor([self.batch_size, self.seq_length], self.num_labels) choice_labels = ids_tensor([self.batch_size], self.num_choices) - config = MPNetConfig( + config = self.get_config() + return config, input_ids, input_mask, sequence_labels, token_labels, choice_labels + + def get_config(self): + return MPNetConfig( vocab_size=self.vocab_size, hidden_size=self.hidden_size, num_hidden_layers=self.num_hidden_layers, @@ -116,7 +119,6 @@ class MPNetModelTester: max_position_embeddings=self.max_position_embeddings, initializer_range=self.initializer_range, ) - return config, input_ids, input_mask, sequence_labels, token_labels, choice_labels def create_and_check_mpnet_model( self, config, input_ids, input_mask, sequence_labels, token_labels, choice_labels diff --git a/tests/test_modeling_pegasus.py b/tests/test_modeling_pegasus.py index 4106793332..0515ee9577 100644 --- a/tests/test_modeling_pegasus.py +++ b/tests/test_modeling_pegasus.py @@ -17,7 +17,7 @@ import tempfile import unittest -from transformers import is_torch_available +from transformers import PegasusConfig, is_torch_available from transformers.file_utils import cached_property from transformers.testing_utils import require_sentencepiece, require_tokenizers, require_torch, slow, torch_device @@ -30,7 +30,7 @@ from .test_modeling_mbart import AbstractSeq2SeqIntegrationTest if is_torch_available(): import torch - from transformers import AutoModelForSeq2SeqLM, PegasusConfig, PegasusForConditionalGeneration, PegasusModel + from transformers import AutoModelForSeq2SeqLM, PegasusForConditionalGeneration, PegasusModel from transformers.models.pegasus.modeling_pegasus import PegasusDecoder, PegasusEncoder, PegasusForCausalLM @@ -65,7 +65,6 @@ def prepare_pegasus_inputs_dict( } -@require_torch class 
PegasusModelTester: def __init__( self, @@ -114,7 +113,12 @@ class PegasusModelTester: decoder_input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size) - config = PegasusConfig( + config = self.get_config() + inputs_dict = prepare_pegasus_inputs_dict(config, input_ids, decoder_input_ids) + return config, inputs_dict + + def get_config(self): + return PegasusConfig( vocab_size=self.vocab_size, d_model=self.hidden_size, encoder_layers=self.num_hidden_layers, @@ -130,8 +134,6 @@ class PegasusModelTester: bos_token_id=self.bos_token_id, pad_token_id=self.pad_token_id, ) - inputs_dict = prepare_pegasus_inputs_dict(config, input_ids, decoder_input_ids) - return config, inputs_dict def prepare_config_and_inputs_for_common(self): config, inputs_dict = self.prepare_config_and_inputs() diff --git a/tests/test_modeling_prophetnet.py b/tests/test_modeling_prophetnet.py index 32f1000444..3f6f942cd5 100644 --- a/tests/test_modeling_prophetnet.py +++ b/tests/test_modeling_prophetnet.py @@ -13,12 +13,11 @@ # See the License for the specific language governing permissions and # limitations under the License. 
- import copy import tempfile import unittest -from transformers import is_torch_available +from transformers import ProphetNetConfig, is_torch_available from transformers.testing_utils import require_torch, slow, torch_device from .test_configuration_common import ConfigTester @@ -30,7 +29,6 @@ if is_torch_available(): import torch from transformers import ( - ProphetNetConfig, ProphetNetDecoder, ProphetNetEncoder, ProphetNetForCausalLM, @@ -124,7 +122,19 @@ class ProphetNetModelTester: if self.use_labels: lm_labels = ids_tensor([self.batch_size, self.decoder_seq_length], self.vocab_size) - config = ProphetNetConfig( + config = self.get_config() + + return ( + config, + input_ids, + decoder_input_ids, + attention_mask, + decoder_attention_mask, + lm_labels, + ) + + def get_config(self): + return ProphetNetConfig( vocab_size=self.vocab_size, hidden_size=self.hidden_size, num_encoder_layers=self.num_encoder_layers, @@ -145,15 +155,6 @@ class ProphetNetModelTester: is_encoder_decoder=self.is_encoder_decoder, ) - return ( - config, - input_ids, - decoder_input_ids, - attention_mask, - decoder_attention_mask, - lm_labels, - ) - def prepare_config_and_inputs_for_decoder(self): ( config, diff --git a/tests/test_modeling_reformer.py b/tests/test_modeling_reformer.py index e8e5129a10..ad04643b76 100644 --- a/tests/test_modeling_reformer.py +++ b/tests/test_modeling_reformer.py @@ -15,7 +15,7 @@ import unittest -from transformers import is_torch_available +from transformers import ReformerConfig, is_torch_available from transformers.testing_utils import ( require_sentencepiece, require_tokenizers, @@ -36,7 +36,6 @@ if is_torch_available(): from transformers import ( REFORMER_PRETRAINED_MODEL_ARCHIVE_LIST, - ReformerConfig, ReformerForMaskedLM, ReformerForQuestionAnswering, ReformerForSequenceClassification, @@ -51,44 +50,44 @@ class ReformerModelTester: def __init__( self, parent, - batch_size=None, - seq_length=None, - is_training=None, - is_decoder=None, - 
use_input_mask=None, - use_labels=None, - vocab_size=None, - attention_head_size=None, - hidden_size=None, - num_attention_heads=None, - local_attn_chunk_length=None, - local_num_chunks_before=None, - local_num_chunks_after=None, + batch_size=13, + seq_length=32, + is_training=True, + is_decoder=True, + use_input_mask=True, + use_labels=True, + vocab_size=32, + attention_head_size=16, + hidden_size=32, + num_attention_heads=2, + local_attn_chunk_length=4, + local_num_chunks_before=1, + local_num_chunks_after=0, num_buckets=None, num_hashes=1, lsh_attn_chunk_length=None, lsh_num_chunks_before=None, lsh_num_chunks_after=None, - chunk_size_lm_head=None, - chunk_size_feed_forward=None, - feed_forward_size=None, - hidden_act=None, - hidden_dropout_prob=None, - local_attention_probs_dropout_prob=None, + chunk_size_lm_head=0, + chunk_size_feed_forward=0, + feed_forward_size=32, + hidden_act="gelu", + hidden_dropout_prob=0.1, + local_attention_probs_dropout_prob=0.1, lsh_attention_probs_dropout_prob=None, - max_position_embeddings=None, - initializer_range=None, - axial_norm_std=None, - layer_norm_eps=None, - axial_pos_embds=None, - axial_pos_shape=None, - axial_pos_embds_dim=None, - attn_layers=None, - pad_token_id=None, - eos_token_id=None, + max_position_embeddings=512, + initializer_range=0.02, + axial_norm_std=1.0, + layer_norm_eps=1e-12, + axial_pos_embds=True, + axial_pos_shape=[4, 8], + axial_pos_embds_dim=[16, 16], + attn_layers=["local", "local", "local", "local"], + pad_token_id=0, + eos_token_id=2, scope=None, - hash_seed=None, - num_labels=None, + hash_seed=0, + num_labels=2, ): self.parent = parent self.batch_size = batch_size @@ -101,7 +100,7 @@ class ReformerModelTester: self.attention_head_size = attention_head_size self.hidden_size = hidden_size self.num_attention_heads = num_attention_heads - self.num_hidden_layers = len(attn_layers) + self.num_hidden_layers = len(attn_layers) if attn_layers is not None else 0 self.local_attn_chunk_length = 
local_attn_chunk_length self.local_num_chunks_after = local_num_chunks_after self.local_num_chunks_before = local_num_chunks_before @@ -149,7 +148,17 @@ class ReformerModelTester: if self.use_labels: choice_labels = ids_tensor([self.batch_size], 2) - config = ReformerConfig( + config = self.get_config() + + return ( + config, + input_ids, + input_mask, + choice_labels, + ) + + def get_config(self): + return ReformerConfig( vocab_size=self.vocab_size, hidden_size=self.hidden_size, num_hidden_layers=self.num_hidden_layers, @@ -177,13 +186,6 @@ class ReformerModelTester: hash_seed=self.hash_seed, ) - return ( - config, - input_ids, - input_mask, - choice_labels, - ) - def create_and_check_reformer_model(self, config, input_ids, input_mask, choice_labels): model = ReformerModel(config=config) model.to(torch_device) @@ -593,45 +595,8 @@ class ReformerLocalAttnModelTest(ReformerTesterMixin, GenerationTesterMixin, Mod test_torchscript = False test_sequence_classification_problem_types = True - def prepare_kwargs(self): - return { - "batch_size": 13, - "seq_length": 32, - "is_training": True, - "is_decoder": True, - "use_input_mask": True, - "use_labels": True, - "vocab_size": 32, - "attention_head_size": 16, - "hidden_size": 32, - "num_attention_heads": 2, - "local_attn_chunk_length": 4, - "local_num_chunks_before": 1, - "local_num_chunks_after": 0, - "chunk_size_lm_head": 0, - "chunk_size_feed_forward": 0, - "feed_forward_size": 32, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "local_attention_probs_dropout_prob": 0.1, - "max_position_embeddings": 512, - "initializer_range": 0.02, - "axial_norm_std": 1.0, - "layer_norm_eps": 1e-12, - "axial_pos_embds": True, - "axial_pos_shape": [4, 8], - "axial_pos_embds_dim": [16, 16], - "attn_layers": ["local", "local", "local", "local"], - "pad_token_id": 0, - "eos_token_id": 2, - "scope": None, - "hash_seed": 0, - "num_labels": 2, - } - def setUp(self): - tester_kwargs = self.prepare_kwargs() - self.model_tester = 
ReformerModelTester(self, **tester_kwargs) + self.model_tester = ReformerModelTester(self) self.config_tester = ConfigTester(self, config_class=ReformerConfig, hidden_size=37) @slow @@ -716,49 +681,46 @@ class ReformerLSHAttnModelTest(ReformerTesterMixin, ModelTesterMixin, Generation test_headmasking = False test_torchscript = False - def prepare_kwargs(self): - return { - "batch_size": 13, - "seq_length": 13, - "use_input_mask": True, - "use_labels": True, - "is_training": False, - "is_decoder": True, - "vocab_size": 32, - "attention_head_size": 16, - "hidden_size": 64, - "num_attention_heads": 2, - "num_buckets": 2, - "num_hashes": 4, - "lsh_attn_chunk_length": 4, - "lsh_num_chunks_before": 1, - "lsh_num_chunks_after": 0, - "chunk_size_lm_head": 5, - "chunk_size_feed_forward": 6, - "feed_forward_size": 32, - "hidden_act": "relu", - "hidden_dropout_prob": 0.1, - "lsh_attention_probs_dropout_prob": 0.1, - "max_position_embeddings": 512, - "initializer_range": 0.02, - "axial_norm_std": 1.0, - "layer_norm_eps": 1e-12, - "axial_pos_embds": True, - "axial_pos_shape": [4, 8], - "axial_pos_embds_dim": [16, 48], - # sanotheu - # "attn_layers": ["lsh", "lsh", "lsh", "lsh"], - "attn_layers": ["lsh"], - "pad_token_id": 0, - "eos_token_id": 2, - "scope": None, - "hash_seed": 0, - "num_labels": 2, - } - def setUp(self): - tester_kwargs = self.prepare_kwargs() - self.model_tester = ReformerModelTester(self, **tester_kwargs) + self.model_tester = ReformerModelTester( + self, + batch_size=13, + seq_length=13, + use_input_mask=True, + use_labels=True, + is_training=False, + is_decoder=True, + vocab_size=32, + attention_head_size=16, + hidden_size=64, + num_attention_heads=2, + num_buckets=2, + num_hashes=4, + lsh_attn_chunk_length=4, + lsh_num_chunks_before=1, + lsh_num_chunks_after=0, + chunk_size_lm_head=5, + chunk_size_feed_forward=6, + feed_forward_size=32, + hidden_act="relu", + hidden_dropout_prob=0.1, + lsh_attention_probs_dropout_prob=0.1, + max_position_embeddings=512, + 
initializer_range=0.02, + axial_norm_std=1.0, + layer_norm_eps=1e-12, + axial_pos_embds=True, + axial_pos_shape=[4, 8], + axial_pos_embds_dim=[16, 48], + # sanotheu + # attn_layers=[lsh,lsh,lsh,lsh], + attn_layers=["lsh"], + pad_token_id=0, + eos_token_id=2, + scope=None, + hash_seed=0, + num_labels=2, + ) self.config_tester = ConfigTester(self, config_class=ReformerConfig, hidden_size=37) def _check_attentions_for_generate( diff --git a/tests/test_modeling_roberta.py b/tests/test_modeling_roberta.py index bed69c3469..0f700009e0 100644 --- a/tests/test_modeling_roberta.py +++ b/tests/test_modeling_roberta.py @@ -17,7 +17,7 @@ import unittest from copy import deepcopy -from transformers import is_torch_available +from transformers import RobertaConfig, is_torch_available from transformers.testing_utils import TestCasePlus, require_torch, slow, torch_device from .test_configuration_common import ConfigTester @@ -29,7 +29,6 @@ if is_torch_available(): import torch from transformers import ( - RobertaConfig, RobertaForCausalLM, RobertaForMaskedLM, RobertaForMultipleChoice, @@ -94,7 +93,12 @@ class RobertaModelTester: token_labels = ids_tensor([self.batch_size, self.seq_length], self.num_labels) choice_labels = ids_tensor([self.batch_size], self.num_choices) - config = RobertaConfig( + config = self.get_config() + + return config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels + + def get_config(self): + return RobertaConfig( vocab_size=self.vocab_size, hidden_size=self.hidden_size, num_hidden_layers=self.num_hidden_layers, @@ -108,8 +112,6 @@ class RobertaModelTester: initializer_range=self.initializer_range, ) - return config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels - def prepare_config_and_inputs_for_decoder(self): ( config, diff --git a/tests/test_modeling_roformer.py b/tests/test_modeling_roformer.py index fdb39abbf9..9a32c63079 100644 --- a/tests/test_modeling_roformer.py +++ 
b/tests/test_modeling_roformer.py @@ -18,7 +18,7 @@ import unittest from tests.test_modeling_common import floats_tensor -from transformers import is_torch_available +from transformers import RoFormerConfig, is_torch_available from transformers.testing_utils import require_torch, slow, torch_device from .test_configuration_common import ConfigTester @@ -29,7 +29,6 @@ if is_torch_available(): import torch from transformers import ( - RoFormerConfig, RoFormerForCausalLM, RoFormerForMaskedLM, RoFormerForMultipleChoice, @@ -113,7 +112,12 @@ class RoFormerModelTester: token_labels = ids_tensor([self.batch_size, self.seq_length], self.num_labels) choice_labels = ids_tensor([self.batch_size], self.num_choices) - config = RoFormerConfig( + config = self.get_config() + + return config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels + + def get_config(self): + return RoFormerConfig( vocab_size=self.vocab_size, hidden_size=self.hidden_size, num_hidden_layers=self.num_hidden_layers, @@ -128,8 +132,6 @@ class RoFormerModelTester: initializer_range=self.initializer_range, ) - return config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels - def prepare_config_and_inputs_for_decoder(self): ( config, diff --git a/tests/test_modeling_speech_to_text.py b/tests/test_modeling_speech_to_text.py index 102a33f4a3..2a56a583fa 100644 --- a/tests/test_modeling_speech_to_text.py +++ b/tests/test_modeling_speech_to_text.py @@ -14,13 +14,13 @@ # limitations under the License. """ Testing suite for the PyTorch Speech2Text model. 
""" - import copy import inspect import os import tempfile import unittest +from transformers import Speech2TextConfig from transformers.file_utils import cached_property from transformers.testing_utils import ( is_torch_available, @@ -40,12 +40,7 @@ from .test_modeling_common import ModelTesterMixin, _config_zero_init, floats_te if is_torch_available(): import torch - from transformers import ( - Speech2TextConfig, - Speech2TextForConditionalGeneration, - Speech2TextModel, - Speech2TextProcessor, - ) + from transformers import Speech2TextForConditionalGeneration, Speech2TextModel, Speech2TextProcessor from transformers.models.speech_to_text.modeling_speech_to_text import Speech2TextDecoder, Speech2TextEncoder @@ -142,7 +137,17 @@ class Speech2TextModelTester: attention_mask = torch.ones([self.batch_size, self.seq_length], dtype=torch.long, device=torch_device) decoder_input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size).clamp(2) - config = Speech2TextConfig( + config = self.get_config() + inputs_dict = prepare_speech_to_text_inputs_dict( + config, + input_features=input_features, + decoder_input_ids=decoder_input_ids, + attention_mask=attention_mask, + ) + return config, inputs_dict + + def get_config(self): + return Speech2TextConfig( vocab_size=self.vocab_size, d_model=self.hidden_size, encoder_layers=self.num_hidden_layers, @@ -165,13 +170,6 @@ class Speech2TextModelTester: bos_token_id=self.bos_token_id, pad_token_id=self.pad_token_id, ) - inputs_dict = prepare_speech_to_text_inputs_dict( - config, - input_features=input_features, - decoder_input_ids=decoder_input_ids, - attention_mask=attention_mask, - ) - return config, inputs_dict def prepare_config_and_inputs_for_common(self): config, inputs_dict = self.prepare_config_and_inputs() diff --git a/tests/test_modeling_squeezebert.py b/tests/test_modeling_squeezebert.py index 8f9d65fa9a..546b6f9e83 100644 --- a/tests/test_modeling_squeezebert.py +++ b/tests/test_modeling_squeezebert.py @@ 
-16,7 +16,7 @@ import unittest -from transformers import is_torch_available +from transformers import SqueezeBertConfig, is_torch_available from transformers.testing_utils import require_sentencepiece, require_tokenizers, require_torch, slow, torch_device from .test_configuration_common import ConfigTester @@ -28,7 +28,6 @@ if is_torch_available(): from transformers import ( SQUEEZEBERT_PRETRAINED_MODEL_ARCHIVE_LIST, - SqueezeBertConfig, SqueezeBertForMaskedLM, SqueezeBertForMultipleChoice, SqueezeBertForQuestionAnswering, @@ -37,179 +36,181 @@ if is_torch_available(): SqueezeBertModel, ) - class SqueezeBertModelTester(object): - def __init__( - self, - parent, - batch_size=13, - seq_length=7, - is_training=True, - use_input_mask=True, - use_token_type_ids=False, - use_labels=True, - vocab_size=99, - hidden_size=32, - num_hidden_layers=5, - num_attention_heads=4, - intermediate_size=64, - hidden_act="gelu", - hidden_dropout_prob=0.1, - attention_probs_dropout_prob=0.1, - max_position_embeddings=512, - type_vocab_size=16, - type_sequence_label_size=2, - initializer_range=0.02, - num_labels=3, - num_choices=4, - scope=None, - q_groups=2, - k_groups=2, - v_groups=2, - post_attention_groups=2, - intermediate_groups=4, - output_groups=1, - ): - self.parent = parent - self.batch_size = batch_size - self.seq_length = seq_length - self.is_training = is_training - self.use_input_mask = use_input_mask - self.use_token_type_ids = use_token_type_ids - self.use_labels = use_labels - self.vocab_size = vocab_size - self.hidden_size = hidden_size - self.num_hidden_layers = num_hidden_layers - self.num_attention_heads = num_attention_heads - self.intermediate_size = intermediate_size - self.hidden_act = hidden_act - self.hidden_dropout_prob = hidden_dropout_prob - self.attention_probs_dropout_prob = attention_probs_dropout_prob - self.max_position_embeddings = max_position_embeddings - self.type_vocab_size = type_vocab_size - self.type_sequence_label_size = type_sequence_label_size 
- self.initializer_range = initializer_range - self.num_labels = num_labels - self.num_choices = num_choices - self.scope = scope - self.q_groups = q_groups - self.k_groups = k_groups - self.v_groups = v_groups - self.post_attention_groups = post_attention_groups - self.intermediate_groups = intermediate_groups - self.output_groups = output_groups - def prepare_config_and_inputs(self): - input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size) +class SqueezeBertModelTester(object): + def __init__( + self, + parent, + batch_size=13, + seq_length=7, + is_training=True, + use_input_mask=True, + use_token_type_ids=False, + use_labels=True, + vocab_size=99, + hidden_size=32, + num_hidden_layers=5, + num_attention_heads=4, + intermediate_size=64, + hidden_act="gelu", + hidden_dropout_prob=0.1, + attention_probs_dropout_prob=0.1, + max_position_embeddings=512, + type_vocab_size=16, + type_sequence_label_size=2, + initializer_range=0.02, + num_labels=3, + num_choices=4, + scope=None, + q_groups=2, + k_groups=2, + v_groups=2, + post_attention_groups=2, + intermediate_groups=4, + output_groups=1, + ): + self.parent = parent + self.batch_size = batch_size + self.seq_length = seq_length + self.is_training = is_training + self.use_input_mask = use_input_mask + self.use_token_type_ids = use_token_type_ids + self.use_labels = use_labels + self.vocab_size = vocab_size + self.hidden_size = hidden_size + self.num_hidden_layers = num_hidden_layers + self.num_attention_heads = num_attention_heads + self.intermediate_size = intermediate_size + self.hidden_act = hidden_act + self.hidden_dropout_prob = hidden_dropout_prob + self.attention_probs_dropout_prob = attention_probs_dropout_prob + self.max_position_embeddings = max_position_embeddings + self.type_vocab_size = type_vocab_size + self.type_sequence_label_size = type_sequence_label_size + self.initializer_range = initializer_range + self.num_labels = num_labels + self.num_choices = num_choices + self.scope = scope 
+ self.q_groups = q_groups + self.k_groups = k_groups + self.v_groups = v_groups + self.post_attention_groups = post_attention_groups + self.intermediate_groups = intermediate_groups + self.output_groups = output_groups - input_mask = None - if self.use_input_mask: - input_mask = random_attention_mask([self.batch_size, self.seq_length]) + def prepare_config_and_inputs(self): + input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size) - sequence_labels = None - token_labels = None - choice_labels = None - if self.use_labels: - sequence_labels = ids_tensor([self.batch_size], self.type_sequence_label_size) - token_labels = ids_tensor([self.batch_size, self.seq_length], self.num_labels) - choice_labels = ids_tensor([self.batch_size], self.num_choices) + input_mask = None + if self.use_input_mask: + input_mask = random_attention_mask([self.batch_size, self.seq_length]) - config = SqueezeBertConfig( - embedding_size=self.hidden_size, - vocab_size=self.vocab_size, - hidden_size=self.hidden_size, - num_hidden_layers=self.num_hidden_layers, - num_attention_heads=self.num_attention_heads, - intermediate_size=self.intermediate_size, - hidden_act=self.hidden_act, - attention_probs_dropout_prob=self.hidden_dropout_prob, - attention_dropout=self.attention_probs_dropout_prob, - max_position_embeddings=self.max_position_embeddings, - initializer_range=self.initializer_range, - q_groups=self.q_groups, - k_groups=self.k_groups, - v_groups=self.v_groups, - post_attention_groups=self.post_attention_groups, - intermediate_groups=self.intermediate_groups, - output_groups=self.output_groups, - ) + sequence_labels = None + token_labels = None + choice_labels = None + if self.use_labels: + sequence_labels = ids_tensor([self.batch_size], self.type_sequence_label_size) + token_labels = ids_tensor([self.batch_size, self.seq_length], self.num_labels) + choice_labels = ids_tensor([self.batch_size], self.num_choices) - return config, input_ids, input_mask, sequence_labels, 
token_labels, choice_labels + config = self.get_config() - def create_and_check_squeezebert_model( - self, config, input_ids, input_mask, sequence_labels, token_labels, choice_labels - ): - model = SqueezeBertModel(config=config) - model.to(torch_device) - model.eval() - result = model(input_ids, input_mask) - result = model(input_ids) - self.parent.assertEqual( - result.last_hidden_state.shape, (self.batch_size, self.seq_length, self.hidden_size) - ) + return config, input_ids, input_mask, sequence_labels, token_labels, choice_labels - def create_and_check_squeezebert_for_masked_lm( - self, config, input_ids, input_mask, sequence_labels, token_labels, choice_labels - ): - model = SqueezeBertForMaskedLM(config=config) - model.to(torch_device) - model.eval() - result = model(input_ids, attention_mask=input_mask, labels=token_labels) - self.parent.assertEqual(result.logits.shape, (self.batch_size, self.seq_length, self.vocab_size)) + def get_config(self): + return SqueezeBertConfig( + embedding_size=self.hidden_size, + vocab_size=self.vocab_size, + hidden_size=self.hidden_size, + num_hidden_layers=self.num_hidden_layers, + num_attention_heads=self.num_attention_heads, + intermediate_size=self.intermediate_size, + hidden_act=self.hidden_act, + attention_probs_dropout_prob=self.hidden_dropout_prob, + attention_dropout=self.attention_probs_dropout_prob, + max_position_embeddings=self.max_position_embeddings, + initializer_range=self.initializer_range, + q_groups=self.q_groups, + k_groups=self.k_groups, + v_groups=self.v_groups, + post_attention_groups=self.post_attention_groups, + intermediate_groups=self.intermediate_groups, + output_groups=self.output_groups, + ) - def create_and_check_squeezebert_for_question_answering( - self, config, input_ids, input_mask, sequence_labels, token_labels, choice_labels - ): - model = SqueezeBertForQuestionAnswering(config=config) - model.to(torch_device) - model.eval() - result = model( - input_ids, attention_mask=input_mask, 
start_positions=sequence_labels, end_positions=sequence_labels - ) - self.parent.assertEqual(result.start_logits.shape, (self.batch_size, self.seq_length)) - self.parent.assertEqual(result.end_logits.shape, (self.batch_size, self.seq_length)) + def create_and_check_squeezebert_model( + self, config, input_ids, input_mask, sequence_labels, token_labels, choice_labels + ): + model = SqueezeBertModel(config=config) + model.to(torch_device) + model.eval() + result = model(input_ids, input_mask) + result = model(input_ids) + self.parent.assertEqual(result.last_hidden_state.shape, (self.batch_size, self.seq_length, self.hidden_size)) - def create_and_check_squeezebert_for_sequence_classification( - self, config, input_ids, input_mask, sequence_labels, token_labels, choice_labels - ): - config.num_labels = self.num_labels - model = SqueezeBertForSequenceClassification(config) - model.to(torch_device) - model.eval() - result = model(input_ids, attention_mask=input_mask, labels=sequence_labels) - self.parent.assertEqual(result.logits.shape, (self.batch_size, self.num_labels)) + def create_and_check_squeezebert_for_masked_lm( + self, config, input_ids, input_mask, sequence_labels, token_labels, choice_labels + ): + model = SqueezeBertForMaskedLM(config=config) + model.to(torch_device) + model.eval() + result = model(input_ids, attention_mask=input_mask, labels=token_labels) + self.parent.assertEqual(result.logits.shape, (self.batch_size, self.seq_length, self.vocab_size)) - def create_and_check_squeezebert_for_token_classification( - self, config, input_ids, input_mask, sequence_labels, token_labels, choice_labels - ): - config.num_labels = self.num_labels - model = SqueezeBertForTokenClassification(config=config) - model.to(torch_device) - model.eval() + def create_and_check_squeezebert_for_question_answering( + self, config, input_ids, input_mask, sequence_labels, token_labels, choice_labels + ): + model = SqueezeBertForQuestionAnswering(config=config) + 
model.to(torch_device) + model.eval() + result = model( + input_ids, attention_mask=input_mask, start_positions=sequence_labels, end_positions=sequence_labels + ) + self.parent.assertEqual(result.start_logits.shape, (self.batch_size, self.seq_length)) + self.parent.assertEqual(result.end_logits.shape, (self.batch_size, self.seq_length)) - result = model(input_ids, attention_mask=input_mask, labels=token_labels) - self.parent.assertEqual(result.logits.shape, (self.batch_size, self.seq_length, self.num_labels)) + def create_and_check_squeezebert_for_sequence_classification( + self, config, input_ids, input_mask, sequence_labels, token_labels, choice_labels + ): + config.num_labels = self.num_labels + model = SqueezeBertForSequenceClassification(config) + model.to(torch_device) + model.eval() + result = model(input_ids, attention_mask=input_mask, labels=sequence_labels) + self.parent.assertEqual(result.logits.shape, (self.batch_size, self.num_labels)) - def create_and_check_squeezebert_for_multiple_choice( - self, config, input_ids, input_mask, sequence_labels, token_labels, choice_labels - ): - config.num_choices = self.num_choices - model = SqueezeBertForMultipleChoice(config=config) - model.to(torch_device) - model.eval() - multiple_choice_inputs_ids = input_ids.unsqueeze(1).expand(-1, self.num_choices, -1).contiguous() - multiple_choice_input_mask = input_mask.unsqueeze(1).expand(-1, self.num_choices, -1).contiguous() - result = model( - multiple_choice_inputs_ids, - attention_mask=multiple_choice_input_mask, - labels=choice_labels, - ) - self.parent.assertEqual(result.logits.shape, (self.batch_size, self.num_choices)) + def create_and_check_squeezebert_for_token_classification( + self, config, input_ids, input_mask, sequence_labels, token_labels, choice_labels + ): + config.num_labels = self.num_labels + model = SqueezeBertForTokenClassification(config=config) + model.to(torch_device) + model.eval() - def prepare_config_and_inputs_for_common(self): - 
config_and_inputs = self.prepare_config_and_inputs() - (config, input_ids, input_mask, sequence_labels, token_labels, choice_labels) = config_and_inputs - inputs_dict = {"input_ids": input_ids, "attention_mask": input_mask} - return config, inputs_dict + result = model(input_ids, attention_mask=input_mask, labels=token_labels) + self.parent.assertEqual(result.logits.shape, (self.batch_size, self.seq_length, self.num_labels)) + + def create_and_check_squeezebert_for_multiple_choice( + self, config, input_ids, input_mask, sequence_labels, token_labels, choice_labels + ): + config.num_choices = self.num_choices + model = SqueezeBertForMultipleChoice(config=config) + model.to(torch_device) + model.eval() + multiple_choice_inputs_ids = input_ids.unsqueeze(1).expand(-1, self.num_choices, -1).contiguous() + multiple_choice_input_mask = input_mask.unsqueeze(1).expand(-1, self.num_choices, -1).contiguous() + result = model( + multiple_choice_inputs_ids, + attention_mask=multiple_choice_input_mask, + labels=choice_labels, + ) + self.parent.assertEqual(result.logits.shape, (self.batch_size, self.num_choices)) + + def prepare_config_and_inputs_for_common(self): + config_and_inputs = self.prepare_config_and_inputs() + (config, input_ids, input_mask, sequence_labels, token_labels, choice_labels) = config_and_inputs + inputs_dict = {"input_ids": input_ids, "attention_mask": input_mask} + return config, inputs_dict @require_torch diff --git a/tests/test_modeling_t5.py b/tests/test_modeling_t5.py index 38a353e1a2..56ca3df26d 100644 --- a/tests/test_modeling_t5.py +++ b/tests/test_modeling_t5.py @@ -18,7 +18,7 @@ import copy import tempfile import unittest -from transformers import is_torch_available +from transformers import T5Config, is_torch_available from transformers.file_utils import cached_property from transformers.testing_utils import require_sentencepiece, require_tokenizers, require_torch, slow, torch_device @@ -30,7 +30,7 @@ from .test_modeling_common import 
ModelTesterMixin, ids_tensor if is_torch_available(): import torch - from transformers import ByT5Tokenizer, T5Config, T5EncoderModel, T5ForConditionalGeneration, T5Model, T5Tokenizer + from transformers import ByT5Tokenizer, T5EncoderModel, T5ForConditionalGeneration, T5Model, T5Tokenizer from transformers.models.t5.modeling_t5 import T5_PRETRAINED_MODEL_ARCHIVE_LIST @@ -100,7 +100,19 @@ class T5ModelTester: if self.use_labels: lm_labels = ids_tensor([self.batch_size, self.decoder_seq_length], self.vocab_size) - config = T5Config( + config = self.get_config() + + return ( + config, + input_ids, + decoder_input_ids, + attention_mask, + decoder_attention_mask, + lm_labels, + ) + + def get_config(self): + return T5Config( vocab_size=self.vocab_size, d_model=self.hidden_size, d_ff=self.d_ff, @@ -117,15 +129,6 @@ class T5ModelTester: decoder_start_token_id=self.decoder_start_token_id, ) - return ( - config, - input_ids, - decoder_input_ids, - attention_mask, - decoder_attention_mask, - lm_labels, - ) - def check_prepare_lm_labels_via_shift_left( self, config, diff --git a/tests/test_modeling_tapas.py b/tests/test_modeling_tapas.py index 32265c25f6..6a4d853fd3 100644 --- a/tests/test_modeling_tapas.py +++ b/tests/test_modeling_tapas.py @@ -13,7 +13,6 @@ # See the License for the specific language governing permissions and # limitations under the License. 
- import copy import unittest @@ -29,6 +28,7 @@ from transformers import ( MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING, MODEL_FOR_TABLE_QUESTION_ANSWERING_MAPPING, MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING, + TapasConfig, is_torch_available, ) from transformers.file_utils import cached_property @@ -43,7 +43,6 @@ if is_torch_available(): import torch from transformers import ( - TapasConfig, TapasForMaskedLM, TapasForQuestionAnswering, TapasForSequenceClassification, @@ -183,7 +182,24 @@ class TapasModelTester: float_answer = floats_tensor([self.batch_size]).to(torch_device) aggregation_labels = ids_tensor([self.batch_size], self.num_aggregation_labels).to(torch_device) - config = TapasConfig( + config = self.get_config() + + return ( + config, + input_ids, + input_mask, + token_type_ids, + sequence_labels, + token_labels, + labels, + numeric_values, + numeric_values_scale, + float_answer, + aggregation_labels, + ) + + def get_config(self): + return TapasConfig( vocab_size=self.vocab_size, hidden_size=self.hidden_size, num_hidden_layers=self.num_hidden_layers, @@ -220,20 +236,6 @@ class TapasModelTester: disable_per_token_loss=self.disable_per_token_loss, ) - return ( - config, - input_ids, - input_mask, - token_type_ids, - sequence_labels, - token_labels, - labels, - numeric_values, - numeric_values_scale, - float_answer, - aggregation_labels, - ) - def create_and_check_model( self, config, diff --git a/tests/test_modeling_transfo_xl.py b/tests/test_modeling_transfo_xl.py index c1637d7e40..51ec77d24d 100644 --- a/tests/test_modeling_transfo_xl.py +++ b/tests/test_modeling_transfo_xl.py @@ -17,7 +17,7 @@ import copy import random import unittest -from transformers import is_torch_available +from transformers import TransfoXLConfig, is_torch_available from transformers.testing_utils import require_torch, require_torch_multi_gpu, slow, torch_device from .test_configuration_common import ConfigTester @@ -29,7 +29,7 @@ if is_torch_available(): import torch from torch import 
nn - from transformers import TransfoXLConfig, TransfoXLForSequenceClassification, TransfoXLLMHeadModel, TransfoXLModel + from transformers import TransfoXLForSequenceClassification, TransfoXLLMHeadModel, TransfoXLModel from transformers.models.transfo_xl.modeling_transfo_xl import TRANSFO_XL_PRETRAINED_MODEL_ARCHIVE_LIST @@ -69,7 +69,12 @@ class TransfoXLModelTester: if self.use_labels: lm_labels = ids_tensor([self.batch_size, self.seq_length], self.vocab_size) - config = TransfoXLConfig( + config = self.get_config() + + return (config, input_ids_1, input_ids_2, lm_labels) + + def get_config(self): + return TransfoXLConfig( vocab_size=self.vocab_size, mem_len=self.mem_len, clamp_len=self.clamp_len, @@ -85,8 +90,6 @@ class TransfoXLModelTester: pad_token_id=self.pad_token_id, ) - return (config, input_ids_1, input_ids_2, lm_labels) - def set_seed(self): random.seed(self.seed) torch.manual_seed(self.seed) diff --git a/tests/test_modeling_visual_bert.py b/tests/test_modeling_visual_bert.py index c4272d776b..4b48c5f386 100644 --- a/tests/test_modeling_visual_bert.py +++ b/tests/test_modeling_visual_bert.py @@ -14,12 +14,11 @@ # limitations under the License. """ Testing suite for the PyTorch VisualBERT model. 
""" - import copy import unittest from tests.test_modeling_common import floats_tensor -from transformers import is_torch_available +from transformers import VisualBertConfig, is_torch_available from transformers.testing_utils import require_torch, slow, torch_device from .test_configuration_common import ConfigTester @@ -30,7 +29,6 @@ if is_torch_available(): import torch from transformers import ( - VisualBertConfig, VisualBertForMultipleChoice, VisualBertForPreTraining, VisualBertForQuestionAnswering, @@ -98,7 +96,7 @@ class VisualBertModelTester: self.num_choices = num_choices self.scope = scope - def prepare_config(self): + def get_config(self): return VisualBertConfig( vocab_size=self.vocab_size, hidden_size=self.hidden_size, @@ -138,7 +136,7 @@ class VisualBertModelTester: if self.use_visual_token_type_ids: visual_token_type_ids = ids_tensor([self.batch_size, self.visual_seq_length], self.type_vocab_size) - config = self.prepare_config() + config = self.get_config() return config, { "input_ids": input_ids, "token_type_ids": token_type_ids, @@ -198,7 +196,7 @@ class VisualBertModelTester: if self.use_labels: labels = ids_tensor([self.batch_size], self.num_choices) - config = self.prepare_config() + config = self.get_config() return config, { "input_ids": input_ids, "token_type_ids": token_type_ids, diff --git a/tests/test_modeling_vit.py b/tests/test_modeling_vit.py index b45c12c16d..6073bf2392 100644 --- a/tests/test_modeling_vit.py +++ b/tests/test_modeling_vit.py @@ -18,6 +18,7 @@ import inspect import unittest +from transformers import ViTConfig from transformers.file_utils import cached_property, is_torch_available, is_vision_available from transformers.testing_utils import require_torch, require_vision, slow, torch_device @@ -29,7 +30,7 @@ if is_torch_available(): import torch from torch import nn - from transformers import ViTConfig, ViTForImageClassification, ViTModel + from transformers import ViTForImageClassification, ViTModel from 
transformers.models.vit.modeling_vit import VIT_PRETRAINED_MODEL_ARCHIVE_LIST, to_2tuple @@ -86,7 +87,12 @@ class ViTModelTester: if self.use_labels: labels = ids_tensor([self.batch_size], self.type_sequence_label_size) - config = ViTConfig( + config = self.get_config() + + return config, pixel_values, labels + + def get_config(self): + return ViTConfig( image_size=self.image_size, patch_size=self.patch_size, num_channels=self.num_channels, @@ -101,8 +107,6 @@ class ViTModelTester: initializer_range=self.initializer_range, ) - return config, pixel_values, labels - def create_and_check_model(self, config, pixel_values, labels): model = ViTModel(config=config) model.to(torch_device) diff --git a/tests/test_modeling_wav2vec2.py b/tests/test_modeling_wav2vec2.py index 8b269b88bc..c0bc68d945 100644 --- a/tests/test_modeling_wav2vec2.py +++ b/tests/test_modeling_wav2vec2.py @@ -21,7 +21,7 @@ import unittest import pytest from tests.test_modeling_common import floats_tensor, ids_tensor, random_attention_mask -from transformers import is_torch_available +from transformers import Wav2Vec2Config, is_torch_available from transformers.testing_utils import require_datasets, require_soundfile, require_torch, slow, torch_device from .test_configuration_common import ConfigTester @@ -32,7 +32,6 @@ if is_torch_available(): import torch from transformers import ( - Wav2Vec2Config, Wav2Vec2FeatureExtractor, Wav2Vec2ForCTC, Wav2Vec2ForMaskedLM, @@ -106,7 +105,12 @@ class Wav2Vec2ModelTester: input_values = floats_tensor([self.batch_size, self.seq_length], self.vocab_size) attention_mask = random_attention_mask([self.batch_size, self.seq_length]) - config = Wav2Vec2Config( + config = self.get_config() + + return config, input_values, attention_mask + + def get_config(self): + return Wav2Vec2Config( hidden_size=self.hidden_size, feat_extract_norm=self.feat_extract_norm, feat_extract_dropout=self.feat_extract_dropout, @@ -127,8 +131,6 @@ class Wav2Vec2ModelTester: 
vocab_size=self.vocab_size, ) - return config, input_values, attention_mask - def create_and_check_model(self, config, input_values, attention_mask): model = Wav2Vec2Model(config=config) model.to(torch_device) diff --git a/tests/test_modeling_xlm.py b/tests/test_modeling_xlm.py index 691a4039ea..996bace511 100644 --- a/tests/test_modeling_xlm.py +++ b/tests/test_modeling_xlm.py @@ -13,10 +13,9 @@ # See the License for the specific language governing permissions and # limitations under the License. - import unittest -from transformers import is_torch_available +from transformers import XLMConfig, is_torch_available from transformers.testing_utils import require_torch, slow, torch_device from .test_configuration_common import ConfigTester @@ -28,7 +27,6 @@ if is_torch_available(): import torch from transformers import ( - XLMConfig, XLMForMultipleChoice, XLMForQuestionAnswering, XLMForQuestionAnsweringSimple, @@ -97,7 +95,22 @@ class XLMModelTester: is_impossible_labels = ids_tensor([self.batch_size], 2).float() choice_labels = ids_tensor([self.batch_size], self.num_choices) - config = XLMConfig( + config = self.get_config() + + return ( + config, + input_ids, + token_type_ids, + input_lengths, + sequence_labels, + token_labels, + is_impossible_labels, + choice_labels, + input_mask, + ) + + def get_config(self): + return XLMConfig( vocab_size=self.vocab_size, n_special=self.n_special, emb_dim=self.hidden_size, @@ -118,18 +131,6 @@ class XLMModelTester: bos_token_id=self.bos_token_id, ) - return ( - config, - input_ids, - token_type_ids, - input_lengths, - sequence_labels, - token_labels, - is_impossible_labels, - choice_labels, - input_mask, - ) - def create_and_check_xlm_model( self, config, diff --git a/tests/test_modeling_xlnet.py b/tests/test_modeling_xlnet.py index 2ab4940689..c26e24df50 100644 --- a/tests/test_modeling_xlnet.py +++ b/tests/test_modeling_xlnet.py @@ -13,11 +13,10 @@ # See the License for the specific language governing permissions and # 
limitations under the License. - import random import unittest -from transformers import is_torch_available +from transformers import XLNetConfig, is_torch_available from transformers.testing_utils import require_torch, slow, torch_device from .test_configuration_common import ConfigTester @@ -29,7 +28,6 @@ if is_torch_available(): import torch from transformers import ( - XLNetConfig, XLNetForMultipleChoice, XLNetForQuestionAnswering, XLNetForQuestionAnsweringSimple, @@ -131,7 +129,25 @@ class XLNetModelTester: is_impossible_labels = ids_tensor([self.batch_size], 2).float() token_labels = ids_tensor([self.batch_size, self.seq_length], self.type_vocab_size) - config = XLNetConfig( + config = self.get_config() + + return ( + config, + input_ids_1, + input_ids_2, + input_ids_q, + perm_mask, + input_mask, + target_mapping, + segment_ids, + lm_labels, + sequence_labels, + is_impossible_labels, + token_labels, + ) + + def get_config(self): + return XLNetConfig( vocab_size=self.vocab_size, d_model=self.hidden_size, n_head=self.num_attention_heads, @@ -150,21 +166,6 @@ class XLNetModelTester: eos_token_id=self.eos_token_id, ) - return ( - config, - input_ids_1, - input_ids_2, - input_ids_q, - perm_mask, - input_mask, - target_mapping, - segment_ids, - lm_labels, - sequence_labels, - is_impossible_labels, - token_labels, - ) - def set_seed(self): random.seed(self.seed) torch.manual_seed(self.seed)