Use scale=1.0 in floats_tensor calls in speech model testers (#17007)
Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
This commit is contained in:
@@ -116,7 +116,7 @@ class Data2VecAudioModelTester:
|
||||
self.adapter_output_seq_length = (self.output_seq_length - 1) // adapter_stride + 1
|
||||
|
||||
def prepare_config_and_inputs(self):
|
||||
input_values = floats_tensor([self.batch_size, self.seq_length], self.vocab_size)
|
||||
input_values = floats_tensor([self.batch_size, self.seq_length], scale=1.0)
|
||||
attention_mask = random_attention_mask([self.batch_size, self.seq_length])
|
||||
|
||||
config = self.get_config()
|
||||
|
||||
@@ -106,7 +106,7 @@ class HubertModelTester:
|
||||
self.encoder_seq_length = self.output_seq_length
|
||||
|
||||
def prepare_config_and_inputs(self):
|
||||
input_values = floats_tensor([self.batch_size, self.seq_length], self.vocab_size)
|
||||
input_values = floats_tensor([self.batch_size, self.seq_length], scale=1.0)
|
||||
attention_mask = random_attention_mask([self.batch_size, self.seq_length])
|
||||
|
||||
config = self.get_config()
|
||||
|
||||
@@ -143,7 +143,7 @@ class PerceiverModelTester:
|
||||
token_labels = ids_tensor([self.batch_size, self.seq_length], self.num_labels)
|
||||
|
||||
if model_class is None or model_class.__name__ == "PerceiverModel":
|
||||
inputs = floats_tensor([self.batch_size, self.seq_length, config.d_model], self.vocab_size)
|
||||
inputs = floats_tensor([self.batch_size, self.seq_length, config.d_model], scale=1.0)
|
||||
return config, inputs, input_mask, sequence_labels, token_labels
|
||||
elif model_class.__name__ in ["PerceiverForMaskedLM", "PerceiverForSequenceClassification"]:
|
||||
inputs = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)
|
||||
|
||||
@@ -108,7 +108,7 @@ class SEWModelTester:
|
||||
self.encoder_seq_length = self.output_seq_length // self.squeeze_factor
|
||||
|
||||
def prepare_config_and_inputs(self):
|
||||
input_values = floats_tensor([self.batch_size, self.seq_length], self.vocab_size)
|
||||
input_values = floats_tensor([self.batch_size, self.seq_length], scale=1.0)
|
||||
attention_mask = random_attention_mask([self.batch_size, self.seq_length])
|
||||
|
||||
config = self.get_config()
|
||||
|
||||
@@ -122,7 +122,7 @@ class SEWDModelTester:
|
||||
self.encoder_seq_length = self.output_seq_length // self.squeeze_factor
|
||||
|
||||
def prepare_config_and_inputs(self):
|
||||
input_values = floats_tensor([self.batch_size, self.seq_length], self.vocab_size)
|
||||
input_values = floats_tensor([self.batch_size, self.seq_length], scale=1.0)
|
||||
attention_mask = random_attention_mask([self.batch_size, self.seq_length])
|
||||
|
||||
config = self.get_config()
|
||||
|
||||
@@ -582,7 +582,7 @@ class FlaxWav2Vec2GPT2ModelTest(FlaxEncoderDecoderMixin, unittest.TestCase):
|
||||
"facebook/wav2vec2-large-lv60", "gpt2-medium"
|
||||
)
|
||||
batch_size = 13
|
||||
input_values = floats_tensor([batch_size, 512], model.config.encoder.vocab_size)
|
||||
input_values = floats_tensor([batch_size, 512], scale=1.0)
|
||||
attention_mask = random_attention_mask([batch_size, 512])
|
||||
decoder_input_ids = ids_tensor([batch_size, 4], model.config.decoder.vocab_size)
|
||||
decoder_attention_mask = random_attention_mask([batch_size, 4])
|
||||
@@ -638,7 +638,7 @@ class FlaxWav2Vec2GPT2ModelTest(FlaxEncoderDecoderMixin, unittest.TestCase):
|
||||
|
||||
# prepare inputs
|
||||
batch_size = 13
|
||||
input_values = floats_tensor([batch_size, 512], fx_model.config.encoder.vocab_size)
|
||||
input_values = floats_tensor([batch_size, 512], scale=1.0)
|
||||
attention_mask = random_attention_mask([batch_size, 512])
|
||||
decoder_input_ids = ids_tensor([batch_size, 4], fx_model.config.decoder.vocab_size)
|
||||
decoder_attention_mask = random_attention_mask([batch_size, 4])
|
||||
@@ -699,7 +699,7 @@ class FlaxWav2Vec2BartModelTest(FlaxEncoderDecoderMixin, unittest.TestCase):
|
||||
"facebook/wav2vec2-large-lv60", "bart-large"
|
||||
)
|
||||
batch_size = 13
|
||||
input_values = floats_tensor([batch_size, 512], model.config.encoder.vocab_size)
|
||||
input_values = floats_tensor([batch_size, 512], scale=1.0)
|
||||
attention_mask = random_attention_mask([batch_size, 512])
|
||||
decoder_input_ids = ids_tensor([batch_size, 4], model.config.decoder.vocab_size)
|
||||
decoder_attention_mask = random_attention_mask([batch_size, 4])
|
||||
@@ -755,7 +755,7 @@ class FlaxWav2Vec2BartModelTest(FlaxEncoderDecoderMixin, unittest.TestCase):
|
||||
|
||||
# prepare inputs
|
||||
batch_size = 13
|
||||
input_values = floats_tensor([batch_size, 512], fx_model.config.encoder.vocab_size)
|
||||
input_values = floats_tensor([batch_size, 512], scale=1.0)
|
||||
attention_mask = random_attention_mask([batch_size, 512])
|
||||
decoder_input_ids = ids_tensor([batch_size, 4], fx_model.config.decoder.vocab_size)
|
||||
decoder_attention_mask = random_attention_mask([batch_size, 4])
|
||||
|
||||
@@ -425,7 +425,7 @@ class Wav2Vec2BertModelTest(EncoderDecoderMixin, unittest.TestCase):
|
||||
"facebook/wav2vec2-base-960h", "bert-base-cased"
|
||||
)
|
||||
batch_size = 13
|
||||
input_values = floats_tensor([batch_size, 512], model.encoder.config.vocab_size)
|
||||
input_values = floats_tensor([batch_size, 512], scale=1.0)
|
||||
attention_mask = random_attention_mask([batch_size, 512])
|
||||
decoder_input_ids = ids_tensor([batch_size, 4], model.decoder.config.vocab_size)
|
||||
decoder_attention_mask = random_attention_mask([batch_size, 4])
|
||||
@@ -489,7 +489,7 @@ class Speech2TextBertModelTest(EncoderDecoderMixin, unittest.TestCase):
|
||||
"facebook/s2t-small-librispeech-asr", "bert-base-cased"
|
||||
)
|
||||
batch_size = 13
|
||||
input_features = floats_tensor([batch_size, 7, 80], model.encoder.config.vocab_size)
|
||||
input_features = floats_tensor([batch_size, 7, 80], scale=1.0)
|
||||
attention_mask = random_attention_mask([batch_size, 7])
|
||||
decoder_input_ids = ids_tensor([batch_size, 4], model.decoder.config.vocab_size)
|
||||
decoder_attention_mask = random_attention_mask([batch_size, 4])
|
||||
|
||||
@@ -107,7 +107,7 @@ class UniSpeechModelTester:
|
||||
self.encoder_seq_length = self.output_seq_length
|
||||
|
||||
def prepare_config_and_inputs(self):
|
||||
input_values = floats_tensor([self.batch_size, self.seq_length], self.vocab_size)
|
||||
input_values = floats_tensor([self.batch_size, self.seq_length], scale=1.0)
|
||||
attention_mask = random_attention_mask([self.batch_size, self.seq_length])
|
||||
|
||||
config = self.get_config()
|
||||
|
||||
@@ -121,7 +121,7 @@ class UniSpeechSatModelTester:
|
||||
self.encoder_seq_length = self.output_seq_length
|
||||
|
||||
def prepare_config_and_inputs(self):
|
||||
input_values = floats_tensor([self.batch_size, self.seq_length], self.vocab_size)
|
||||
input_values = floats_tensor([self.batch_size, self.seq_length], scale=1.0)
|
||||
attention_mask = random_attention_mask([self.batch_size, self.seq_length])
|
||||
|
||||
config = self.get_config()
|
||||
@@ -306,7 +306,7 @@ class UniSpeechSatModelTester:
|
||||
model.freeze_base_model()
|
||||
|
||||
# use a longer sequence length to account for TDNN temporal downsampling
|
||||
input_values = floats_tensor([self.batch_size, self.seq_length * 2], self.vocab_size)
|
||||
input_values = floats_tensor([self.batch_size, self.seq_length * 2], scale=1.0)
|
||||
|
||||
input_lengths = [input_values.shape[-1] // i for i in [4, 2, 1]]
|
||||
labels = ids_tensor((input_values.shape[0], 1), len(model.config.id2label))
|
||||
|
||||
@@ -117,7 +117,7 @@ class FlaxWav2Vec2ModelTester:
|
||||
self.encoder_seq_length = self.output_seq_length
|
||||
|
||||
def prepare_config_and_inputs(self):
|
||||
input_values = floats_tensor([self.batch_size, self.seq_length], self.vocab_size)
|
||||
input_values = floats_tensor([self.batch_size, self.seq_length], scale=1.0)
|
||||
attention_mask = random_attention_mask([self.batch_size, self.seq_length])
|
||||
|
||||
config = Wav2Vec2Config(
|
||||
|
||||
@@ -150,7 +150,7 @@ class Wav2Vec2ModelTester:
|
||||
self.adapter_output_seq_length = (self.output_seq_length - 1) // adapter_stride + 1
|
||||
|
||||
def prepare_config_and_inputs(self):
|
||||
input_values = floats_tensor([self.batch_size, self.seq_length], self.vocab_size)
|
||||
input_values = floats_tensor([self.batch_size, self.seq_length], scale=1.0)
|
||||
attention_mask = random_attention_mask([self.batch_size, self.seq_length])
|
||||
|
||||
config = self.get_config()
|
||||
|
||||
@@ -114,7 +114,7 @@ class WavLMModelTester:
|
||||
self.encoder_seq_length = self.output_seq_length
|
||||
|
||||
def prepare_config_and_inputs(self):
|
||||
input_values = floats_tensor([self.batch_size, self.seq_length], self.vocab_size)
|
||||
input_values = floats_tensor([self.batch_size, self.seq_length], scale=1.0)
|
||||
attention_mask = random_attention_mask([self.batch_size, self.seq_length])
|
||||
|
||||
config = self.get_config()
|
||||
|
||||
Reference in New Issue
Block a user