Switch return_dict to True by default. (#8530)

* Use the CI to identify failing tests

* Remove from all examples and tests

* More default switch

* Fixes

* More test fixes

* More fixes

* Last fixes hopefully

* Use the CI to identify failing tests

* Remove from all examples and tests

* More default switch

* Fixes

* More test fixes

* More fixes

* Last fixes hopefully

* Run on the real suite

* Fix slow tests
This commit is contained in:
Sylvain Gugger
2020-11-16 11:43:00 -05:00
committed by GitHub
parent 0d0a0785fd
commit 1073a2bde5
106 changed files with 138 additions and 234 deletions

View File

@@ -118,7 +118,7 @@ class GenerationTesterMixin:
@staticmethod
def _get_encoder_outputs(model, input_ids, attention_mask, num_interleave=1):
encoder = model.get_encoder()
encoder_outputs = encoder(input_ids, attention_mask=attention_mask, return_dict=True)
encoder_outputs = encoder(input_ids, attention_mask=attention_mask)
encoder_outputs["last_hidden_state"] = encoder_outputs.last_hidden_state.repeat_interleave(
num_interleave, dim=0
)
@@ -344,6 +344,7 @@ class GenerationTesterMixin:
def test_beam_sample_generate(self):
for model_class in self.all_generative_model_classes:
config, input_ids, attention_mask, max_length = self._get_input_ids_and_config()
print("Return dict", config.return_dict)
logits_warper_kwargs, logits_warper = self._get_warper_and_kwargs(num_beams=1)
model = model_class(config).to(torch_device)

View File

@@ -102,7 +102,6 @@ class AlbertModelTester:
type_vocab_size=self.type_vocab_size,
initializer_range=self.initializer_range,
num_hidden_groups=self.num_hidden_groups,
return_dict=True,
)
return config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels

View File

@@ -259,7 +259,6 @@ class BartHeadTests(unittest.TestCase):
eos_token_id=2,
pad_token_id=1,
bos_token_id=0,
return_dict=True,
)
return config, input_ids, batch_size
@@ -310,7 +309,6 @@ class BartHeadTests(unittest.TestCase):
encoder_ffn_dim=8,
decoder_ffn_dim=8,
max_position_embeddings=48,
return_dict=True,
)
lm_model = BartForConditionalGeneration(config).to(torch_device)
context = torch.Tensor([[71, 82, 18, 33, 46, 91, 2], [68, 34, 26, 58, 30, 2, 1]]).long().to(torch_device)
@@ -713,6 +711,6 @@ class FastIntegrationTests(unittest.TestCase):
padding="longest",
truncation=True,
)
features = self.xsum_1_1_model.get_encoder()(**batch, return_dict=True).last_hidden_state
features = self.xsum_1_1_model.get_encoder()(**batch).last_hidden_state
expected = [[-0.0828, -0.0251, -0.0674], [0.1277, 0.3311, -0.0255], [0.2613, -0.0840, -0.2763]]
assert_tensors_close(features[0, :3, :3], torch.tensor(expected), atol=1e-3)

View File

@@ -124,7 +124,6 @@ class BertModelTester:
type_vocab_size=self.type_vocab_size,
is_decoder=False,
initializer_range=self.initializer_range,
return_dict=True,
)
return config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels

View File

@@ -89,7 +89,6 @@ class BertGenerationEncoderTester:
max_position_embeddings=self.max_position_embeddings,
is_decoder=False,
initializer_range=self.initializer_range,
return_dict=True,
)
return config, input_ids, input_mask, token_labels

View File

@@ -31,7 +31,7 @@ if is_torch_available():
class CamembertModelIntegrationTest(unittest.TestCase):
@slow
def test_output_embeds_base_model(self):
model = CamembertModel.from_pretrained("camembert-base", return_dict=True)
model = CamembertModel.from_pretrained("camembert-base")
model.to(torch_device)
input_ids = torch.tensor(

View File

@@ -657,7 +657,7 @@ class ModelTesterMixin:
model.eval()
with torch.no_grad():
outputs = model(**self._prepare_for_class(inputs_dict, model_class), return_dict=True)
outputs = model(**self._prepare_for_class(inputs_dict, model_class))
hidden_states = outputs["hidden_states"] if "hidden_states" in outputs else outputs[-1]
expected_num_layers = getattr(

View File

@@ -94,7 +94,6 @@ class CTRLModelTester:
n_ctx=self.max_position_embeddings,
# type_vocab_size=self.type_vocab_size,
# initializer_range=self.initializer_range,
return_dict=True,
)
head_mask = ids_tensor([self.num_hidden_layers, self.num_attention_heads], 2)

View File

@@ -148,7 +148,7 @@ class DebertaModelTest(ModelTesterMixin, unittest.TestCase):
return config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
def check_loss_output(self, result):
self.parent.assertListEqual(list(result["loss"].size()), [])
self.parent.assertListEqual(list(result.loss.size()), [])
def create_and_check_deberta_model(
self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
@@ -160,11 +160,8 @@ class DebertaModelTest(ModelTesterMixin, unittest.TestCase):
sequence_output = model(input_ids, token_type_ids=token_type_ids)[0]
sequence_output = model(input_ids)[0]
result = {
"sequence_output": sequence_output,
}
self.parent.assertListEqual(
list(result["sequence_output"].size()), [self.batch_size, self.seq_length, self.hidden_size]
list(sequence_output.size()), [self.batch_size, self.seq_length, self.hidden_size]
)
def create_and_check_deberta_for_sequence_classification(
@@ -174,14 +171,8 @@ class DebertaModelTest(ModelTesterMixin, unittest.TestCase):
model = DebertaForSequenceClassification(config)
model.to(torch_device)
model.eval()
loss, logits = model(
input_ids, attention_mask=input_mask, token_type_ids=token_type_ids, labels=sequence_labels
)
result = {
"loss": loss,
"logits": logits,
}
self.parent.assertListEqual(list(result["logits"].size()), [self.batch_size, self.num_labels])
result = model(input_ids, attention_mask=input_mask, token_type_ids=token_type_ids, labels=sequence_labels)
self.parent.assertListEqual(list(result.logits.size()), [self.batch_size, self.num_labels])
self.check_loss_output(result)
def prepare_config_and_inputs_for_common(self):

View File

@@ -110,7 +110,6 @@ if is_torch_available():
attention_dropout=self.attention_probs_dropout_prob,
max_position_embeddings=self.max_position_embeddings,
initializer_range=self.initializer_range,
return_dict=True,
)
return config, input_ids, input_mask, sequence_labels, token_labels, choice_labels

View File

@@ -117,7 +117,6 @@ class DPRModelTester:
type_vocab_size=self.type_vocab_size,
is_decoder=False,
initializer_range=self.initializer_range,
return_dict=True,
)
config = DPRConfig(projection_dim=self.projection_dim, **config.to_dict())

View File

@@ -101,7 +101,6 @@ class ElectraModelTester:
type_vocab_size=self.type_vocab_size,
is_decoder=False,
initializer_range=self.initializer_range,
return_dict=True,
)
return (

View File

@@ -85,7 +85,6 @@ class EncoderDecoderMixin:
decoder_input_ids=decoder_input_ids,
attention_mask=attention_mask,
decoder_attention_mask=decoder_attention_mask,
return_dict=True,
)
self.assertEqual(
@@ -117,7 +116,6 @@ class EncoderDecoderMixin:
decoder_input_ids=decoder_input_ids,
attention_mask=attention_mask,
decoder_attention_mask=decoder_attention_mask,
return_dict=True,
)
self.assertEqual(
outputs_encoder_decoder["logits"].shape, (decoder_input_ids.shape + (decoder_config.vocab_size,))
@@ -132,7 +130,6 @@ class EncoderDecoderMixin:
decoder_input_ids=decoder_input_ids,
attention_mask=attention_mask,
decoder_attention_mask=decoder_attention_mask,
return_dict=True,
)
self.assertEqual(
@@ -278,7 +275,6 @@ class EncoderDecoderMixin:
attention_mask=attention_mask,
decoder_attention_mask=decoder_attention_mask,
labels=labels,
return_dict=True,
)
loss = outputs_encoder_decoder["loss"]
@@ -313,7 +309,6 @@ class EncoderDecoderMixin:
attention_mask=attention_mask,
decoder_attention_mask=decoder_attention_mask,
output_attentions=True,
return_dict=True,
)
encoder_attentions = outputs_encoder_decoder["encoder_attentions"]

View File

@@ -113,7 +113,6 @@ class FlaubertModelTester(object):
initializer_range=self.initializer_range,
summary_type=self.summary_type,
use_proj=self.use_proj,
return_dict=True,
)
return (

View File

@@ -29,7 +29,7 @@ class FlaxBertModelTest(unittest.TestCase):
# Check for simple input
pt_inputs = tokenizer.encode_plus("This is a simple input", return_tensors=TensorType.PYTORCH)
fx_inputs = tokenizer.encode_plus("This is a simple input", return_tensors=TensorType.JAX)
pt_outputs = pt_model(**pt_inputs)
pt_outputs = pt_model(**pt_inputs).to_tuple()
fx_outputs = fx_model(**fx_inputs)
self.assertEqual(len(fx_outputs), len(pt_outputs), "Output lengths differ between Flax and PyTorch")

View File

@@ -34,7 +34,7 @@ class FlaxRobertaModelTest(unittest.TestCase):
self.assertEqual(len(fx_outputs), len(pt_outputs), "Output lengths differ between Flax and PyTorch")
for fx_output, pt_output in zip(fx_outputs, pt_outputs):
for fx_output, pt_output in zip(fx_outputs, pt_outputs.to_tuple()):
self.assert_almost_equals(fx_output, pt_output.numpy(), 5e-4)
def assert_almost_equals(self, a: ndarray, b: ndarray, tol: float):

View File

@@ -259,7 +259,6 @@ class FSMTHeadTests(unittest.TestCase):
eos_token_id=2,
pad_token_id=1,
bos_token_id=0,
return_dict=True,
)
def _get_config_and_data(self):

View File

@@ -140,7 +140,6 @@ class FunnelModelTester:
activation_dropout=self.activation_dropout,
max_position_embeddings=self.max_position_embeddings,
type_vocab_size=self.type_vocab_size,
return_dict=True,
)
return (

View File

@@ -131,7 +131,6 @@ class GPT2ModelTester:
bos_token_id=self.bos_token_id,
eos_token_id=self.eos_token_id,
pad_token_id=self.pad_token_id,
return_dict=True,
gradient_checkpointing=gradient_checkpointing,
)

View File

@@ -125,7 +125,6 @@ class LayoutLMModelTester:
max_position_embeddings=self.max_position_embeddings,
type_vocab_size=self.type_vocab_size,
initializer_range=self.initializer_range,
return_dict=True,
)
return config, input_ids, bbox, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels

View File

@@ -113,7 +113,6 @@ class LongformerModelTester:
type_vocab_size=self.type_vocab_size,
initializer_range=self.initializer_range,
attention_window=self.attention_window,
return_dict=True,
)
return config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels

View File

@@ -282,7 +282,6 @@ class LxmertModelTester:
attention_mask=input_mask,
labels=ans,
output_attentions=output_attentions,
return_dict=True,
)
result = model(input_ids, visual_feats, bounding_boxes, labels=ans)
result = model(
@@ -302,7 +301,6 @@ class LxmertModelTester:
attention_mask=input_mask,
labels=ans,
output_attentions=not output_attentions,
return_dict=True,
)
self.parent.assertEqual(result.question_answering_score.shape, (self.batch_size, self.num_qa_labels))
@@ -335,7 +333,6 @@ class LxmertModelTester:
matched_label=matched_label,
ans=ans,
output_attentions=output_attentions,
return_dict=True,
)
result = model(
input_ids,
@@ -390,7 +387,6 @@ class LxmertModelTester:
matched_label=matched_label,
ans=ans,
output_attentions=not output_attentions,
return_dict=True,
)
self.parent.assertEqual(result.prediction_logits.shape, (self.batch_size, self.seq_length, self.vocab_size))
@@ -427,7 +423,6 @@ class LxmertModelTester:
token_type_ids=token_type_ids,
attention_mask=input_mask,
ans=ans,
return_dict=True,
)
result_qa = model_qa(
@@ -437,7 +432,6 @@ class LxmertModelTester:
labels=ans,
token_type_ids=token_type_ids,
attention_mask=input_mask,
return_dict=True,
)
model_pretrain.resize_num_qa_labels(num_small_labels)
@@ -450,7 +444,6 @@ class LxmertModelTester:
token_type_ids=token_type_ids,
attention_mask=input_mask,
ans=less_labels_ans,
return_dict=True,
)
result_qa_less = model_qa(
@@ -460,7 +453,6 @@ class LxmertModelTester:
labels=less_labels_ans,
token_type_ids=token_type_ids,
attention_mask=input_mask,
return_dict=True,
)
model_pretrain.resize_num_qa_labels(num_large_labels)
@@ -473,7 +465,6 @@ class LxmertModelTester:
token_type_ids=token_type_ids,
attention_mask=input_mask,
ans=more_labels_ans,
return_dict=True,
)
result_qa_more = model_qa(
@@ -483,7 +474,6 @@ class LxmertModelTester:
labels=more_labels_ans,
token_type_ids=token_type_ids,
attention_mask=input_mask,
return_dict=True,
)
model_qa_labels = model_qa.num_qa_labels

View File

@@ -50,7 +50,6 @@ class ModelTester:
decoder_ffn_dim=32,
max_position_embeddings=48,
add_final_layer_norm=True,
return_dict=True,
)
def prepare_config_and_inputs_for_common(self):

View File

@@ -37,7 +37,6 @@ class ModelTester:
decoder_ffn_dim=32,
max_position_embeddings=48,
add_final_layer_norm=True,
return_dict=True,
)
def prepare_config_and_inputs_for_common(self):
@@ -132,7 +131,6 @@ class MBartEnroIntegrationTest(AbstractSeq2SeqIntegrationTest):
decoder_ffn_dim=32,
max_position_embeddings=48,
add_final_layer_norm=True,
return_dict=True,
)
lm_model = MBartForConditionalGeneration(config).to(torch_device)
context = torch.Tensor([[71, 82, 18, 33, 46, 91, 2], [68, 34, 26, 58, 30, 2, 1]]).long().to(torch_device)

View File

@@ -124,7 +124,6 @@ class MobileBertModelTester:
type_vocab_size=self.type_vocab_size,
is_decoder=False,
initializer_range=self.initializer_range,
return_dict=True,
)
return config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels

View File

@@ -94,7 +94,6 @@ class OpenAIGPTModelTester:
# type_vocab_size=self.type_vocab_size,
# initializer_range=self.initializer_range
pad_token_id=self.pad_token_id,
return_dict=True,
)
head_mask = ids_tensor([self.num_hidden_layers, self.num_attention_heads], 2)

View File

@@ -33,7 +33,6 @@ class ModelTester:
decoder_ffn_dim=32,
max_position_embeddings=48,
add_final_layer_norm=True,
return_dict=True,
)
def prepare_config_and_inputs_for_common(self):

View File

@@ -142,7 +142,6 @@ class ProphetNetModelTester:
disable_ngram_loss=self.disable_ngram_loss,
max_position_embeddings=self.max_position_embeddings,
is_encoder_decoder=self.is_encoder_decoder,
return_dict=True,
)
return (
@@ -344,7 +343,6 @@ class ProphetNetModelTester:
decoder_input_ids=decoder_input_ids,
attention_mask=attention_mask,
decoder_attention_mask=decoder_attention_mask,
return_dict=True,
)
tied_model_result = tied_model(
@@ -352,7 +350,6 @@ class ProphetNetModelTester:
decoder_input_ids=decoder_input_ids,
attention_mask=attention_mask,
decoder_attention_mask=decoder_attention_mask,
return_dict=True,
)
# check that the tied model has fewer parameters
@@ -419,7 +416,6 @@ class ProphetNetModelTester:
attention_mask=attention_mask,
decoder_attention_mask=decoder_attention_mask,
labels=lm_labels,
return_dict=True,
)
self.parent.assertTrue(torch.allclose(result.loss, torch.tensor(128.2925, device=torch_device), atol=1e-3))
@@ -433,9 +429,7 @@ class ProphetNetModelTester:
model.to(torch_device)
model.eval()
outputs_no_mask = model(
input_ids=input_ids[:, :5], decoder_input_ids=decoder_input_ids[:, :5], return_dict=True
)
outputs_no_mask = model(input_ids=input_ids[:, :5], decoder_input_ids=decoder_input_ids[:, :5])
attention_mask = torch.ones_like(input_ids)
decoder_attention_mask = torch.ones_like(decoder_input_ids)
@@ -446,7 +440,6 @@ class ProphetNetModelTester:
attention_mask=attention_mask,
decoder_input_ids=decoder_input_ids,
decoder_attention_mask=decoder_attention_mask,
return_dict=True,
)
# check encoder
@@ -524,7 +517,6 @@ class ProphetNetStandaloneDecoderModelTester:
bos_token_id=1,
eos_token_id=2,
ngram=2,
return_dict=True,
num_buckets=32,
relative_max_distance=128,
disable_ngram_loss=False,
@@ -562,7 +554,6 @@ class ProphetNetStandaloneDecoderModelTester:
self.max_position_embeddings = max_position_embeddings
self.add_cross_attention = add_cross_attention
self.is_encoder_decoder = is_encoder_decoder
self.return_dict = return_dict
self.scope = None
self.decoder_key_length = decoder_seq_length
@@ -602,7 +593,6 @@ class ProphetNetStandaloneDecoderModelTester:
max_position_embeddings=self.max_position_embeddings,
add_cross_attention=self.add_cross_attention,
is_encoder_decoder=self.is_encoder_decoder,
return_dict=self.return_dict,
)
return (
@@ -757,7 +747,6 @@ class ProphetNetStandaloneEncoderModelTester:
pad_token_id=0,
bos_token_id=1,
eos_token_id=2,
return_dict=True,
num_buckets=32,
relative_max_distance=128,
disable_ngram_loss=False,
@@ -794,7 +783,6 @@ class ProphetNetStandaloneEncoderModelTester:
self.max_position_embeddings = max_position_embeddings
self.add_cross_attention = add_cross_attention
self.is_encoder_decoder = is_encoder_decoder
self.return_dict = return_dict
self.scope = None
self.decoder_key_length = decoder_seq_length
@@ -829,7 +817,6 @@ class ProphetNetStandaloneEncoderModelTester:
max_position_embeddings=self.max_position_embeddings,
add_cross_attention=self.add_cross_attention,
is_encoder_decoder=self.is_encoder_decoder,
return_dict=self.return_dict,
)
return (
@@ -919,7 +906,6 @@ class ProphetNetModelTest(ModelTesterMixin, GenerationTesterMixin, unittest.Test
# this method overrides the one in `test_modeling_common.py`
def test_attention_outputs(self):
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
config.return_dict = True
seq_len = getattr(self.model_tester, "seq_length", None)
decoder_seq_length = getattr(self.model_tester, "decoder_seq_length", seq_len)
@@ -933,7 +919,6 @@ class ProphetNetModelTest(ModelTesterMixin, GenerationTesterMixin, unittest.Test
for model_class in self.all_model_classes:
inputs_dict["output_attentions"] = True
inputs_dict["output_hidden_states"] = False
config.return_dict = True
model = model_class(config)
model.to(torch_device)
model.eval()
@@ -1121,7 +1106,6 @@ class ProphetNetModelIntegrationTest(unittest.TestCase):
attention_mask=None,
encoder_outputs=None,
decoder_input_ids=decoder_prev_ids,
return_dict=True,
)
output_predited_logits = output[0]
expected_shape = torch.Size((1, 12, 30522))
@@ -1143,9 +1127,7 @@ class ProphetNetModelIntegrationTest(unittest.TestCase):
assert torch.allclose(encoder_outputs[:, :3, :3], expected_encoder_outputs_slice, atol=1e-4)
# decoder outputs
decoder_outputs = model.prophetnet.decoder(
decoder_prev_ids, encoder_hidden_states=encoder_outputs, return_dict=True
)
decoder_outputs = model.prophetnet.decoder(decoder_prev_ids, encoder_hidden_states=encoder_outputs)
predicting_streams = decoder_outputs[1].view(1, model.config.ngram, 12, -1)
predicting_streams_logits = model.lm_head(predicting_streams)
next_first_stream_logits = predicting_streams_logits[:, 0]

View File

@@ -174,7 +174,6 @@ class ReformerModelTester:
attn_layers=self.attn_layers,
pad_token_id=self.pad_token_id,
hash_seed=self.hash_seed,
return_dict=True,
)
return (

View File

@@ -103,7 +103,6 @@ class RobertaModelTester:
max_position_embeddings=self.max_position_embeddings,
type_vocab_size=self.type_vocab_size,
initializer_range=self.initializer_range,
return_dict=True,
)
return config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels

View File

@@ -131,7 +131,6 @@ if is_torch_available():
post_attention_groups=self.post_attention_groups,
intermediate_groups=self.intermediate_groups,
output_groups=self.output_groups,
return_dict=True,
)
return config, input_ids, input_mask, sequence_labels, token_labels, choice_labels

View File

@@ -115,7 +115,6 @@ class T5ModelTester:
bos_token_id=self.pad_token_id,
pad_token_id=self.pad_token_id,
decoder_start_token_id=self.decoder_start_token_id,
return_dict=True,
)
return (

View File

@@ -121,7 +121,6 @@ class TFAlbertModelTester:
max_position_embeddings=self.max_position_embeddings,
type_vocab_size=self.type_vocab_size,
initializer_range=self.initializer_range,
return_dict=True,
)
return config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels

View File

@@ -182,7 +182,6 @@ class TFBartHeadTests(unittest.TestCase):
eos_token_id=2,
pad_token_id=1,
bos_token_id=0,
return_dict=True,
decoder_start_token_id=2,
)
return config, input_ids, batch_size
@@ -206,7 +205,6 @@ class TFBartHeadTests(unittest.TestCase):
encoder_ffn_dim=32,
decoder_ffn_dim=32,
max_position_embeddings=48,
return_dict=True,
)
lm_model = TFBartForConditionalGeneration(config)
context = tf.fill((7, 2), 4)
@@ -356,7 +354,7 @@ class FasterTFBartModelIntegrationTests(unittest.TestCase):
padding="longest",
truncation=True,
)
features = self.xsum_1_1_model.get_encoder()(**batch, return_dict=True).last_hidden_state
features = self.xsum_1_1_model.get_encoder()(**batch).last_hidden_state
import numpy as np
expected = np.array([[-0.0828, -0.0251, -0.0674], [0.1277, 0.3311, -0.0255], [0.2613, -0.0840, -0.2763]])

View File

@@ -120,7 +120,6 @@ class TFBertModelTester:
max_position_embeddings=self.max_position_embeddings,
type_vocab_size=self.type_vocab_size,
initializer_range=self.initializer_range,
return_dict=True,
)
return config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels

View File

@@ -39,7 +39,7 @@ class TFCamembertModelIntegrationTest(unittest.TestCase):
dtype=tf.int32,
) # J'aime le camembert !"
output = model(input_ids, return_dict=True)["last_hidden_state"]
output = model(input_ids)["last_hidden_state"]
expected_shape = tf.TensorShape((1, 10, 768))
self.assertEqual(output.shape, expected_shape)
# compare the actual values for a slice.

View File

@@ -284,7 +284,7 @@ class TFModelTesterMixin:
if isinstance(after_outputs, tf.Tensor):
out_1 = after_outputs.numpy()
elif isinstance(after_outputs, dict):
out_1 = after_outputs[list(after_outputs.keys())[0]]
out_1 = after_outputs[list(after_outputs.keys())[0]].numpy()
else:
out_1 = after_outputs[0].numpy()
out_2 = outputs[0].numpy()

View File

@@ -94,7 +94,6 @@ class TFCTRLModelTester(object):
n_ctx=self.max_position_embeddings,
# type_vocab_size=self.type_vocab_size,
# initializer_range=self.initializer_range,
return_dict=True,
)
head_mask = ids_tensor([self.num_hidden_layers, self.num_attention_heads], 2)

View File

@@ -91,7 +91,6 @@ class TFDistilBertModelTester:
attention_dropout=self.attention_probs_dropout_prob,
max_position_embeddings=self.max_position_embeddings,
initializer_range=self.initializer_range,
return_dict=True,
)
return config, input_ids, input_mask, sequence_labels, token_labels, choice_labels

View File

@@ -97,7 +97,6 @@ class TFElectraModelTester:
max_position_embeddings=self.max_position_embeddings,
type_vocab_size=self.type_vocab_size,
initializer_range=self.initializer_range,
return_dict=True,
)
return config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels

View File

@@ -114,7 +114,6 @@ class TFFlaubertModelTester:
summary_type=self.summary_type,
use_proj=self.use_proj,
bos_token_id=self.bos_token_id,
return_dict=True,
)
return (

View File

@@ -137,7 +137,6 @@ class TFFunnelModelTester:
activation_dropout=self.activation_dropout,
max_position_embeddings=self.max_position_embeddings,
type_vocab_size=self.type_vocab_size,
return_dict=True,
)
return (

View File

@@ -104,7 +104,6 @@ class TFGPT2ModelTester:
# initializer_range=self.initializer_range
bos_token_id=self.bos_token_id,
eos_token_id=self.eos_token_id,
return_dict=True,
)
head_mask = ids_tensor([self.num_hidden_layers, self.num_attention_heads], 2)

View File

@@ -594,7 +594,9 @@ class TFLongformerModelIntegrationTest(unittest.TestCase):
# 'Hello world! ' repeated 1000 times
input_ids = tf.convert_to_tensor([[0] + [20920, 232, 328, 1437] * 1000 + [2]], dtype=tf.dtypes.int32)
loss, prediction_scores = model(input_ids, labels=input_ids)
output = model(input_ids, labels=input_ids)
loss = output.loss
prediction_scores = output.logits
expected_loss = tf.constant(0.0073798)
expected_prediction_scores_sum = tf.constant(-610476600.0)

View File

@@ -297,7 +297,6 @@ class TFLxmertModelTester(object):
matched_label=matched_label,
ans=ans,
output_attentions=output_attentions,
return_dict=True,
)
result = model(
input_ids,
@@ -352,7 +351,6 @@ class TFLxmertModelTester(object):
matched_label=matched_label,
ans=ans,
output_attentions=not output_attentions,
return_dict=True,
)
self.parent.assertEqual(result.prediction_logits.shape, (self.batch_size, self.seq_length, self.vocab_size))
@@ -695,7 +693,8 @@ class TFLxmertModelTest(TFModelTesterMixin, unittest.TestCase):
model = tf.keras.models.load_model(tmpdirname)
outputs = model(class_inputs_dict)
language_hidden_states, vision_hidden_states = outputs[-2], outputs[-1]
language_hidden_states = outputs["language_hidden_states"]
vision_hidden_states = outputs["vision_hidden_states"]
self.assertEqual(len(language_hidden_states), self.model_tester.num_hidden_layers["language"] + 1)
self.assertEqual(len(vision_hidden_states), self.model_tester.num_hidden_layers["vision"] + 1)
@@ -731,11 +730,9 @@ class TFLxmertModelTest(TFModelTesterMixin, unittest.TestCase):
model = tf.keras.models.load_model(tmpdirname)
outputs = model(class_inputs_dict)
language_attentions, vision_attentions, cross_encoder_attentions = (
outputs[-3],
outputs[-2],
outputs[-1],
)
language_attentions = outputs["language_attentions"]
vision_attentions = outputs["vision_attentions"]
cross_encoder_attentions = outputs["cross_encoder_attentions"]
self.assertEqual(len(language_attentions), self.model_tester.num_hidden_layers["language"])
self.assertEqual(len(vision_attentions), self.model_tester.num_hidden_layers["vision"])

View File

@@ -139,7 +139,6 @@ class TFMobileBertModelTest(TFModelTesterMixin, unittest.TestCase):
type_vocab_size=self.type_vocab_size,
initializer_range=self.initializer_range,
embedding_size=self.embedding_size,
return_dict=True,
)
return config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels

View File

@@ -99,7 +99,6 @@ class TFOpenAIGPTModelTester:
n_ctx=self.max_position_embeddings,
# type_vocab_size=self.type_vocab_size,
# initializer_range=self.initializer_range,
return_dict=True,
)
head_mask = ids_tensor([self.num_hidden_layers, self.num_attention_heads], 2)

View File

@@ -97,7 +97,6 @@ class TFRobertaModelTester:
max_position_embeddings=self.max_position_embeddings,
type_vocab_size=self.type_vocab_size,
initializer_range=self.initializer_range,
return_dict=True,
)
return config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels

View File

@@ -78,7 +78,6 @@ class TFT5ModelTester:
bos_token_id=self.pad_token_id,
pad_token_id=self.pad_token_id,
decoder_start_token_id=self.pad_token_id,
return_dict=True,
)
return (config, input_ids, input_mask, token_labels)

View File

@@ -77,7 +77,6 @@ class TFTransfoXLModelTester:
div_val=self.div_val,
n_layer=self.num_hidden_layers,
eos_token_id=self.eos_token_id,
return_dict=True,
)
return (config, input_ids_1, input_ids_2, lm_labels)

View File

@@ -114,7 +114,6 @@ class TFXLMModelTester:
summary_type=self.summary_type,
use_proj=self.use_proj,
bos_token_id=self.bos_token_id,
return_dict=True,
)
return (

View File

@@ -39,7 +39,7 @@ class TFFlaubertModelIntegrationTest(unittest.TestCase):
"attention_mask": tf.convert_to_tensor([[1, 1, 1, 1, 1, 1]], dtype=tf.int32),
}
output = model(features, return_dict=True)["last_hidden_state"]
output = model(features)["last_hidden_state"]
expected_shape = tf.TensorShape((1, 6, 768))
self.assertEqual(output.shape, expected_shape)
# compare the actual values for a slice.

View File

@@ -111,7 +111,6 @@ class TFXLNetModelTester:
bos_token_id=self.bos_token_id,
pad_token_id=self.pad_token_id,
eos_token_id=self.eos_token_id,
return_dict=True,
)
return (

View File

@@ -78,7 +78,6 @@ class TransfoXLModelTester:
div_val=self.div_val,
n_layer=self.num_hidden_layers,
eos_token_id=self.eos_token_id,
return_dict=True,
)
return (config, input_ids_1, input_ids_2, lm_labels)

View File

@@ -116,7 +116,6 @@ class XLMModelTester:
use_proj=self.use_proj,
num_labels=self.num_labels,
bos_token_id=self.bos_token_id,
return_dict=True,
)
return (

View File

@@ -32,7 +32,7 @@ if is_torch_available():
class XLMRobertaModelIntegrationTest(unittest.TestCase):
@slow
def test_xlm_roberta_base(self):
model = XLMRobertaModel.from_pretrained("xlm-roberta-base", return_dict=True)
model = XLMRobertaModel.from_pretrained("xlm-roberta-base")
input_ids = torch.tensor([[0, 581, 10269, 83, 99942, 136, 60742, 23, 70, 80583, 18276, 2]])
# The dog is cute and lives in the garden house
@@ -51,7 +51,7 @@ class XLMRobertaModelIntegrationTest(unittest.TestCase):
@slow
def test_xlm_roberta_large(self):
model = XLMRobertaModel.from_pretrained("xlm-roberta-large", return_dict=True)
model = XLMRobertaModel.from_pretrained("xlm-roberta-large")
input_ids = torch.tensor([[0, 581, 10269, 83, 99942, 136, 60742, 23, 70, 80583, 18276, 2]])
# The dog is cute and lives in the garden house

View File

@@ -148,7 +148,6 @@ class XLNetModelTester:
bos_token_id=self.bos_token_id,
pad_token_id=self.pad_token_id,
eos_token_id=self.eos_token_id,
return_dict=True,
)
return (