[Almost all TF models] TF clean up: add missing CLM / MLM loss; fix T5 naming and keras compile (#5395)
* add first version of clm tf * make style * add more tests for bert * update tf clm loss * fix tests * correct tf ner script * add mlm loss * delete bogus file * clean tf auto model + add tests * finish adding clm loss everywhere * fix training in distilbert * fix flake8 * save intermediate * fix tf t5 naming * remove prints * finish up * up * fix tf gpt2 * fix new test utils import * fix flake8 * keep backward compatibility * Update src/transformers/modeling_tf_albert.py Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com> * Update src/transformers/modeling_tf_auto.py Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com> * Update src/transformers/modeling_tf_electra.py Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com> * Update src/transformers/modeling_tf_roberta.py Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com> * Update src/transformers/modeling_tf_mobilebert.py Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com> * Update src/transformers/modeling_tf_auto.py Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com> * Update src/transformers/modeling_tf_bert.py Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com> * Update src/transformers/modeling_tf_distilbert.py Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com> * apply sylvains suggestions Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com>
This commit is contained in:
committed by
GitHub
parent
33e43edddc
commit
4dc65591b5
@@ -17,7 +17,7 @@
|
||||
import unittest
|
||||
|
||||
from transformers import is_torch_available
|
||||
from transformers.testing_utils import require_torch, torch_device
|
||||
from transformers.testing_utils import require_torch, slow, torch_device
|
||||
|
||||
from .test_configuration_common import ConfigTester
|
||||
from .test_modeling_common import ModelTesterMixin, ids_tensor
|
||||
@@ -32,6 +32,7 @@ if is_torch_available():
|
||||
DistilBertForTokenClassification,
|
||||
DistilBertForQuestionAnswering,
|
||||
DistilBertForSequenceClassification,
|
||||
DISTILBERT_PRETRAINED_MODEL_ARCHIVE_LIST,
|
||||
)
|
||||
|
||||
class DistilBertModelTester(object):
|
||||
@@ -276,8 +277,8 @@ class DistilBertModelTest(ModelTesterMixin, unittest.TestCase):
|
||||
config_and_inputs = self.model_tester.prepare_config_and_inputs()
|
||||
self.model_tester.create_and_check_distilbert_for_multiple_choice(*config_and_inputs)
|
||||
|
||||
# @slow
|
||||
# def test_model_from_pretrained(self):
|
||||
# for model_name in DISTILBERT_PRETRAINED_MODEL_ARCHIVE_LIST[:1]:
|
||||
# model = DistilBertModel.from_pretrained(model_name)
|
||||
# self.assertIsNotNone(model)
|
||||
@slow
|
||||
def test_model_from_pretrained(self):
|
||||
for model_name in DISTILBERT_PRETRAINED_MODEL_ARCHIVE_LIST[:1]:
|
||||
model = DistilBertModel.from_pretrained(model_name)
|
||||
self.assertIsNotNone(model)
|
||||
|
||||
@@ -24,6 +24,8 @@ if is_tf_available():
|
||||
from transformers import (
|
||||
AutoConfig,
|
||||
BertConfig,
|
||||
GPT2Config,
|
||||
T5Config,
|
||||
TFAutoModel,
|
||||
TFBertModel,
|
||||
TFAutoModelForPreTraining,
|
||||
@@ -35,6 +37,25 @@ if is_tf_available():
|
||||
TFBertForSequenceClassification,
|
||||
TFAutoModelForQuestionAnswering,
|
||||
TFBertForQuestionAnswering,
|
||||
TFAutoModelForCausalLM,
|
||||
TFGPT2LMHeadModel,
|
||||
TFAutoModelForMaskedLM,
|
||||
TFAutoModelForSeq2SeqLM,
|
||||
TFT5ForConditionalGeneration,
|
||||
)
|
||||
from transformers.modeling_tf_bert import TF_BERT_PRETRAINED_MODEL_ARCHIVE_LIST
|
||||
from transformers.modeling_tf_gpt2 import TF_GPT2_PRETRAINED_MODEL_ARCHIVE_LIST
|
||||
from transformers.modeling_tf_t5 import TF_T5_PRETRAINED_MODEL_ARCHIVE_LIST
|
||||
from transformers.modeling_tf_auto import (
|
||||
TF_MODEL_MAPPING,
|
||||
TF_MODEL_FOR_PRETRAINING_MAPPING,
|
||||
TF_MODEL_FOR_QUESTION_ANSWERING_MAPPING,
|
||||
TF_MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING,
|
||||
TF_MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING,
|
||||
TF_MODEL_WITH_LM_HEAD_MAPPING,
|
||||
TF_MODEL_FOR_CAUSAL_LM_MAPPING,
|
||||
TF_MODEL_FOR_MASKED_LM_MAPPING,
|
||||
TF_MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING,
|
||||
)
|
||||
|
||||
|
||||
@@ -72,10 +93,21 @@ class TFAutoModelTest(unittest.TestCase):
|
||||
self.assertIsNotNone(model)
|
||||
self.assertIsInstance(model, TFBertForPreTraining)
|
||||
|
||||
@slow
|
||||
def test_model_for_causal_lm(self):
|
||||
for model_name in TF_GPT2_PRETRAINED_MODEL_ARCHIVE_LIST[:1]:
|
||||
config = AutoConfig.from_pretrained(model_name)
|
||||
self.assertIsNotNone(config)
|
||||
self.assertIsInstance(config, GPT2Config)
|
||||
|
||||
model = TFAutoModelForCausalLM.from_pretrained(model_name)
|
||||
model, loading_info = TFAutoModelForCausalLM.from_pretrained(model_name, output_loading_info=True)
|
||||
self.assertIsNotNone(model)
|
||||
self.assertIsInstance(model, TFGPT2LMHeadModel)
|
||||
|
||||
@slow
|
||||
def test_lmhead_model_from_pretrained(self):
|
||||
# for model_name in TF_BERT_PRETRAINED_MODEL_ARCHIVE_LIST[:1]:
|
||||
for model_name in ["bert-base-uncased"]:
|
||||
for model_name in TF_BERT_PRETRAINED_MODEL_ARCHIVE_LIST[:1]:
|
||||
config = AutoConfig.from_pretrained(model_name)
|
||||
self.assertIsNotNone(config)
|
||||
self.assertIsInstance(config, BertConfig)
|
||||
@@ -84,6 +116,30 @@ class TFAutoModelTest(unittest.TestCase):
|
||||
self.assertIsNotNone(model)
|
||||
self.assertIsInstance(model, TFBertForMaskedLM)
|
||||
|
||||
@slow
|
||||
def test_model_for_masked_lm(self):
|
||||
for model_name in TF_BERT_PRETRAINED_MODEL_ARCHIVE_LIST[:1]:
|
||||
config = AutoConfig.from_pretrained(model_name)
|
||||
self.assertIsNotNone(config)
|
||||
self.assertIsInstance(config, BertConfig)
|
||||
|
||||
model = TFAutoModelForMaskedLM.from_pretrained(model_name)
|
||||
model, loading_info = TFAutoModelForMaskedLM.from_pretrained(model_name, output_loading_info=True)
|
||||
self.assertIsNotNone(model)
|
||||
self.assertIsInstance(model, TFBertForMaskedLM)
|
||||
|
||||
@slow
|
||||
def test_model_for_encoder_decoder_lm(self):
|
||||
for model_name in TF_T5_PRETRAINED_MODEL_ARCHIVE_LIST[:1]:
|
||||
config = AutoConfig.from_pretrained(model_name)
|
||||
self.assertIsNotNone(config)
|
||||
self.assertIsInstance(config, T5Config)
|
||||
|
||||
model = TFAutoModelForSeq2SeqLM.from_pretrained(model_name)
|
||||
model, loading_info = TFAutoModelForSeq2SeqLM.from_pretrained(model_name, output_loading_info=True)
|
||||
self.assertIsNotNone(model)
|
||||
self.assertIsInstance(model, TFT5ForConditionalGeneration)
|
||||
|
||||
@slow
|
||||
def test_sequence_classification_model_from_pretrained(self):
|
||||
# for model_name in TF_BERT_PRETRAINED_MODEL_ARCHIVE_LIST[:1]:
|
||||
@@ -119,3 +175,28 @@ class TFAutoModelTest(unittest.TestCase):
|
||||
self.assertIsInstance(model, TFRobertaForMaskedLM)
|
||||
self.assertEqual(model.num_parameters(), 14830)
|
||||
self.assertEqual(model.num_parameters(only_trainable=True), 14830)
|
||||
|
||||
def test_parents_and_children_in_mappings(self):
|
||||
# Test that the children are placed before the parents in the mappings, as the `isinstance` check will be triggered
|
||||
# by the parents and will return the wrong configuration type when using auto models
|
||||
mappings = (
|
||||
TF_MODEL_MAPPING,
|
||||
TF_MODEL_FOR_PRETRAINING_MAPPING,
|
||||
TF_MODEL_FOR_QUESTION_ANSWERING_MAPPING,
|
||||
TF_MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING,
|
||||
TF_MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING,
|
||||
TF_MODEL_WITH_LM_HEAD_MAPPING,
|
||||
TF_MODEL_FOR_CAUSAL_LM_MAPPING,
|
||||
TF_MODEL_FOR_MASKED_LM_MAPPING,
|
||||
TF_MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING,
|
||||
)
|
||||
|
||||
for mapping in mappings:
|
||||
mapping = tuple(mapping.items())
|
||||
for index, (child_config, child_model) in enumerate(mapping[1:]):
|
||||
for parent_config, parent_model in mapping[: index + 1]:
|
||||
with self.subTest(
|
||||
msg="Testing if {} is child of {}".format(child_config.__name__, parent_config.__name__)
|
||||
):
|
||||
self.assertFalse(issubclass(child_config, parent_config))
|
||||
self.assertFalse(issubclass(child_model, parent_model))
|
||||
|
||||
@@ -27,6 +27,7 @@ if is_tf_available():
|
||||
import tensorflow as tf
|
||||
from transformers.modeling_tf_bert import (
|
||||
TFBertModel,
|
||||
TFBertLMHeadModel,
|
||||
TFBertForMaskedLM,
|
||||
TFBertForNextSentencePrediction,
|
||||
TFBertForPreTraining,
|
||||
@@ -142,11 +143,30 @@ class TFBertModelTester:
|
||||
)
|
||||
self.parent.assertListEqual(list(result["pooled_output"].shape), [self.batch_size, self.hidden_size])
|
||||
|
||||
def create_and_check_bert_lm_head(
|
||||
self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
|
||||
):
|
||||
config.is_decoder = True
|
||||
model = TFBertLMHeadModel(config=config)
|
||||
inputs = {
|
||||
"input_ids": input_ids,
|
||||
"attention_mask": input_mask,
|
||||
"token_type_ids": token_type_ids,
|
||||
}
|
||||
(prediction_scores,) = model(inputs)
|
||||
self.parent.assertListEqual(
|
||||
list(prediction_scores.numpy().shape), [self.batch_size, self.seq_length, self.vocab_size]
|
||||
)
|
||||
|
||||
def create_and_check_bert_for_masked_lm(
|
||||
self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
|
||||
):
|
||||
model = TFBertForMaskedLM(config=config)
|
||||
inputs = {"input_ids": input_ids, "attention_mask": input_mask, "token_type_ids": token_type_ids}
|
||||
inputs = {
|
||||
"input_ids": input_ids,
|
||||
"attention_mask": input_mask,
|
||||
"token_type_ids": token_type_ids,
|
||||
}
|
||||
(prediction_scores,) = model(inputs)
|
||||
result = {
|
||||
"prediction_scores": prediction_scores.numpy(),
|
||||
@@ -186,11 +206,14 @@ class TFBertModelTester:
|
||||
):
|
||||
config.num_labels = self.num_labels
|
||||
model = TFBertForSequenceClassification(config=config)
|
||||
inputs = {"input_ids": input_ids, "attention_mask": input_mask, "token_type_ids": token_type_ids}
|
||||
(logits,) = model(inputs)
|
||||
result = {
|
||||
"logits": logits.numpy(),
|
||||
inputs = {
|
||||
"input_ids": input_ids,
|
||||
"attention_mask": input_mask,
|
||||
"token_type_ids": token_type_ids,
|
||||
}
|
||||
|
||||
(logits,) = model(inputs)
|
||||
result = {"logits": logits.numpy()}
|
||||
self.parent.assertListEqual(list(result["logits"].shape), [self.batch_size, self.num_labels])
|
||||
|
||||
def create_and_check_bert_for_multiple_choice(
|
||||
@@ -207,9 +230,7 @@ class TFBertModelTester:
|
||||
"token_type_ids": multiple_choice_token_type_ids,
|
||||
}
|
||||
(logits,) = model(inputs)
|
||||
result = {
|
||||
"logits": logits.numpy(),
|
||||
}
|
||||
result = {"logits": logits.numpy()}
|
||||
self.parent.assertListEqual(list(result["logits"].shape), [self.batch_size, self.num_choices])
|
||||
|
||||
def create_and_check_bert_for_token_classification(
|
||||
@@ -217,7 +238,11 @@ class TFBertModelTester:
|
||||
):
|
||||
config.num_labels = self.num_labels
|
||||
model = TFBertForTokenClassification(config=config)
|
||||
inputs = {"input_ids": input_ids, "attention_mask": input_mask, "token_type_ids": token_type_ids}
|
||||
inputs = {
|
||||
"input_ids": input_ids,
|
||||
"attention_mask": input_mask,
|
||||
"token_type_ids": token_type_ids,
|
||||
}
|
||||
(logits,) = model(inputs)
|
||||
result = {
|
||||
"logits": logits.numpy(),
|
||||
@@ -228,12 +253,14 @@ class TFBertModelTester:
|
||||
self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
|
||||
):
|
||||
model = TFBertForQuestionAnswering(config=config)
|
||||
inputs = {"input_ids": input_ids, "attention_mask": input_mask, "token_type_ids": token_type_ids}
|
||||
start_logits, end_logits = model(inputs)
|
||||
result = {
|
||||
"start_logits": start_logits.numpy(),
|
||||
"end_logits": end_logits.numpy(),
|
||||
inputs = {
|
||||
"input_ids": input_ids,
|
||||
"attention_mask": input_mask,
|
||||
"token_type_ids": token_type_ids,
|
||||
}
|
||||
|
||||
start_logits, end_logits = model(inputs)
|
||||
result = {"start_logits": start_logits.numpy(), "end_logits": end_logits.numpy()}
|
||||
self.parent.assertListEqual(list(result["start_logits"].shape), [self.batch_size, self.seq_length])
|
||||
self.parent.assertListEqual(list(result["end_logits"].shape), [self.batch_size, self.seq_length])
|
||||
|
||||
@@ -285,6 +312,10 @@ class TFBertModelTest(TFModelTesterMixin, unittest.TestCase):
|
||||
config_and_inputs = self.model_tester.prepare_config_and_inputs()
|
||||
self.model_tester.create_and_check_bert_for_masked_lm(*config_and_inputs)
|
||||
|
||||
def test_for_causal_lm(self):
|
||||
config_and_inputs = self.model_tester.prepare_config_and_inputs()
|
||||
self.model_tester.create_and_check_bert_lm_head(*config_and_inputs)
|
||||
|
||||
def test_for_multiple_choice(self):
|
||||
config_and_inputs = self.model_tester.prepare_config_and_inputs()
|
||||
self.model_tester.create_and_check_bert_for_multiple_choice(*config_and_inputs)
|
||||
|
||||
@@ -38,6 +38,9 @@ if is_tf_available():
|
||||
TF_MODEL_FOR_QUESTION_ANSWERING_MAPPING,
|
||||
TF_MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING,
|
||||
TF_MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING,
|
||||
TF_MODEL_FOR_CAUSAL_LM_MAPPING,
|
||||
TF_MODEL_FOR_MASKED_LM_MAPPING,
|
||||
TF_MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING,
|
||||
)
|
||||
|
||||
if _tf_gpu_memory_limit is not None:
|
||||
@@ -93,6 +96,12 @@ class TFModelTesterMixin:
|
||||
inputs_dict["labels"] = tf.zeros(self.model_tester.batch_size)
|
||||
elif model_class in TF_MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING.values():
|
||||
inputs_dict["labels"] = tf.zeros((self.model_tester.batch_size, self.model_tester.seq_length))
|
||||
elif model_class in TF_MODEL_FOR_CAUSAL_LM_MAPPING.values():
|
||||
inputs_dict["labels"] = tf.zeros((self.model_tester.batch_size, self.model_tester.seq_length))
|
||||
elif model_class in TF_MODEL_FOR_MASKED_LM_MAPPING.values():
|
||||
inputs_dict["labels"] = tf.zeros((self.model_tester.batch_size, self.model_tester.seq_length))
|
||||
elif model_class in TF_MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING.values():
|
||||
inputs_dict["labels"] = tf.zeros((self.model_tester.batch_size, self.model_tester.seq_length))
|
||||
return inputs_dict
|
||||
|
||||
def test_initialization(self):
|
||||
@@ -291,7 +300,7 @@ class TFModelTesterMixin:
|
||||
"decoder_input_ids": tf.keras.Input(
|
||||
batch_shape=(2, 2000), name="decoder_input_ids", dtype="int32"
|
||||
),
|
||||
"inputs": tf.keras.Input(batch_shape=(2, 2000), name="inputs", dtype="int32"),
|
||||
"input_ids": tf.keras.Input(batch_shape=(2, 2000), name="input_ids", dtype="int32"),
|
||||
}
|
||||
elif model_class in TF_MODEL_FOR_MULTIPLE_CHOICE_MAPPING.values():
|
||||
input_ids = tf.keras.Input(batch_shape=(4, 2, 2000), name="input_ids", dtype="int32")
|
||||
@@ -325,7 +334,7 @@ class TFModelTesterMixin:
|
||||
outputs_dict = model(self._prepare_for_class(inputs_dict, model_class))
|
||||
|
||||
inputs_keywords = copy.deepcopy(self._prepare_for_class(inputs_dict, model_class))
|
||||
input_ids = inputs_keywords.pop("input_ids" if not self.is_encoder_decoder else "inputs", None,)
|
||||
input_ids = inputs_keywords.pop("input_ids", None)
|
||||
outputs_keywords = model(input_ids, **inputs_keywords)
|
||||
output_dict = outputs_dict[0].numpy()
|
||||
output_keywords = outputs_keywords[0].numpy()
|
||||
@@ -479,9 +488,9 @@ class TFModelTesterMixin:
|
||||
input_ids = inputs["input_ids"]
|
||||
del inputs["input_ids"]
|
||||
else:
|
||||
encoder_input_ids = inputs["inputs"]
|
||||
encoder_input_ids = inputs["input_ids"]
|
||||
decoder_input_ids = inputs.get("decoder_input_ids", encoder_input_ids)
|
||||
del inputs["inputs"]
|
||||
del inputs["input_ids"]
|
||||
inputs.pop("decoder_input_ids", None)
|
||||
|
||||
wte = model.get_input_embeddings()
|
||||
@@ -596,9 +605,15 @@ class TFModelTesterMixin:
|
||||
added_label = prepared_for_class[list(prepared_for_class.keys() - inputs_dict.keys())[0]]
|
||||
loss_size = tf.size(added_label)
|
||||
|
||||
if model.__class__ in TF_MODEL_FOR_CAUSAL_LM_MAPPING.values():
|
||||
# if loss is causal lm loss, labels are shifted, so that one label per batch element
|
||||
# is cut
|
||||
loss_size = loss_size - self.model_tester.batch_size
|
||||
|
||||
# Test that the model correctly computes the loss with kwargs
|
||||
prepared_for_class = self._prepare_for_class(inputs_dict.copy(), model_class, return_labels=True)
|
||||
input_ids = prepared_for_class.pop("input_ids")
|
||||
|
||||
loss = model(input_ids, **prepared_for_class)[0]
|
||||
self.assertEqual(loss.shape, [loss_size])
|
||||
|
||||
|
||||
@@ -17,7 +17,7 @@
|
||||
import unittest
|
||||
|
||||
from transformers import DistilBertConfig, is_tf_available
|
||||
from transformers.testing_utils import require_tf
|
||||
from transformers.testing_utils import require_tf, slow
|
||||
|
||||
from .test_configuration_common import ConfigTester
|
||||
from .test_modeling_tf_common import TFModelTesterMixin, ids_tensor
|
||||
@@ -32,6 +32,7 @@ if is_tf_available():
|
||||
TFDistilBertForSequenceClassification,
|
||||
TFDistilBertForTokenClassification,
|
||||
TFDistilBertForMultipleChoice,
|
||||
TF_DISTILBERT_PRETRAINED_MODEL_ARCHIVE_LIST,
|
||||
)
|
||||
|
||||
|
||||
@@ -118,9 +119,7 @@ class TFDistilBertModelTester:
|
||||
model = TFDistilBertForMaskedLM(config=config)
|
||||
inputs = {"input_ids": input_ids, "attention_mask": input_mask}
|
||||
(prediction_scores,) = model(inputs)
|
||||
result = {
|
||||
"prediction_scores": prediction_scores.numpy(),
|
||||
}
|
||||
result = {"prediction_scores": prediction_scores.numpy()}
|
||||
self.parent.assertListEqual(
|
||||
list(result["prediction_scores"].shape), [self.batch_size, self.seq_length, self.vocab_size]
|
||||
)
|
||||
@@ -129,12 +128,12 @@ class TFDistilBertModelTester:
|
||||
self, config, input_ids, input_mask, sequence_labels, token_labels, choice_labels
|
||||
):
|
||||
model = TFDistilBertForQuestionAnswering(config=config)
|
||||
inputs = {"input_ids": input_ids, "attention_mask": input_mask}
|
||||
start_logits, end_logits = model(inputs)
|
||||
result = {
|
||||
"start_logits": start_logits.numpy(),
|
||||
"end_logits": end_logits.numpy(),
|
||||
inputs = {
|
||||
"input_ids": input_ids,
|
||||
"attention_mask": input_mask,
|
||||
}
|
||||
start_logits, end_logits = model(inputs)
|
||||
result = {"start_logits": start_logits.numpy(), "end_logits": end_logits.numpy()}
|
||||
self.parent.assertListEqual(list(result["start_logits"].shape), [self.batch_size, self.seq_length])
|
||||
self.parent.assertListEqual(list(result["end_logits"].shape), [self.batch_size, self.seq_length])
|
||||
|
||||
@@ -145,9 +144,7 @@ class TFDistilBertModelTester:
|
||||
model = TFDistilBertForSequenceClassification(config)
|
||||
inputs = {"input_ids": input_ids, "attention_mask": input_mask}
|
||||
(logits,) = model(inputs)
|
||||
result = {
|
||||
"logits": logits.numpy(),
|
||||
}
|
||||
result = {"logits": logits.numpy()}
|
||||
self.parent.assertListEqual(list(result["logits"].shape), [self.batch_size, self.num_labels])
|
||||
|
||||
def create_and_check_distilbert_for_multiple_choice(
|
||||
@@ -162,9 +159,7 @@ class TFDistilBertModelTester:
|
||||
"attention_mask": multiple_choice_input_mask,
|
||||
}
|
||||
(logits,) = model(inputs)
|
||||
result = {
|
||||
"logits": logits.numpy(),
|
||||
}
|
||||
result = {"logits": logits.numpy()}
|
||||
self.parent.assertListEqual(list(result["logits"].shape), [self.batch_size, self.num_choices])
|
||||
|
||||
def create_and_check_distilbert_for_token_classification(
|
||||
@@ -236,8 +231,8 @@ class TFDistilBertModelTest(TFModelTesterMixin, unittest.TestCase):
|
||||
config_and_inputs = self.model_tester.prepare_config_and_inputs()
|
||||
self.model_tester.create_and_check_distilbert_for_token_classification(*config_and_inputs)
|
||||
|
||||
# @slow
|
||||
# def test_model_from_pretrained(self):
|
||||
# for model_name in list(DISTILBERT_PRETRAINED_MODEL_ARCHIVE_LIST[:1]:
|
||||
# model = DistilBertModesss.from_pretrained(model_name)
|
||||
# self.assertIsNotNone(model)
|
||||
@slow
|
||||
def test_model_from_pretrained(self):
|
||||
for model_name in list(TF_DISTILBERT_PRETRAINED_MODEL_ARCHIVE_LIST[:1]):
|
||||
model = TFDistilBertModel.from_pretrained(model_name)
|
||||
self.assertIsNotNone(model)
|
||||
|
||||
@@ -77,6 +77,7 @@ class TFT5ModelTester:
|
||||
eos_token_id=self.eos_token_id,
|
||||
bos_token_id=self.pad_token_id,
|
||||
pad_token_id=self.pad_token_id,
|
||||
decoder_start_token_id=self.pad_token_id,
|
||||
)
|
||||
|
||||
return (config, input_ids, input_mask, token_labels)
|
||||
@@ -84,7 +85,7 @@ class TFT5ModelTester:
|
||||
def create_and_check_t5_model(self, config, input_ids, input_mask, token_labels):
|
||||
model = TFT5Model(config=config)
|
||||
inputs = {
|
||||
"inputs": input_ids,
|
||||
"input_ids": input_ids,
|
||||
"decoder_input_ids": input_ids,
|
||||
"decoder_attention_mask": input_mask,
|
||||
}
|
||||
@@ -115,7 +116,7 @@ class TFT5ModelTester:
|
||||
def create_and_check_t5_with_lm_head(self, config, input_ids, input_mask, token_labels):
|
||||
model = TFT5ForConditionalGeneration(config=config)
|
||||
inputs_dict = {
|
||||
"inputs": input_ids,
|
||||
"input_ids": input_ids,
|
||||
"decoder_input_ids": input_ids,
|
||||
"decoder_attention_mask": input_mask,
|
||||
}
|
||||
@@ -209,7 +210,7 @@ class TFT5ModelTester:
|
||||
config_and_inputs = self.prepare_config_and_inputs()
|
||||
(config, input_ids, input_mask, token_labels) = config_and_inputs
|
||||
inputs_dict = {
|
||||
"inputs": input_ids,
|
||||
"input_ids": input_ids,
|
||||
"decoder_input_ids": input_ids,
|
||||
"decoder_attention_mask": input_mask,
|
||||
"use_cache": tf.convert_to_tensor([False]),
|
||||
|
||||
Reference in New Issue
Block a user