Move TF building to an actual build() method (#23760)
* A fun new PR where I break the entire codebase again
* A fun new PR where I break the entire codebase again
* Handle cross-attention
* Move calls to model(model.dummy_inputs) to the new build() method
* Seeing what fails with the build context thing
* make fix-copies
* Let's see what fails with new build methods
* Fix the pytorch crossload build calls
* Fix the overridden build methods in vision_text_dual_encoder
* Make sure all our build methods set self.built or call super().build(), which also sets it
* make fix-copies
* Remove finished TODO
* Tentatively remove unneeded (?) line
* Transpose b in deberta correctly and remove unused threading local
* Get rid of build_with_dummies and all it stands for
* Rollback some changes to TF-PT crossloading
* Correctly call super().build()
This commit is contained in:
@@ -328,7 +328,7 @@ class TFBartModelTest(TFModelTesterMixin, TFCoreModelTesterMixin, PipelineTester
|
||||
old_total_size = config.vocab_size
|
||||
new_total_size = old_total_size + new_tokens_size
|
||||
model = model_class(config=copy.deepcopy(config)) # `resize_token_embeddings` mutates `config`
|
||||
- model(model.dummy_inputs) # builds the embeddings layer
|
||||
+ model.build()
|
||||
model.resize_token_embeddings(new_total_size)
|
||||
|
||||
# fetch the output for an input exclusively made of new members of the vocabulary
|
||||
|
||||
@@ -1070,9 +1070,9 @@ class TFEncoderDecoderModelSaveLoadTests(unittest.TestCase):
|
||||
|
||||
# create two random BERT models for bert2bert & initialize weights (+cross_attention weights)
|
||||
encoder = TFBertModel(config.encoder)
|
||||
- encoder(encoder.dummy_inputs)
|
||||
+ encoder.build()
|
||||
decoder = TFBertLMHeadModel(config.decoder)
|
||||
- decoder(decoder.dummy_inputs)
|
||||
+ decoder.build()
|
||||
|
||||
encoder_decoder_orig = TFEncoderDecoderModel(encoder=encoder, decoder=decoder)
|
||||
|
||||
|
||||
@@ -463,7 +463,7 @@ class TFGPT2ModelTest(TFModelTesterMixin, TFCoreModelTesterMixin, PipelineTester
|
||||
continue
|
||||
|
||||
model = model_class(config)
|
||||
- model(model.dummy_inputs)
|
||||
+ model.build()
|
||||
|
||||
onnx_model_proto, _ = tf2onnx.convert.from_keras(model, opset=self.onnx_min_opset)
|
||||
|
||||
|
||||
@@ -194,7 +194,7 @@ class TFOPTModelTest(TFModelTesterMixin, PipelineTesterMixin, unittest.TestCase)
|
||||
else:
|
||||
# Here we build the word embeddings weights if not exists.
|
||||
# And then we retry to get the attribute once built.
|
||||
- model(model.dummy_inputs)
|
||||
+ model.build()
|
||||
if hasattr(embedding_layer, "weight"):
|
||||
return embedding_layer.weight
|
||||
else:
|
||||
|
||||
@@ -729,9 +729,9 @@ class TFVisionEncoderDecoderModelSaveLoadTests(unittest.TestCase):
|
||||
|
||||
# create two random ViT/GPT2 models for vit-gpt2 & initialize weights (+cross_attention weights)
|
||||
encoder = TFViTModel(config.encoder)
|
||||
- encoder(encoder.dummy_inputs)
|
||||
+ encoder.build()
|
||||
decoder = TFGPT2LMHeadModel(config.decoder)
|
||||
- decoder(decoder.dummy_inputs)
|
||||
+ decoder.build()
|
||||
|
||||
encoder_decoder_orig = TFVisionEncoderDecoderModel(encoder=encoder, decoder=decoder)
|
||||
|
||||
|
||||
@@ -281,7 +281,7 @@ class TFWhisperModelTest(TFModelTesterMixin, PipelineTesterMixin, unittest.TestC
|
||||
for model_class in self.all_model_classes:
|
||||
model = model_class(config)
|
||||
|
||||
- model(model.dummy_inputs)
|
||||
+ model.build()
|
||||
|
||||
with tempfile.TemporaryDirectory() as tmpdirname:
|
||||
model.save_pretrained(tmpdirname, saved_model=False)
|
||||
|
||||
Reference in New Issue
Block a user