Even more TF test fixes (#28146)
* Fix vision text dual encoder
* Small cleanup for wav2vec2 (not fixed yet)
* Small fix for vision_encoder_decoder
* Fix SAM builds
* Update TFBertTokenizer test with modern exporting + tokenizer
* Fix DeBERTa
* Fix DeBERTav2
* Try RAG fix but it's impossible to test locally
* Actually fix RAG now that I got FAISS working somehow
* Fix Wav2Vec2, add sermon
* Fix Hubert
This commit is contained in:
@@ -57,7 +57,7 @@ def convert_tf_weight_name_to_pt_weight_name(
|
|||||||
transposed with regards to each other
|
transposed with regards to each other
|
||||||
"""
|
"""
|
||||||
if name_scope is not None:
|
if name_scope is not None:
|
||||||
if not tf_name.startswith(name_scope):
|
if not tf_name.startswith(name_scope) and "final_logits_bias" not in tf_name:
|
||||||
raise ValueError(
|
raise ValueError(
|
||||||
f"Weight name {tf_name} does not start with name_scope {name_scope}. This is an internal error "
|
f"Weight name {tf_name} does not start with name_scope {name_scope}. This is an internal error "
|
||||||
"in Transformers, so (unless you were doing something really evil) please open an issue to report it!"
|
"in Transformers, so (unless you were doing something really evil) please open an issue to report it!"
|
||||||
|
|||||||
@@ -638,10 +638,10 @@ class TFDebertaDisentangledSelfAttention(tf.keras.layers.Layer):
|
|||||||
self.pos_dropout.build(None)
|
self.pos_dropout.build(None)
|
||||||
if getattr(self, "pos_proj", None) is not None:
|
if getattr(self, "pos_proj", None) is not None:
|
||||||
with tf.name_scope(self.pos_proj.name):
|
with tf.name_scope(self.pos_proj.name):
|
||||||
self.pos_proj.build(None)
|
self.pos_proj.build([self.config.hidden_size])
|
||||||
if getattr(self, "pos_q_proj", None) is not None:
|
if getattr(self, "pos_q_proj", None) is not None:
|
||||||
with tf.name_scope(self.pos_q_proj.name):
|
with tf.name_scope(self.pos_q_proj.name):
|
||||||
self.pos_q_proj.build(None)
|
self.pos_q_proj.build([self.config.hidden_size])
|
||||||
|
|
||||||
def transpose_for_scores(self, tensor: tf.Tensor) -> tf.Tensor:
|
def transpose_for_scores(self, tensor: tf.Tensor) -> tf.Tensor:
|
||||||
shape = shape_list(tensor)[:-1] + [self.num_attention_heads, -1]
|
shape = shape_list(tensor)[:-1] + [self.num_attention_heads, -1]
|
||||||
|
|||||||
@@ -362,6 +362,9 @@ class TFDebertaV2ConvLayer(tf.keras.layers.Layer):
|
|||||||
self.config = config
|
self.config = config
|
||||||
|
|
||||||
def build(self, input_shape=None):
|
def build(self, input_shape=None):
|
||||||
|
if self.built:
|
||||||
|
return
|
||||||
|
self.built = True
|
||||||
with tf.name_scope("conv"):
|
with tf.name_scope("conv"):
|
||||||
self.conv_kernel = self.add_weight(
|
self.conv_kernel = self.add_weight(
|
||||||
name="kernel",
|
name="kernel",
|
||||||
@@ -371,13 +374,9 @@ class TFDebertaV2ConvLayer(tf.keras.layers.Layer):
|
|||||||
self.conv_bias = self.add_weight(
|
self.conv_bias = self.add_weight(
|
||||||
name="bias", shape=[self.config.hidden_size], initializer=tf.zeros_initializer()
|
name="bias", shape=[self.config.hidden_size], initializer=tf.zeros_initializer()
|
||||||
)
|
)
|
||||||
return
|
|
||||||
if self.built:
|
|
||||||
return
|
|
||||||
self.built = True
|
|
||||||
if getattr(self, "LayerNorm", None) is not None:
|
if getattr(self, "LayerNorm", None) is not None:
|
||||||
with tf.name_scope(self.LayerNorm.name):
|
with tf.name_scope(self.LayerNorm.name):
|
||||||
self.LayerNorm.build(None)
|
self.LayerNorm.build([None, None, self.config.hidden_size])
|
||||||
if getattr(self, "dropout", None) is not None:
|
if getattr(self, "dropout", None) is not None:
|
||||||
with tf.name_scope(self.dropout.name):
|
with tf.name_scope(self.dropout.name):
|
||||||
self.dropout.build(None)
|
self.dropout.build(None)
|
||||||
@@ -453,7 +452,7 @@ class TFDebertaV2Encoder(tf.keras.layers.Layer):
|
|||||||
self.conv.build(None)
|
self.conv.build(None)
|
||||||
if getattr(self, "LayerNorm", None) is not None:
|
if getattr(self, "LayerNorm", None) is not None:
|
||||||
with tf.name_scope(self.LayerNorm.name):
|
with tf.name_scope(self.LayerNorm.name):
|
||||||
self.LayerNorm.build([None, None, self.config.hidden_size])
|
self.LayerNorm.build([None, self.config.hidden_size])
|
||||||
if getattr(self, "layer", None) is not None:
|
if getattr(self, "layer", None) is not None:
|
||||||
for layer in self.layer:
|
for layer in self.layer:
|
||||||
with tf.name_scope(layer.name):
|
with tf.name_scope(layer.name):
|
||||||
|
|||||||
@@ -401,7 +401,6 @@ class TFHubertWeightNormConv1D(tf.keras.layers.Conv1D):
|
|||||||
)
|
)
|
||||||
self.explicit_padding = explicit_padding
|
self.explicit_padding = explicit_padding
|
||||||
self.filter_axis = 2
|
self.filter_axis = 2
|
||||||
self.initialized = False
|
|
||||||
self.kernel_norm_axes = tf.constant([0, 1])
|
self.kernel_norm_axes = tf.constant([0, 1])
|
||||||
|
|
||||||
def _init_norm(self):
|
def _init_norm(self):
|
||||||
@@ -428,13 +427,13 @@ class TFHubertWeightNormConv1D(tf.keras.layers.Conv1D):
|
|||||||
dtype=self.weight_v.dtype,
|
dtype=self.weight_v.dtype,
|
||||||
trainable=True,
|
trainable=True,
|
||||||
)
|
)
|
||||||
|
self._init_norm()
|
||||||
self.bias = self.add_weight(name="bias", shape=(self.filters,), initializer="zeros", trainable=True)
|
self.bias = self.add_weight(name="bias", shape=(self.filters,), initializer="zeros", trainable=True)
|
||||||
|
|
||||||
def call(self, inputs):
|
def call(self, inputs):
|
||||||
if not self.initialized:
|
# TODO Matt: Assigning to attributes in call() is deeply sinful in TensorFlow, as it should be idempotent.
|
||||||
self._init_norm()
|
# This whole layer should be replaced by a layer that doesn't inherit from Conv1D, but instead calls
|
||||||
self.initialized = True
|
# a functional 1d convolution with normalized weights that it generates (but does not store!)
|
||||||
|
|
||||||
self._normalize_kernel()
|
self._normalize_kernel()
|
||||||
|
|
||||||
padded_inputs = tf.pad(inputs, ((0, 0), (self.explicit_padding, self.explicit_padding), (0, 0)))
|
padded_inputs = tf.pad(inputs, ((0, 0), (self.explicit_padding, self.explicit_padding), (0, 0)))
|
||||||
|
|||||||
@@ -720,6 +720,15 @@ class TFRagModel(TFRagPreTrainedModel):
|
|||||||
generator_dec_attentions=gen_outputs.decoder_attentions,
|
generator_dec_attentions=gen_outputs.decoder_attentions,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
def build(self, input_shape=None):
|
||||||
|
if self.built:
|
||||||
|
return
|
||||||
|
self.built = True
|
||||||
|
with tf.name_scope(self.generator.name):
|
||||||
|
self.generator.build(None)
|
||||||
|
with tf.name_scope(self.question_encoder.name):
|
||||||
|
self.question_encoder.build(None)
|
||||||
|
|
||||||
|
|
||||||
@add_start_docstrings_to_model_forward(
|
@add_start_docstrings_to_model_forward(
|
||||||
"""
|
"""
|
||||||
|
|||||||
@@ -604,6 +604,9 @@ class TFSamMaskDecoder(tf.keras.layers.Layer):
|
|||||||
if getattr(self, "iou_prediction_head", None) is not None:
|
if getattr(self, "iou_prediction_head", None) is not None:
|
||||||
with tf.name_scope(self.iou_prediction_head.name):
|
with tf.name_scope(self.iou_prediction_head.name):
|
||||||
self.iou_prediction_head.build(None)
|
self.iou_prediction_head.build(None)
|
||||||
|
for mlp in self.output_hypernetworks_mlps:
|
||||||
|
with tf.name_scope(mlp.name):
|
||||||
|
mlp.build(None)
|
||||||
|
|
||||||
def call(
|
def call(
|
||||||
self,
|
self,
|
||||||
|
|||||||
@@ -247,16 +247,16 @@ class TFVisionTextDualEncoderModel(TFPreTrainedModel):
|
|||||||
# However, the name of that extra layer is the name of the MainLayer in the base model.
|
# However, the name of that extra layer is the name of the MainLayer in the base model.
|
||||||
if "vision_model" in tf_weight:
|
if "vision_model" in tf_weight:
|
||||||
if tf_weight.count("vision_model") == 1:
|
if tf_weight.count("vision_model") == 1:
|
||||||
return re.sub(r"vision_model\..*?\.", "vision_model.", tf_weight)
|
return (re.sub(r"vision_model\..*?\.", "vision_model.", tf_weight),)
|
||||||
elif tf_weight.count("vision_model") == 2:
|
elif tf_weight.count("vision_model") == 2:
|
||||||
return re.sub(r"vision_model\..*?\.vision_model", "vision_model.vision_model", tf_weight)
|
return (re.sub(r"vision_model\..*?\.vision_model", "vision_model.vision_model", tf_weight),)
|
||||||
else:
|
else:
|
||||||
raise ValueError(
|
raise ValueError(
|
||||||
f"Unexpected weight name {tf_weight}. Please file an issue on the"
|
f"Unexpected weight name {tf_weight}. Please file an issue on the"
|
||||||
" Transformers repo to let us know about this error!"
|
" Transformers repo to let us know about this error!"
|
||||||
)
|
)
|
||||||
elif "text_model" in tf_weight:
|
elif "text_model" in tf_weight:
|
||||||
return re.sub(r"text_model\..*?\.", "text_model.", tf_weight)
|
return (re.sub(r"text_model\..*?\.", "text_model.", tf_weight),)
|
||||||
else:
|
else:
|
||||||
return (tf_weight,)
|
return (tf_weight,)
|
||||||
|
|
||||||
@@ -598,7 +598,7 @@ class TFVisionTextDualEncoderModel(TFPreTrainedModel):
|
|||||||
if text_model.name != "text_model":
|
if text_model.name != "text_model":
|
||||||
raise ValueError("text model must be created with the name `text_model`.")
|
raise ValueError("text model must be created with the name `text_model`.")
|
||||||
|
|
||||||
model.build() # Ensure model is fully built
|
model.build_in_name_scope() # Ensure model is fully built
|
||||||
|
|
||||||
return model
|
return model
|
||||||
|
|
||||||
|
|||||||
@@ -435,7 +435,6 @@ class TFWav2Vec2WeightNormConv1D(tf.keras.layers.Conv1D):
|
|||||||
)
|
)
|
||||||
self.explicit_padding = explicit_padding
|
self.explicit_padding = explicit_padding
|
||||||
self.filter_axis = 2
|
self.filter_axis = 2
|
||||||
self.initialized = False
|
|
||||||
self.kernel_norm_axes = tf.constant([0, 1])
|
self.kernel_norm_axes = tf.constant([0, 1])
|
||||||
|
|
||||||
def _init_norm(self):
|
def _init_norm(self):
|
||||||
@@ -462,13 +461,13 @@ class TFWav2Vec2WeightNormConv1D(tf.keras.layers.Conv1D):
|
|||||||
dtype=self.weight_v.dtype,
|
dtype=self.weight_v.dtype,
|
||||||
trainable=True,
|
trainable=True,
|
||||||
)
|
)
|
||||||
|
self._init_norm()
|
||||||
self.bias = self.add_weight(name="bias", shape=(self.filters,), initializer="zeros", trainable=True)
|
self.bias = self.add_weight(name="bias", shape=(self.filters,), initializer="zeros", trainable=True)
|
||||||
|
|
||||||
def call(self, inputs):
|
def call(self, inputs):
|
||||||
if not self.initialized:
|
# TODO Matt: Assigning to attributes in call() is deeply sinful in TensorFlow, as it should be idempotent.
|
||||||
self._init_norm()
|
# This whole layer should be replaced by a layer that doesn't inherit from Conv1D, but instead calls
|
||||||
self.initialized = True
|
# a functional 1d convolution with normalized weights that it generates (but does not store!)
|
||||||
|
|
||||||
self._normalize_kernel()
|
self._normalize_kernel()
|
||||||
|
|
||||||
padded_inputs = tf.pad(inputs, ((0, 0), (self.explicit_padding, self.explicit_padding), (0, 0)))
|
padded_inputs = tf.pad(inputs, ((0, 0), (self.explicit_padding, self.explicit_padding), (0, 0)))
|
||||||
@@ -1208,13 +1207,13 @@ class TFWav2Vec2MainLayer(tf.keras.layers.Layer):
|
|||||||
self.encoder = TFWav2Vec2Encoder(config, name="encoder")
|
self.encoder = TFWav2Vec2Encoder(config, name="encoder")
|
||||||
|
|
||||||
def build(self, input_shape=None):
|
def build(self, input_shape=None):
|
||||||
self.masked_spec_embed = self.add_weight(
|
|
||||||
shape=(self.config.hidden_size,), initializer="uniform", trainable=True, name="masked_spec_embed"
|
|
||||||
)
|
|
||||||
|
|
||||||
if self.built:
|
if self.built:
|
||||||
return
|
return
|
||||||
self.built = True
|
self.built = True
|
||||||
|
if self.config.mask_time_prob > 0.0 or self.config.mask_feature_prob > 0.0:
|
||||||
|
self.masked_spec_embed = self.add_weight(
|
||||||
|
shape=(self.config.hidden_size,), initializer="uniform", trainable=True, name="masked_spec_embed"
|
||||||
|
)
|
||||||
if getattr(self, "feature_extractor", None) is not None:
|
if getattr(self, "feature_extractor", None) is not None:
|
||||||
with tf.name_scope(self.feature_extractor.name):
|
with tf.name_scope(self.feature_extractor.name):
|
||||||
self.feature_extractor.build(None)
|
self.feature_extractor.build(None)
|
||||||
|
|||||||
@@ -28,7 +28,7 @@ if is_tf_available():
|
|||||||
|
|
||||||
def call(self, inputs):
|
def call(self, inputs):
|
||||||
tokenized = self.tokenizer(inputs)
|
tokenized = self.tokenizer(inputs)
|
||||||
out = self.bert(**tokenized)
|
out = self.bert(tokenized)
|
||||||
return out["pooler_output"]
|
return out["pooler_output"]
|
||||||
|
|
||||||
|
|
||||||
@@ -41,13 +41,8 @@ class BertTokenizationTest(unittest.TestCase):
|
|||||||
def setUp(self):
|
def setUp(self):
|
||||||
super().setUp()
|
super().setUp()
|
||||||
|
|
||||||
self.tokenizers = [
|
self.tokenizers = [BertTokenizer.from_pretrained(checkpoint) for checkpoint in TOKENIZER_CHECKPOINTS]
|
||||||
BertTokenizer.from_pretrained(checkpoint) for checkpoint in (TOKENIZER_CHECKPOINTS * 2)
|
self.tf_tokenizers = [TFBertTokenizer.from_pretrained(checkpoint) for checkpoint in TOKENIZER_CHECKPOINTS]
|
||||||
] # repeat for when fast_bert_tokenizer=false
|
|
||||||
self.tf_tokenizers = [TFBertTokenizer.from_pretrained(checkpoint) for checkpoint in TOKENIZER_CHECKPOINTS] + [
|
|
||||||
TFBertTokenizer.from_pretrained(checkpoint, use_fast_bert_tokenizer=False)
|
|
||||||
for checkpoint in TOKENIZER_CHECKPOINTS
|
|
||||||
]
|
|
||||||
assert len(self.tokenizers) == len(self.tf_tokenizers)
|
assert len(self.tokenizers) == len(self.tf_tokenizers)
|
||||||
|
|
||||||
self.test_sentences = [
|
self.test_sentences = [
|
||||||
@@ -94,15 +89,15 @@ class BertTokenizationTest(unittest.TestCase):
|
|||||||
self.assertTrue(tf.reduce_all(eager_outputs[key] == compiled_outputs[key]))
|
self.assertTrue(tf.reduce_all(eager_outputs[key] == compiled_outputs[key]))
|
||||||
|
|
||||||
@slow
|
@slow
|
||||||
def test_saved_model(self):
|
def test_export_for_inference(self):
|
||||||
for tf_tokenizer in self.tf_tokenizers:
|
for tf_tokenizer in self.tf_tokenizers:
|
||||||
model = ModelToSave(tokenizer=tf_tokenizer)
|
model = ModelToSave(tokenizer=tf_tokenizer)
|
||||||
test_inputs = tf.convert_to_tensor(self.test_sentences)
|
test_inputs = tf.convert_to_tensor(self.test_sentences)
|
||||||
out = model(test_inputs) # Build model with some sample inputs
|
out = model(test_inputs) # Build model with some sample inputs
|
||||||
with TemporaryDirectory() as tempdir:
|
with TemporaryDirectory() as tempdir:
|
||||||
save_path = Path(tempdir) / "saved.model"
|
save_path = Path(tempdir) / "saved.model"
|
||||||
model.save(save_path)
|
model.export(save_path)
|
||||||
loaded_model = tf.keras.models.load_model(save_path)
|
loaded_model = tf.saved_model.load(save_path)
|
||||||
loaded_output = loaded_model(test_inputs)
|
loaded_output = loaded_model.serve(test_inputs)
|
||||||
# We may see small differences because the loaded model is compiled, so we need an epsilon for the test
|
# We may see small differences because the loaded model is compiled, so we need an epsilon for the test
|
||||||
self.assertLessEqual(tf.reduce_max(tf.abs(out - loaded_output)), 1e-5)
|
self.assertLessEqual(tf.reduce_max(tf.abs(out - loaded_output)), 1e-5)
|
||||||
|
|||||||
@@ -1005,6 +1005,7 @@ class TFRagModelSaveLoadTests(unittest.TestCase):
|
|||||||
retriever=rag_retriever,
|
retriever=rag_retriever,
|
||||||
config=rag_config,
|
config=rag_config,
|
||||||
)
|
)
|
||||||
|
rag_sequence.build_in_name_scope()
|
||||||
# check that the from pretrained methods work
|
# check that the from pretrained methods work
|
||||||
rag_sequence.save_pretrained(tmp_dirname)
|
rag_sequence.save_pretrained(tmp_dirname)
|
||||||
rag_sequence.from_pretrained(tmp_dirname, retriever=rag_retriever)
|
rag_sequence.from_pretrained(tmp_dirname, retriever=rag_retriever)
|
||||||
@@ -1056,6 +1057,7 @@ class TFRagModelSaveLoadTests(unittest.TestCase):
|
|||||||
retriever=rag_retriever,
|
retriever=rag_retriever,
|
||||||
config=rag_config,
|
config=rag_config,
|
||||||
)
|
)
|
||||||
|
rag_token.build_in_name_scope()
|
||||||
# check that the from pretrained methods work
|
# check that the from pretrained methods work
|
||||||
rag_token.save_pretrained(tmp_dirname)
|
rag_token.save_pretrained(tmp_dirname)
|
||||||
rag_token.from_pretrained(tmp_dirname, retriever=rag_retriever)
|
rag_token.from_pretrained(tmp_dirname, retriever=rag_retriever)
|
||||||
|
|||||||
@@ -858,6 +858,7 @@ class TFVisionEncoderDecoderModelSaveLoadTests(unittest.TestCase):
|
|||||||
pretrained_encoder_dir,
|
pretrained_encoder_dir,
|
||||||
pretrained_decoder_dir,
|
pretrained_decoder_dir,
|
||||||
)
|
)
|
||||||
|
enc_dec_model.build_in_name_scope()
|
||||||
# check that the from pretrained methods work
|
# check that the from pretrained methods work
|
||||||
enc_dec_model.save_pretrained(tmp_dirname)
|
enc_dec_model.save_pretrained(tmp_dirname)
|
||||||
enc_dec_model = TFVisionEncoderDecoderModel.from_pretrained(tmp_dirname)
|
enc_dec_model = TFVisionEncoderDecoderModel.from_pretrained(tmp_dirname)
|
||||||
|
|||||||
Reference in New Issue
Block a user