XLA train step fixes (#17973)
* Copy inputs to train and test step before modifying them, as this breaks things * Add XLA tests, fix our loss functions to be XLA-compatible * make fixup * Update loss computation test to expect vector of per-sample losses * Patch loss for TFLED * Patch loss for TFAlbert * Add a tf_legacy_loss config flag that enables old loss functions * Stop using config.get() because it's not a dict * Skip loss computation test for RAG because its loss is very strange and I'm afraid to rewrite it * make fixup * Add XLA-compatible RAG loss * Fix dtype of loss mask for TFAlbert * Fix test for XLNet too because it overrides the default one * make fixup * Fix config test * No more depending on GPU NaN behaviour * Add test, avoid potential zero division * Fix test item assignment * Fix loss computation masking test * make fixup * Fix dtype bugs
This commit is contained in:
@@ -18,6 +18,7 @@ import copy
|
||||
import os
|
||||
import tempfile
|
||||
from importlib import import_module
|
||||
from math import isnan
|
||||
|
||||
from transformers import is_tf_available
|
||||
from transformers.models.auto import get_values
|
||||
@@ -134,6 +135,72 @@ class TFCoreModelTesterMixin:
|
||||
outputs = run_in_graph_mode()
|
||||
self.assertIsNotNone(outputs)
|
||||
|
||||
@slow
|
||||
def test_xla_fit(self):
|
||||
# This is a copy of the test_keras_fit method, but we use XLA compilation instead of eager
|
||||
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
|
||||
for model_class in self.all_model_classes:
|
||||
model = model_class(config)
|
||||
if getattr(model, "hf_compute_loss", None):
|
||||
# Test that model correctly compute the loss with kwargs
|
||||
prepared_for_class = self._prepare_for_class(inputs_dict.copy(), model_class, return_labels=True)
|
||||
# Is there a better way to remove these decoder inputs?
|
||||
prepared_for_class = {
|
||||
key: val
|
||||
for key, val in prepared_for_class.items()
|
||||
if key not in ("head_mask", "decoder_head_mask", "cross_attn_head_mask", "decoder_input_ids")
|
||||
}
|
||||
|
||||
possible_label_cols = {
|
||||
"labels",
|
||||
"label",
|
||||
"label_ids",
|
||||
"start_positions",
|
||||
"start_position",
|
||||
"end_positions",
|
||||
"end_position",
|
||||
"next_sentence_label",
|
||||
}
|
||||
label_names = possible_label_cols.intersection(set(prepared_for_class))
|
||||
self.assertGreater(len(label_names), 0, msg="No matching label names found!")
|
||||
labels = {key: val for key, val in prepared_for_class.items() if key in label_names}
|
||||
inputs_minus_labels = {key: val for key, val in prepared_for_class.items() if key not in label_names}
|
||||
self.assertGreater(len(inputs_minus_labels), 0)
|
||||
|
||||
# Make sure it works with XLA!
|
||||
model.compile(optimizer=tf.keras.optimizers.SGD(0.0), jit_compile=True)
|
||||
# Make sure the model fits without crashing regardless of where we pass the labels
|
||||
history = model.fit(
|
||||
prepared_for_class,
|
||||
validation_data=prepared_for_class,
|
||||
steps_per_epoch=1,
|
||||
validation_steps=1,
|
||||
shuffle=False,
|
||||
verbose=0,
|
||||
)
|
||||
loss = history.history["loss"][0]
|
||||
self.assertTrue(not isnan(loss))
|
||||
val_loss = history.history["val_loss"][0]
|
||||
self.assertTrue(not isnan(val_loss))
|
||||
|
||||
# Now test it with separate labels, to make sure that path works in XLA too.
|
||||
model = model_class(config)
|
||||
model.compile(optimizer=tf.keras.optimizers.SGD(0.0), jit_compile=True)
|
||||
history = model.fit(
|
||||
inputs_minus_labels,
|
||||
labels,
|
||||
validation_data=(inputs_minus_labels, labels),
|
||||
steps_per_epoch=1,
|
||||
validation_steps=1,
|
||||
shuffle=False,
|
||||
verbose=0,
|
||||
)
|
||||
|
||||
loss = history.history["loss"][0]
|
||||
self.assertTrue(not isnan(loss))
|
||||
val_loss = history.history["val_loss"][0]
|
||||
self.assertTrue(not isnan(val_loss))
|
||||
|
||||
@slow
|
||||
def test_saved_model_creation(self):
|
||||
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
|
||||
|
||||
Reference in New Issue
Block a user