Improve TF weight loading, especially PT crossloading (#21792)
* First commit for the improved PT-TF weight loading * Remove workarounds from TFEncoderDecoder tests * Allow a custom weight renaming function in from_pretrained and use that to clean up EncoderDecoder * make fixup * First attempt at visionencoderdecoder * Disable tensorfloat32 in tests to get consistent outputs * Quick fix to tf_vision_encoder_decoder tests * make fixup * Update Blenderbot tests * Remove unused arg in modeling_tf_opt * load_tf_sharded_weights had strict=True! This meant transfer learning was impossible, so I'm setting it to False. * Support prefixes when loading sharded TF checkpoints * make fixup * Add test to load sharded models with a weight prefix * Fix sharded weight loading test * Add a test for transfer from a sharded checkpoint * make fixup * Add test to check that crossloading from PT with a prefix works * Refactor from_pretrained in the encoderdecoder classes * Refactor from_pretrained in the encoderdecoder classes * missmatched -> mismatched * Explicitly check for None * No comments showing my very impressive and attractive knowledge of Py3.9+ * Disable TF32 across all TF tests
This commit is contained in:
@@ -90,6 +90,7 @@ if is_tf_available():
|
||||
TFAutoModel,
|
||||
TFAutoModelForSequenceClassification,
|
||||
TFBertForMaskedLM,
|
||||
TFBertForSequenceClassification,
|
||||
TFBertModel,
|
||||
TFRagModel,
|
||||
TFSharedEmbeddings,
|
||||
@@ -107,6 +108,8 @@ if is_tf_available():
|
||||
from transformers.modeling_tf_utils import tf_shard_checkpoint, unpack_inputs
|
||||
from transformers.tf_utils import stable_softmax
|
||||
|
||||
tf.config.experimental.enable_tensor_float_32_execution(False)
|
||||
|
||||
if _tf_gpu_memory_limit is not None:
|
||||
gpus = tf.config.list_physical_devices("GPU")
|
||||
for gpu in gpus:
|
||||
@@ -2140,6 +2143,18 @@ class UtilsFunctionsTest(unittest.TestCase):
|
||||
for p1, p2 in zip(model.weights, ref_model.weights):
|
||||
assert np.allclose(p1.numpy(), p2.numpy())
|
||||
|
||||
def test_sharded_checkpoint_with_prefix(self):
|
||||
model = TFBertModel.from_pretrained("hf-internal-testing/tiny-random-bert", load_weight_prefix="a/b")
|
||||
sharded_model = TFBertModel.from_pretrained("ArthurZ/tiny-random-bert-sharded", load_weight_prefix="a/b")
|
||||
for p1, p2 in zip(model.weights, sharded_model.weights):
|
||||
self.assertTrue(np.allclose(p1.numpy(), p2.numpy()))
|
||||
self.assertTrue(p1.name.startswith("a/b/"))
|
||||
self.assertTrue(p2.name.startswith("a/b/"))
|
||||
|
||||
def test_sharded_checkpoint_transfer(self):
|
||||
# If this doesn't throw an error then the test passes
|
||||
TFBertForSequenceClassification.from_pretrained("ArthurZ/tiny-random-bert-sharded")
|
||||
|
||||
@is_pt_tf_cross_test
|
||||
def test_checkpoint_sharding_local_from_pt(self):
|
||||
with tempfile.TemporaryDirectory() as tmp_dir:
|
||||
@@ -2150,6 +2165,16 @@ class UtilsFunctionsTest(unittest.TestCase):
|
||||
for p1, p2 in zip(model.weights, ref_model.weights):
|
||||
assert np.allclose(p1.numpy(), p2.numpy())
|
||||
|
||||
@is_pt_tf_cross_test
|
||||
def test_checkpoint_loading_with_prefix_from_pt(self):
|
||||
model = TFBertModel.from_pretrained(
|
||||
"hf-internal-testing/tiny-random-bert", from_pt=True, load_weight_prefix="a/b"
|
||||
)
|
||||
ref_model = TFBertModel.from_pretrained("hf-internal-testing/tiny-random-bert", from_pt=True)
|
||||
for p1, p2 in zip(model.weights, ref_model.weights):
|
||||
self.assertTrue(np.allclose(p1.numpy(), p2.numpy()))
|
||||
self.assertTrue(p1.name.startswith("a/b/"))
|
||||
|
||||
@is_pt_tf_cross_test
|
||||
def test_checkpoint_sharding_hub_from_pt(self):
|
||||
model = TFBertModel.from_pretrained("hf-internal-testing/tiny-random-bert-sharded", from_pt=True)
|
||||
|
||||
Reference in New Issue
Block a user