Load dynamic module (remote code) only once if the code hasn't changed (#33162)

* Load remote code only once

* Use hash as load indicator

* Add a new option `force_reload` for old behavior (i.e. always reload)

* Add test that the dynamic module is cached

* Add more type annotations to improve code readability

* Address comments from code review
This commit is contained in:
Xuehai Pan
2024-09-06 19:49:35 +08:00
committed by GitHub
parent 1bd9d1c899
commit e1c2b69c34
6 changed files with 139 additions and 12 deletions

View File

@@ -319,6 +319,10 @@ class AutoModelTest(unittest.TestCase):
model = AutoModel.from_pretrained("hf-internal-testing/test_dynamic_model", trust_remote_code=True)
self.assertEqual(model.__class__.__name__, "NewModel")
# Test the dynamic module is loaded only once.
reloaded_model = AutoModel.from_pretrained("hf-internal-testing/test_dynamic_model", trust_remote_code=True)
self.assertIs(model.__class__, reloaded_model.__class__)
# Test model can be reloaded.
with tempfile.TemporaryDirectory() as tmp_dir:
model.save_pretrained(tmp_dir)
@@ -328,10 +332,27 @@ class AutoModelTest(unittest.TestCase):
for p1, p2 in zip(model.parameters(), reloaded_model.parameters()):
self.assertTrue(torch.equal(p1, p2))
# The model file is cached in the snapshot directory, so the module file does not change after dumping
# to a temp dir: the revision of the module file stays the same.
# Test the dynamic module is loaded only once if the module file is not changed.
self.assertIs(model.__class__, reloaded_model.__class__)
# Test the dynamic module is reloaded if we force it.
reloaded_model = AutoModel.from_pretrained(
"hf-internal-testing/test_dynamic_model", trust_remote_code=True, force_download=True
)
self.assertIsNot(model.__class__, reloaded_model.__class__)
# This one uses a relative import to a util file, this checks it is downloaded and used properly.
model = AutoModel.from_pretrained("hf-internal-testing/test_dynamic_model_with_util", trust_remote_code=True)
self.assertEqual(model.__class__.__name__, "NewModel")
# Test the dynamic module is loaded only once.
reloaded_model = AutoModel.from_pretrained(
"hf-internal-testing/test_dynamic_model_with_util", trust_remote_code=True
)
self.assertIs(model.__class__, reloaded_model.__class__)
# Test model can be reloaded.
with tempfile.TemporaryDirectory() as tmp_dir:
model.save_pretrained(tmp_dir)
@@ -341,6 +362,17 @@ class AutoModelTest(unittest.TestCase):
for p1, p2 in zip(model.parameters(), reloaded_model.parameters()):
self.assertTrue(torch.equal(p1, p2))
# The model file is cached in the snapshot directory, so the module file does not change after dumping
# to a temp dir: the revision of the module file stays the same.
# Test the dynamic module is loaded only once if the module file is not changed.
self.assertIs(model.__class__, reloaded_model.__class__)
# Test the dynamic module is reloaded if we force it.
reloaded_model = AutoModel.from_pretrained(
"hf-internal-testing/test_dynamic_model_with_util", trust_remote_code=True, force_download=True
)
self.assertIsNot(model.__class__, reloaded_model.__class__)
def test_from_pretrained_dynamic_model_distant_with_ref(self):
model = AutoModel.from_pretrained("hf-internal-testing/ref_to_test_dynamic_model", trust_remote_code=True)
self.assertEqual(model.__class__.__name__, "NewModel")