Load dynamic module (remote code) only once if the code hasn't changed (#33162)
* Load remote code only once * Use hash as load indicator * Add a new option `force_reload` for the old behavior (i.e. always reload) * Add a test that the dynamic module is cached * Add more type annotations to improve code readability * Address comments from code review
This commit is contained in:
@@ -122,12 +122,27 @@ class AutoConfigTest(unittest.TestCase):
|
||||
config = AutoConfig.from_pretrained("hf-internal-testing/test_dynamic_model", trust_remote_code=True)
|
||||
self.assertEqual(config.__class__.__name__, "NewModelConfig")
|
||||
|
||||
# Test the dynamic module is loaded only once.
|
||||
reloaded_config = AutoConfig.from_pretrained("hf-internal-testing/test_dynamic_model", trust_remote_code=True)
|
||||
self.assertIs(config.__class__, reloaded_config.__class__)
|
||||
|
||||
# Test config can be reloaded.
|
||||
with tempfile.TemporaryDirectory() as tmp_dir:
|
||||
config.save_pretrained(tmp_dir)
|
||||
reloaded_config = AutoConfig.from_pretrained(tmp_dir, trust_remote_code=True)
|
||||
self.assertEqual(reloaded_config.__class__.__name__, "NewModelConfig")
|
||||
|
||||
# The configuration file is cached in the snapshot directory. So the module file is not changed after dumping
|
||||
# to a temp dir, because the revision of the configuration file is not changed.
|
||||
# Test the dynamic module is loaded only once if the configuration file is not changed.
|
||||
self.assertIs(config.__class__, reloaded_config.__class__)
|
||||
|
||||
# Test the dynamic module is reloaded if we force it.
|
||||
reloaded_config = AutoConfig.from_pretrained(
|
||||
"hf-internal-testing/test_dynamic_model", trust_remote_code=True, force_download=True
|
||||
)
|
||||
self.assertIsNot(config.__class__, reloaded_config.__class__)
|
||||
|
||||
def test_from_pretrained_dynamic_config_conflict(self):
|
||||
class NewModelConfigLocal(BertConfig):
|
||||
model_type = "new-model"
|
||||
|
||||
@@ -116,12 +116,29 @@ class AutoFeatureExtractorTest(unittest.TestCase):
|
||||
)
|
||||
self.assertEqual(feature_extractor.__class__.__name__, "NewFeatureExtractor")
|
||||
|
||||
# Test the dynamic module is loaded only once.
|
||||
reloaded_feature_extractor = AutoFeatureExtractor.from_pretrained(
|
||||
"hf-internal-testing/test_dynamic_feature_extractor", trust_remote_code=True
|
||||
)
|
||||
self.assertIs(feature_extractor.__class__, reloaded_feature_extractor.__class__)
|
||||
|
||||
# Test feature extractor can be reloaded.
|
||||
with tempfile.TemporaryDirectory() as tmp_dir:
|
||||
feature_extractor.save_pretrained(tmp_dir)
|
||||
reloaded_feature_extractor = AutoFeatureExtractor.from_pretrained(tmp_dir, trust_remote_code=True)
|
||||
self.assertEqual(reloaded_feature_extractor.__class__.__name__, "NewFeatureExtractor")
|
||||
|
||||
# The feature extractor file is cached in the snapshot directory. So the module file is not changed after dumping
|
||||
# to a temp dir, because the revision of the module file is not changed.
|
||||
# Test the dynamic module is loaded only once if the module file is not changed.
|
||||
self.assertIs(feature_extractor.__class__, reloaded_feature_extractor.__class__)
|
||||
|
||||
# Test the dynamic module is reloaded if we force it.
|
||||
reloaded_feature_extractor = AutoFeatureExtractor.from_pretrained(
|
||||
"hf-internal-testing/test_dynamic_feature_extractor", trust_remote_code=True, force_download=True
|
||||
)
|
||||
self.assertIsNot(feature_extractor.__class__, reloaded_feature_extractor.__class__)
|
||||
|
||||
def test_new_feature_extractor_registration(self):
|
||||
try:
|
||||
AutoConfig.register("custom", CustomConfig)
|
||||
|
||||
@@ -167,12 +167,29 @@ class AutoImageProcessorTest(unittest.TestCase):
|
||||
)
|
||||
self.assertEqual(image_processor.__class__.__name__, "NewImageProcessor")
|
||||
|
||||
# Test the dynamic module is loaded only once.
|
||||
reloaded_image_processor = AutoImageProcessor.from_pretrained(
|
||||
"hf-internal-testing/test_dynamic_image_processor", trust_remote_code=True
|
||||
)
|
||||
self.assertIs(image_processor.__class__, reloaded_image_processor.__class__)
|
||||
|
||||
# Test image processor can be reloaded.
|
||||
with tempfile.TemporaryDirectory() as tmp_dir:
|
||||
image_processor.save_pretrained(tmp_dir)
|
||||
reloaded_image_processor = AutoImageProcessor.from_pretrained(tmp_dir, trust_remote_code=True)
|
||||
self.assertEqual(reloaded_image_processor.__class__.__name__, "NewImageProcessor")
|
||||
|
||||
# The image processor file is cached in the snapshot directory. So the module file is not changed after dumping
|
||||
# to a temp dir, because the revision of the module file is not changed.
|
||||
# Test the dynamic module is loaded only once if the module file is not changed.
|
||||
self.assertIs(image_processor.__class__, reloaded_image_processor.__class__)
|
||||
|
||||
# Test the dynamic module is reloaded if we force it.
|
||||
reloaded_image_processor = AutoImageProcessor.from_pretrained(
|
||||
"hf-internal-testing/test_dynamic_image_processor", trust_remote_code=True, force_download=True
|
||||
)
|
||||
self.assertIsNot(image_processor.__class__, reloaded_image_processor.__class__)
|
||||
|
||||
def test_new_image_processor_registration(self):
|
||||
try:
|
||||
AutoConfig.register("custom", CustomConfig)
|
||||
|
||||
@@ -319,6 +319,10 @@ class AutoModelTest(unittest.TestCase):
|
||||
model = AutoModel.from_pretrained("hf-internal-testing/test_dynamic_model", trust_remote_code=True)
|
||||
self.assertEqual(model.__class__.__name__, "NewModel")
|
||||
|
||||
# Test the dynamic module is loaded only once.
|
||||
reloaded_model = AutoModel.from_pretrained("hf-internal-testing/test_dynamic_model", trust_remote_code=True)
|
||||
self.assertIs(model.__class__, reloaded_model.__class__)
|
||||
|
||||
# Test model can be reloaded.
|
||||
with tempfile.TemporaryDirectory() as tmp_dir:
|
||||
model.save_pretrained(tmp_dir)
|
||||
@@ -328,10 +332,27 @@ class AutoModelTest(unittest.TestCase):
|
||||
for p1, p2 in zip(model.parameters(), reloaded_model.parameters()):
|
||||
self.assertTrue(torch.equal(p1, p2))
|
||||
|
||||
# The model file is cached in the snapshot directory. So the module file is not changed after dumping
|
||||
# to a temp dir, because the revision of the module file is not changed.
|
||||
# Test the dynamic module is loaded only once if the module file is not changed.
|
||||
self.assertIs(model.__class__, reloaded_model.__class__)
|
||||
|
||||
# Test the dynamic module is reloaded if we force it.
|
||||
reloaded_model = AutoModel.from_pretrained(
|
||||
"hf-internal-testing/test_dynamic_model", trust_remote_code=True, force_download=True
|
||||
)
|
||||
self.assertIsNot(model.__class__, reloaded_model.__class__)
|
||||
|
||||
# This one uses a relative import to a util file, this checks it is downloaded and used properly.
|
||||
model = AutoModel.from_pretrained("hf-internal-testing/test_dynamic_model_with_util", trust_remote_code=True)
|
||||
self.assertEqual(model.__class__.__name__, "NewModel")
|
||||
|
||||
# Test the dynamic module is loaded only once.
|
||||
reloaded_model = AutoModel.from_pretrained(
|
||||
"hf-internal-testing/test_dynamic_model_with_util", trust_remote_code=True
|
||||
)
|
||||
self.assertIs(model.__class__, reloaded_model.__class__)
|
||||
|
||||
# Test model can be reloaded.
|
||||
with tempfile.TemporaryDirectory() as tmp_dir:
|
||||
model.save_pretrained(tmp_dir)
|
||||
@@ -341,6 +362,17 @@ class AutoModelTest(unittest.TestCase):
|
||||
for p1, p2 in zip(model.parameters(), reloaded_model.parameters()):
|
||||
self.assertTrue(torch.equal(p1, p2))
|
||||
|
||||
# The model file is cached in the snapshot directory. So the module file is not changed after dumping
|
||||
# to a temp dir, because the revision of the module file is not changed.
|
||||
# Test the dynamic module is loaded only once if the module file is not changed.
|
||||
self.assertIs(model.__class__, reloaded_model.__class__)
|
||||
|
||||
# Test the dynamic module is reloaded if we force it.
|
||||
reloaded_model = AutoModel.from_pretrained(
|
||||
"hf-internal-testing/test_dynamic_model_with_util", trust_remote_code=True, force_download=True
|
||||
)
|
||||
self.assertIsNot(model.__class__, reloaded_model.__class__)
|
||||
|
||||
def test_from_pretrained_dynamic_model_distant_with_ref(self):
|
||||
model = AutoModel.from_pretrained("hf-internal-testing/ref_to_test_dynamic_model", trust_remote_code=True)
|
||||
self.assertEqual(model.__class__.__name__, "NewModel")
|
||||
|
||||
@@ -314,6 +314,13 @@ class AutoTokenizerTest(unittest.TestCase):
|
||||
|
||||
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/test_dynamic_tokenizer", trust_remote_code=True)
|
||||
self.assertTrue(tokenizer.special_attribute_present)
|
||||
|
||||
# Test the dynamic module is loaded only once.
|
||||
reloaded_tokenizer = AutoTokenizer.from_pretrained(
|
||||
"hf-internal-testing/test_dynamic_tokenizer", trust_remote_code=True
|
||||
)
|
||||
self.assertIs(tokenizer.__class__, reloaded_tokenizer.__class__)
|
||||
|
||||
# Test tokenizer can be reloaded.
|
||||
with tempfile.TemporaryDirectory() as tmp_dir:
|
||||
tokenizer.save_pretrained(tmp_dir)
|
||||
@@ -340,6 +347,18 @@ class AutoTokenizerTest(unittest.TestCase):
|
||||
self.assertEqual(tokenizer.__class__.__name__, "NewTokenizer")
|
||||
self.assertEqual(reloaded_tokenizer.__class__.__name__, "NewTokenizer")
|
||||
|
||||
# The tokenizer file is cached in the snapshot directory. So the module file is not changed after dumping
|
||||
# to a temp dir, because the revision of the module file is not changed.
|
||||
# Test the dynamic module is loaded only once if the module file is not changed.
|
||||
self.assertIs(tokenizer.__class__, reloaded_tokenizer.__class__)
|
||||
|
||||
# Test the dynamic module is reloaded if we force it.
|
||||
reloaded_tokenizer = AutoTokenizer.from_pretrained(
|
||||
"hf-internal-testing/test_dynamic_tokenizer", trust_remote_code=True, force_download=True
|
||||
)
|
||||
self.assertIsNot(tokenizer.__class__, reloaded_tokenizer.__class__)
|
||||
self.assertTrue(reloaded_tokenizer.special_attribute_present)
|
||||
|
||||
@require_tokenizers
|
||||
def test_from_pretrained_dynamic_tokenizer_conflict(self):
|
||||
class NewTokenizer(BertTokenizer):
|
||||
|
||||
Reference in New Issue
Block a user