* fix * [test_all] trigger full CI --------- Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
This commit is contained in:
@@ -98,88 +98,106 @@ class ConfigPushToHubTester(unittest.TestCase):
|
||||
cls._token = TOKEN
|
||||
HfFolder.save_token(TOKEN)
|
||||
|
||||
@classmethod
|
||||
def tearDownClass(cls):
|
||||
@staticmethod
|
||||
def _try_delete_repo(repo_id, token):
|
||||
try:
|
||||
delete_repo(token=cls._token, repo_id="test-config")
|
||||
except HTTPError:
|
||||
pass
|
||||
|
||||
try:
|
||||
delete_repo(token=cls._token, repo_id="valid_org/test-config-org")
|
||||
except HTTPError:
|
||||
pass
|
||||
|
||||
try:
|
||||
delete_repo(token=cls._token, repo_id="test-dynamic-config")
|
||||
except HTTPError:
|
||||
# Reset repo
|
||||
delete_repo(repo_id=repo_id, token=token)
|
||||
except: # noqa E722
|
||||
pass
|
||||
|
||||
def test_push_to_hub(self):
|
||||
config = BertConfig(
|
||||
vocab_size=99, hidden_size=32, num_hidden_layers=5, num_attention_heads=4, intermediate_size=37
|
||||
)
|
||||
config.push_to_hub("test-config", token=self._token)
|
||||
|
||||
new_config = BertConfig.from_pretrained(f"{USER}/test-config")
|
||||
for k, v in config.to_dict().items():
|
||||
if k != "transformers_version":
|
||||
self.assertEqual(v, getattr(new_config, k))
|
||||
|
||||
try:
|
||||
# Reset repo
|
||||
delete_repo(token=self._token, repo_id="test-config")
|
||||
except: # noqa E722
|
||||
pass
|
||||
|
||||
# Push to hub via save_pretrained
|
||||
with tempfile.TemporaryDirectory() as tmp_dir:
|
||||
config.save_pretrained(tmp_dir, repo_id="test-config", push_to_hub=True, token=self._token)
|
||||
try:
|
||||
tmp_repo = f"{USER}/test-config-{Path(tmp_dir).name}"
|
||||
|
||||
new_config = BertConfig.from_pretrained(f"{USER}/test-config")
|
||||
for k, v in config.to_dict().items():
|
||||
if k != "transformers_version":
|
||||
self.assertEqual(v, getattr(new_config, k))
|
||||
config = BertConfig(
|
||||
vocab_size=99, hidden_size=32, num_hidden_layers=5, num_attention_heads=4, intermediate_size=37
|
||||
)
|
||||
config.push_to_hub(tmp_repo, token=self._token)
|
||||
|
||||
new_config = BertConfig.from_pretrained(tmp_repo)
|
||||
for k, v in config.to_dict().items():
|
||||
if k != "transformers_version":
|
||||
self.assertEqual(v, getattr(new_config, k))
|
||||
finally:
|
||||
# Always (try to) delete the repo.
|
||||
self._try_delete_repo(repo_id=tmp_repo, token=self._token)
|
||||
|
||||
def test_push_to_hub_via_save_pretrained(self):
|
||||
with tempfile.TemporaryDirectory() as tmp_dir:
|
||||
try:
|
||||
tmp_repo = f"{USER}/test-config-{Path(tmp_dir).name}"
|
||||
|
||||
config = BertConfig(
|
||||
vocab_size=99, hidden_size=32, num_hidden_layers=5, num_attention_heads=4, intermediate_size=37
|
||||
)
|
||||
# Push to hub via save_pretrained
|
||||
config.save_pretrained(tmp_dir, repo_id=tmp_repo, push_to_hub=True, token=self._token)
|
||||
|
||||
new_config = BertConfig.from_pretrained(tmp_repo)
|
||||
for k, v in config.to_dict().items():
|
||||
if k != "transformers_version":
|
||||
self.assertEqual(v, getattr(new_config, k))
|
||||
finally:
|
||||
# Always (try to) delete the repo.
|
||||
self._try_delete_repo(repo_id=tmp_repo, token=self._token)
|
||||
|
||||
def test_push_to_hub_in_organization(self):
|
||||
config = BertConfig(
|
||||
vocab_size=99, hidden_size=32, num_hidden_layers=5, num_attention_heads=4, intermediate_size=37
|
||||
)
|
||||
config.push_to_hub("valid_org/test-config-org", token=self._token)
|
||||
|
||||
new_config = BertConfig.from_pretrained("valid_org/test-config-org")
|
||||
for k, v in config.to_dict().items():
|
||||
if k != "transformers_version":
|
||||
self.assertEqual(v, getattr(new_config, k))
|
||||
|
||||
try:
|
||||
# Reset repo
|
||||
delete_repo(token=self._token, repo_id="valid_org/test-config-org")
|
||||
except: # noqa E722
|
||||
pass
|
||||
|
||||
# Push to hub via save_pretrained
|
||||
with tempfile.TemporaryDirectory() as tmp_dir:
|
||||
config.save_pretrained(tmp_dir, repo_id="valid_org/test-config-org", push_to_hub=True, token=self._token)
|
||||
try:
|
||||
tmp_repo = f"valid_org/test-config-org-{Path(tmp_dir).name}"
|
||||
config = BertConfig(
|
||||
vocab_size=99, hidden_size=32, num_hidden_layers=5, num_attention_heads=4, intermediate_size=37
|
||||
)
|
||||
config.push_to_hub(tmp_repo, token=self._token)
|
||||
|
||||
new_config = BertConfig.from_pretrained("valid_org/test-config-org")
|
||||
for k, v in config.to_dict().items():
|
||||
if k != "transformers_version":
|
||||
self.assertEqual(v, getattr(new_config, k))
|
||||
new_config = BertConfig.from_pretrained(tmp_repo)
|
||||
for k, v in config.to_dict().items():
|
||||
if k != "transformers_version":
|
||||
self.assertEqual(v, getattr(new_config, k))
|
||||
finally:
|
||||
# Always (try to) delete the repo.
|
||||
self._try_delete_repo(repo_id=tmp_repo, token=self._token)
|
||||
|
||||
def test_push_to_hub_in_organization_via_save_pretrained(self):
|
||||
with tempfile.TemporaryDirectory() as tmp_dir:
|
||||
try:
|
||||
tmp_repo = f"valid_org/test-config-org-{Path(tmp_dir).name}"
|
||||
config = BertConfig(
|
||||
vocab_size=99, hidden_size=32, num_hidden_layers=5, num_attention_heads=4, intermediate_size=37
|
||||
)
|
||||
# Push to hub via save_pretrained
|
||||
config.save_pretrained(tmp_dir, repo_id=tmp_repo, push_to_hub=True, token=self._token)
|
||||
|
||||
new_config = BertConfig.from_pretrained(tmp_repo)
|
||||
for k, v in config.to_dict().items():
|
||||
if k != "transformers_version":
|
||||
self.assertEqual(v, getattr(new_config, k))
|
||||
finally:
|
||||
# Always (try to) delete the repo.
|
||||
self._try_delete_repo(repo_id=tmp_repo, token=self._token)
|
||||
|
||||
def test_push_to_hub_dynamic_config(self):
|
||||
CustomConfig.register_for_auto_class()
|
||||
config = CustomConfig(attribute=42)
|
||||
with tempfile.TemporaryDirectory() as tmp_dir:
|
||||
try:
|
||||
tmp_repo = f"{USER}/test-dynamic-config-{Path(tmp_dir).name}"
|
||||
|
||||
config.push_to_hub("test-dynamic-config", token=self._token)
|
||||
CustomConfig.register_for_auto_class()
|
||||
config = CustomConfig(attribute=42)
|
||||
|
||||
# This has added the proper auto_map field to the config
|
||||
self.assertDictEqual(config.auto_map, {"AutoConfig": "custom_configuration.CustomConfig"})
|
||||
config.push_to_hub(tmp_repo, token=self._token)
|
||||
|
||||
new_config = AutoConfig.from_pretrained(f"{USER}/test-dynamic-config", trust_remote_code=True)
|
||||
# Can't make an isinstance check because the new_config is from the FakeConfig class of a dynamic module
|
||||
self.assertEqual(new_config.__class__.__name__, "CustomConfig")
|
||||
self.assertEqual(new_config.attribute, 42)
|
||||
# This has added the proper auto_map field to the config
|
||||
self.assertDictEqual(config.auto_map, {"AutoConfig": "custom_configuration.CustomConfig"})
|
||||
|
||||
new_config = AutoConfig.from_pretrained(tmp_repo, trust_remote_code=True)
|
||||
# Can't make an isinstance check because the new_config is from the FakeConfig class of a dynamic module
|
||||
self.assertEqual(new_config.__class__.__name__, "CustomConfig")
|
||||
self.assertEqual(new_config.attribute, 42)
|
||||
finally:
|
||||
# Always (try to) delete the repo.
|
||||
self._try_delete_repo(repo_id=tmp_repo, token=self._token)
|
||||
|
||||
|
||||
class ConfigTestUtils(unittest.TestCase):
|
||||
|
||||
@@ -60,85 +60,91 @@ class FeatureExtractorPushToHubTester(unittest.TestCase):
|
||||
cls._token = TOKEN
|
||||
HfFolder.save_token(TOKEN)
|
||||
|
||||
@classmethod
|
||||
def tearDownClass(cls):
|
||||
@staticmethod
|
||||
def _try_delete_repo(repo_id, token):
|
||||
try:
|
||||
delete_repo(token=cls._token, repo_id="test-feature-extractor")
|
||||
except HTTPError:
|
||||
pass
|
||||
|
||||
try:
|
||||
delete_repo(token=cls._token, repo_id="valid_org/test-feature-extractor-org")
|
||||
except HTTPError:
|
||||
pass
|
||||
|
||||
try:
|
||||
delete_repo(token=cls._token, repo_id="test-dynamic-feature-extractor")
|
||||
except HTTPError:
|
||||
# Reset repo
|
||||
delete_repo(repo_id=repo_id, token=token)
|
||||
except: # noqa E722
|
||||
pass
|
||||
|
||||
def test_push_to_hub(self):
|
||||
feature_extractor = Wav2Vec2FeatureExtractor.from_pretrained(SAMPLE_FEATURE_EXTRACTION_CONFIG_DIR)
|
||||
feature_extractor.push_to_hub("test-feature-extractor", token=self._token)
|
||||
|
||||
new_feature_extractor = Wav2Vec2FeatureExtractor.from_pretrained(f"{USER}/test-feature-extractor")
|
||||
for k, v in feature_extractor.__dict__.items():
|
||||
self.assertEqual(v, getattr(new_feature_extractor, k))
|
||||
|
||||
try:
|
||||
# Reset repo
|
||||
delete_repo(token=self._token, repo_id="test-feature-extractor")
|
||||
except: # noqa E722
|
||||
pass
|
||||
|
||||
# Push to hub via save_pretrained
|
||||
with tempfile.TemporaryDirectory() as tmp_dir:
|
||||
feature_extractor.save_pretrained(
|
||||
tmp_dir, repo_id="test-feature-extractor", push_to_hub=True, token=self._token
|
||||
)
|
||||
try:
|
||||
tmp_repo = f"{USER}/test-feature-extractor-{Path(tmp_dir).name}"
|
||||
|
||||
new_feature_extractor = Wav2Vec2FeatureExtractor.from_pretrained(f"{USER}/test-feature-extractor")
|
||||
for k, v in feature_extractor.__dict__.items():
|
||||
self.assertEqual(v, getattr(new_feature_extractor, k))
|
||||
feature_extractor = Wav2Vec2FeatureExtractor.from_pretrained(SAMPLE_FEATURE_EXTRACTION_CONFIG_DIR)
|
||||
feature_extractor.push_to_hub(tmp_repo, token=self._token)
|
||||
|
||||
new_feature_extractor = Wav2Vec2FeatureExtractor.from_pretrained(tmp_repo)
|
||||
for k, v in feature_extractor.__dict__.items():
|
||||
self.assertEqual(v, getattr(new_feature_extractor, k))
|
||||
finally:
|
||||
# Always (try to) delete the repo.
|
||||
self._try_delete_repo(repo_id=tmp_repo, token=self._token)
|
||||
|
||||
def test_push_to_hub_via_save_pretrained(self):
|
||||
with tempfile.TemporaryDirectory() as tmp_dir:
|
||||
try:
|
||||
tmp_repo = f"{USER}/test-feature-extractor-{Path(tmp_dir).name}"
|
||||
feature_extractor = Wav2Vec2FeatureExtractor.from_pretrained(SAMPLE_FEATURE_EXTRACTION_CONFIG_DIR)
|
||||
# Push to hub via save_pretrained
|
||||
feature_extractor.save_pretrained(tmp_dir, repo_id=tmp_repo, push_to_hub=True, token=self._token)
|
||||
|
||||
new_feature_extractor = Wav2Vec2FeatureExtractor.from_pretrained(tmp_repo)
|
||||
for k, v in feature_extractor.__dict__.items():
|
||||
self.assertEqual(v, getattr(new_feature_extractor, k))
|
||||
finally:
|
||||
# Always (try to) delete the repo.
|
||||
self._try_delete_repo(repo_id=tmp_repo, token=self._token)
|
||||
|
||||
def test_push_to_hub_in_organization(self):
|
||||
feature_extractor = Wav2Vec2FeatureExtractor.from_pretrained(SAMPLE_FEATURE_EXTRACTION_CONFIG_DIR)
|
||||
feature_extractor.push_to_hub("valid_org/test-feature-extractor", token=self._token)
|
||||
|
||||
new_feature_extractor = Wav2Vec2FeatureExtractor.from_pretrained("valid_org/test-feature-extractor")
|
||||
for k, v in feature_extractor.__dict__.items():
|
||||
self.assertEqual(v, getattr(new_feature_extractor, k))
|
||||
|
||||
try:
|
||||
# Reset repo
|
||||
delete_repo(token=self._token, repo_id="valid_org/test-feature-extractor")
|
||||
except: # noqa E722
|
||||
pass
|
||||
|
||||
# Push to hub via save_pretrained
|
||||
with tempfile.TemporaryDirectory() as tmp_dir:
|
||||
feature_extractor.save_pretrained(
|
||||
tmp_dir, repo_id="valid_org/test-feature-extractor-org", push_to_hub=True, token=self._token
|
||||
)
|
||||
try:
|
||||
tmp_repo = f"valid_org/test-feature-extractor-{Path(tmp_dir).name}"
|
||||
feature_extractor = Wav2Vec2FeatureExtractor.from_pretrained(SAMPLE_FEATURE_EXTRACTION_CONFIG_DIR)
|
||||
feature_extractor.push_to_hub(tmp_repo, token=self._token)
|
||||
|
||||
new_feature_extractor = Wav2Vec2FeatureExtractor.from_pretrained("valid_org/test-feature-extractor-org")
|
||||
for k, v in feature_extractor.__dict__.items():
|
||||
self.assertEqual(v, getattr(new_feature_extractor, k))
|
||||
new_feature_extractor = Wav2Vec2FeatureExtractor.from_pretrained(tmp_repo)
|
||||
for k, v in feature_extractor.__dict__.items():
|
||||
self.assertEqual(v, getattr(new_feature_extractor, k))
|
||||
finally:
|
||||
# Always (try to) delete the repo.
|
||||
self._try_delete_repo(repo_id=tmp_repo, token=self._token)
|
||||
|
||||
def test_push_to_hub_in_organization_via_save_pretrained(self):
|
||||
with tempfile.TemporaryDirectory() as tmp_dir:
|
||||
try:
|
||||
tmp_repo = f"valid_org/test-feature-extractor-{Path(tmp_dir).name}"
|
||||
feature_extractor = Wav2Vec2FeatureExtractor.from_pretrained(SAMPLE_FEATURE_EXTRACTION_CONFIG_DIR)
|
||||
# Push to hub via save_pretrained
|
||||
feature_extractor.save_pretrained(tmp_dir, repo_id=tmp_repo, push_to_hub=True, token=self._token)
|
||||
|
||||
new_feature_extractor = Wav2Vec2FeatureExtractor.from_pretrained(tmp_repo)
|
||||
for k, v in feature_extractor.__dict__.items():
|
||||
self.assertEqual(v, getattr(new_feature_extractor, k))
|
||||
finally:
|
||||
# Always (try to) delete the repo.
|
||||
self._try_delete_repo(repo_id=tmp_repo, token=self._token)
|
||||
|
||||
def test_push_to_hub_dynamic_feature_extractor(self):
|
||||
CustomFeatureExtractor.register_for_auto_class()
|
||||
feature_extractor = CustomFeatureExtractor.from_pretrained(SAMPLE_FEATURE_EXTRACTION_CONFIG_DIR)
|
||||
with tempfile.TemporaryDirectory() as tmp_dir:
|
||||
try:
|
||||
tmp_repo = f"{USER}/test-dynamic-feature-extractor-{Path(tmp_dir).name}"
|
||||
CustomFeatureExtractor.register_for_auto_class()
|
||||
feature_extractor = CustomFeatureExtractor.from_pretrained(SAMPLE_FEATURE_EXTRACTION_CONFIG_DIR)
|
||||
|
||||
feature_extractor.push_to_hub("test-dynamic-feature-extractor", token=self._token)
|
||||
feature_extractor.push_to_hub(tmp_repo, token=self._token)
|
||||
|
||||
# This has added the proper auto_map field to the config
|
||||
self.assertDictEqual(
|
||||
feature_extractor.auto_map,
|
||||
{"AutoFeatureExtractor": "custom_feature_extraction.CustomFeatureExtractor"},
|
||||
)
|
||||
# This has added the proper auto_map field to the config
|
||||
self.assertDictEqual(
|
||||
feature_extractor.auto_map,
|
||||
{"AutoFeatureExtractor": "custom_feature_extraction.CustomFeatureExtractor"},
|
||||
)
|
||||
|
||||
new_feature_extractor = AutoFeatureExtractor.from_pretrained(
|
||||
f"{USER}/test-dynamic-feature-extractor", trust_remote_code=True
|
||||
)
|
||||
# Can't make an isinstance check because the new_feature_extractor is from the CustomFeatureExtractor class of a dynamic module
|
||||
self.assertEqual(new_feature_extractor.__class__.__name__, "CustomFeatureExtractor")
|
||||
new_feature_extractor = AutoFeatureExtractor.from_pretrained(tmp_repo, trust_remote_code=True)
|
||||
# Can't make an isinstance check because the new_feature_extractor is from the CustomFeatureExtractor class of a dynamic module
|
||||
self.assertEqual(new_feature_extractor.__class__.__name__, "CustomFeatureExtractor")
|
||||
finally:
|
||||
# Always (try to) delete the repo.
|
||||
self._try_delete_repo(repo_id=tmp_repo, token=self._token)
|
||||
|
||||
@@ -71,88 +71,93 @@ class ImageProcessorPushToHubTester(unittest.TestCase):
|
||||
cls._token = TOKEN
|
||||
HfFolder.save_token(TOKEN)
|
||||
|
||||
@classmethod
|
||||
def tearDownClass(cls):
|
||||
@staticmethod
|
||||
def _try_delete_repo(repo_id, token):
|
||||
try:
|
||||
delete_repo(token=cls._token, repo_id="test-image-processor")
|
||||
except HTTPError:
|
||||
pass
|
||||
|
||||
try:
|
||||
delete_repo(token=cls._token, repo_id="valid_org/test-image-processor-org")
|
||||
except HTTPError:
|
||||
pass
|
||||
|
||||
try:
|
||||
delete_repo(token=cls._token, repo_id="test-dynamic-image-processor")
|
||||
except HTTPError:
|
||||
# Reset repo
|
||||
delete_repo(repo_id=repo_id, token=token)
|
||||
except: # noqa E722
|
||||
pass
|
||||
|
||||
def test_push_to_hub(self):
|
||||
image_processor = ViTImageProcessor.from_pretrained(SAMPLE_IMAGE_PROCESSING_CONFIG_DIR)
|
||||
image_processor.push_to_hub("test-image-processor", token=self._token)
|
||||
|
||||
new_image_processor = ViTImageProcessor.from_pretrained(f"{USER}/test-image-processor")
|
||||
for k, v in image_processor.__dict__.items():
|
||||
self.assertEqual(v, getattr(new_image_processor, k))
|
||||
|
||||
try:
|
||||
# Reset repo
|
||||
delete_repo(token=self._token, repo_id="test-image-processor")
|
||||
except: # noqa E722
|
||||
pass
|
||||
|
||||
# Push to hub via save_pretrained
|
||||
with tempfile.TemporaryDirectory() as tmp_dir:
|
||||
image_processor.save_pretrained(
|
||||
tmp_dir, repo_id="test-image-processor", push_to_hub=True, token=self._token
|
||||
)
|
||||
try:
|
||||
tmp_repo = f"{USER}/test-image-processor-{Path(tmp_dir).name}"
|
||||
image_processor = ViTImageProcessor.from_pretrained(SAMPLE_IMAGE_PROCESSING_CONFIG_DIR)
|
||||
image_processor.push_to_hub(tmp_repo, token=self._token)
|
||||
|
||||
new_image_processor = ViTImageProcessor.from_pretrained(f"{USER}/test-image-processor")
|
||||
for k, v in image_processor.__dict__.items():
|
||||
self.assertEqual(v, getattr(new_image_processor, k))
|
||||
new_image_processor = ViTImageProcessor.from_pretrained(tmp_repo)
|
||||
for k, v in image_processor.__dict__.items():
|
||||
self.assertEqual(v, getattr(new_image_processor, k))
|
||||
finally:
|
||||
# Always (try to) delete the repo.
|
||||
self._try_delete_repo(repo_id=tmp_repo, token=self._token)
|
||||
|
||||
def test_push_to_hub_via_save_pretrained(self):
|
||||
with tempfile.TemporaryDirectory() as tmp_dir:
|
||||
try:
|
||||
tmp_repo = f"{USER}/test-image-processor-{Path(tmp_dir).name}"
|
||||
image_processor = ViTImageProcessor.from_pretrained(SAMPLE_IMAGE_PROCESSING_CONFIG_DIR)
|
||||
# Push to hub via save_pretrained
|
||||
image_processor.save_pretrained(tmp_dir, repo_id=tmp_repo, push_to_hub=True, token=self._token)
|
||||
|
||||
new_image_processor = ViTImageProcessor.from_pretrained(tmp_repo)
|
||||
for k, v in image_processor.__dict__.items():
|
||||
self.assertEqual(v, getattr(new_image_processor, k))
|
||||
finally:
|
||||
# Always (try to) delete the repo.
|
||||
self._try_delete_repo(repo_id=tmp_repo, token=self._token)
|
||||
|
||||
def test_push_to_hub_in_organization(self):
|
||||
image_processor = ViTImageProcessor.from_pretrained(SAMPLE_IMAGE_PROCESSING_CONFIG_DIR)
|
||||
image_processor.push_to_hub("valid_org/test-image-processor", token=self._token)
|
||||
|
||||
new_image_processor = ViTImageProcessor.from_pretrained("valid_org/test-image-processor")
|
||||
for k, v in image_processor.__dict__.items():
|
||||
self.assertEqual(v, getattr(new_image_processor, k))
|
||||
|
||||
try:
|
||||
# Reset repo
|
||||
delete_repo(token=self._token, repo_id="valid_org/test-image-processor")
|
||||
except: # noqa E722
|
||||
pass
|
||||
|
||||
# Push to hub via save_pretrained
|
||||
with tempfile.TemporaryDirectory() as tmp_dir:
|
||||
image_processor.save_pretrained(
|
||||
tmp_dir, repo_id="valid_org/test-image-processor-org", push_to_hub=True, token=self._token
|
||||
)
|
||||
try:
|
||||
tmp_repo = f"valid_org/test-image-processor-{Path(tmp_dir).name}"
|
||||
image_processor = ViTImageProcessor.from_pretrained(SAMPLE_IMAGE_PROCESSING_CONFIG_DIR)
|
||||
image_processor.push_to_hub(tmp_repo, token=self._token)
|
||||
|
||||
new_image_processor = ViTImageProcessor.from_pretrained("valid_org/test-image-processor-org")
|
||||
for k, v in image_processor.__dict__.items():
|
||||
self.assertEqual(v, getattr(new_image_processor, k))
|
||||
new_image_processor = ViTImageProcessor.from_pretrained(tmp_repo)
|
||||
for k, v in image_processor.__dict__.items():
|
||||
self.assertEqual(v, getattr(new_image_processor, k))
|
||||
finally:
|
||||
# Always (try to) delete the repo.
|
||||
self._try_delete_repo(repo_id=tmp_repo, token=self._token)
|
||||
|
||||
def test_push_to_hub_in_organization_via_save_pretrained(self):
|
||||
with tempfile.TemporaryDirectory() as tmp_dir:
|
||||
try:
|
||||
tmp_repo = f"valid_org/test-image-processor-{Path(tmp_dir).name}"
|
||||
image_processor = ViTImageProcessor.from_pretrained(SAMPLE_IMAGE_PROCESSING_CONFIG_DIR)
|
||||
# Push to hub via save_pretrained
|
||||
image_processor.save_pretrained(tmp_dir, repo_id=tmp_repo, push_to_hub=True, token=self._token)
|
||||
|
||||
new_image_processor = ViTImageProcessor.from_pretrained(tmp_repo)
|
||||
for k, v in image_processor.__dict__.items():
|
||||
self.assertEqual(v, getattr(new_image_processor, k))
|
||||
finally:
|
||||
# Always (try to) delete the repo.
|
||||
self._try_delete_repo(repo_id=tmp_repo, token=self._token)
|
||||
|
||||
def test_push_to_hub_dynamic_image_processor(self):
|
||||
CustomImageProcessor.register_for_auto_class()
|
||||
image_processor = CustomImageProcessor.from_pretrained(SAMPLE_IMAGE_PROCESSING_CONFIG_DIR)
|
||||
with tempfile.TemporaryDirectory() as tmp_dir:
|
||||
try:
|
||||
tmp_repo = f"{USER}/test-dynamic-image-processor-{Path(tmp_dir).name}"
|
||||
CustomImageProcessor.register_for_auto_class()
|
||||
image_processor = CustomImageProcessor.from_pretrained(SAMPLE_IMAGE_PROCESSING_CONFIG_DIR)
|
||||
|
||||
image_processor.push_to_hub("test-dynamic-image-processor", token=self._token)
|
||||
image_processor.push_to_hub(tmp_repo, token=self._token)
|
||||
|
||||
# This has added the proper auto_map field to the config
|
||||
self.assertDictEqual(
|
||||
image_processor.auto_map,
|
||||
{"AutoImageProcessor": "custom_image_processing.CustomImageProcessor"},
|
||||
)
|
||||
# This has added the proper auto_map field to the config
|
||||
self.assertDictEqual(
|
||||
image_processor.auto_map,
|
||||
{"AutoImageProcessor": "custom_image_processing.CustomImageProcessor"},
|
||||
)
|
||||
|
||||
new_image_processor = AutoImageProcessor.from_pretrained(
|
||||
f"{USER}/test-dynamic-image-processor", trust_remote_code=True
|
||||
)
|
||||
# Can't make an isinstance check because the new_image_processor is from the CustomImageProcessor class of a dynamic module
|
||||
self.assertEqual(new_image_processor.__class__.__name__, "CustomImageProcessor")
|
||||
new_image_processor = AutoImageProcessor.from_pretrained(tmp_repo, trust_remote_code=True)
|
||||
# Can't make an isinstance check because the new_image_processor is from the CustomImageProcessor class of a dynamic module
|
||||
self.assertEqual(new_image_processor.__class__.__name__, "CustomImageProcessor")
|
||||
finally:
|
||||
# Always (try to) delete the repo.
|
||||
self._try_delete_repo(repo_id=tmp_repo, token=self._token)
|
||||
|
||||
|
||||
class ImageProcessingUtilsTester(unittest.TestCase):
|
||||
|
||||
@@ -14,10 +14,10 @@
|
||||
|
||||
import tempfile
|
||||
import unittest
|
||||
from pathlib import Path
|
||||
|
||||
import numpy as np
|
||||
from huggingface_hub import HfFolder, delete_repo, snapshot_download
|
||||
from requests.exceptions import HTTPError
|
||||
|
||||
from transformers import BertConfig, BertModel, is_flax_available, is_torch_available
|
||||
from transformers.testing_utils import (
|
||||
@@ -55,89 +55,103 @@ class FlaxModelPushToHubTester(unittest.TestCase):
|
||||
cls._token = TOKEN
|
||||
HfFolder.save_token(TOKEN)
|
||||
|
||||
@classmethod
|
||||
def tearDownClass(cls):
|
||||
@staticmethod
|
||||
def _try_delete_repo(repo_id, token):
|
||||
try:
|
||||
delete_repo(token=cls._token, repo_id="test-model-flax")
|
||||
except HTTPError:
|
||||
pass
|
||||
|
||||
try:
|
||||
delete_repo(token=cls._token, repo_id="valid_org/test-model-flax-org")
|
||||
except HTTPError:
|
||||
# Reset repo
|
||||
delete_repo(repo_id=repo_id, token=token)
|
||||
except: # noqa E722
|
||||
pass
|
||||
|
||||
def test_push_to_hub(self):
|
||||
config = BertConfig(
|
||||
vocab_size=99, hidden_size=32, num_hidden_layers=5, num_attention_heads=4, intermediate_size=37
|
||||
)
|
||||
model = FlaxBertModel(config)
|
||||
model.push_to_hub("test-model-flax", token=self._token)
|
||||
|
||||
new_model = FlaxBertModel.from_pretrained(f"{USER}/test-model-flax")
|
||||
|
||||
base_params = flatten_dict(unfreeze(model.params))
|
||||
new_params = flatten_dict(unfreeze(new_model.params))
|
||||
|
||||
for key in base_params.keys():
|
||||
max_diff = (base_params[key] - new_params[key]).sum().item()
|
||||
self.assertLessEqual(max_diff, 1e-3, msg=f"{key} not identical")
|
||||
|
||||
try:
|
||||
# Reset repo
|
||||
delete_repo(token=self._token, repo_id="test-model-flax")
|
||||
except: # noqa E722
|
||||
pass
|
||||
|
||||
# Push to hub via save_pretrained
|
||||
with tempfile.TemporaryDirectory() as tmp_dir:
|
||||
model.save_pretrained(tmp_dir, repo_id="test-model-flax", push_to_hub=True, token=self._token)
|
||||
try:
|
||||
tmp_repo = f"{USER}/test-model-flax-{Path(tmp_dir).name}"
|
||||
config = BertConfig(
|
||||
vocab_size=99, hidden_size=32, num_hidden_layers=5, num_attention_heads=4, intermediate_size=37
|
||||
)
|
||||
model = FlaxBertModel(config)
|
||||
model.push_to_hub(tmp_repo, token=self._token)
|
||||
|
||||
new_model = FlaxBertModel.from_pretrained(f"{USER}/test-model-flax")
|
||||
new_model = FlaxBertModel.from_pretrained(tmp_repo)
|
||||
|
||||
base_params = flatten_dict(unfreeze(model.params))
|
||||
new_params = flatten_dict(unfreeze(new_model.params))
|
||||
base_params = flatten_dict(unfreeze(model.params))
|
||||
new_params = flatten_dict(unfreeze(new_model.params))
|
||||
|
||||
for key in base_params.keys():
|
||||
max_diff = (base_params[key] - new_params[key]).sum().item()
|
||||
self.assertLessEqual(max_diff, 1e-3, msg=f"{key} not identical")
|
||||
for key in base_params.keys():
|
||||
max_diff = (base_params[key] - new_params[key]).sum().item()
|
||||
self.assertLessEqual(max_diff, 1e-3, msg=f"{key} not identical")
|
||||
finally:
|
||||
# Always (try to) delete the repo.
|
||||
self._try_delete_repo(repo_id=tmp_repo, token=self._token)
|
||||
|
||||
def test_push_to_hub_via_save_pretrained(self):
|
||||
with tempfile.TemporaryDirectory() as tmp_dir:
|
||||
try:
|
||||
tmp_repo = f"{USER}/test-model-flax-{Path(tmp_dir).name}"
|
||||
config = BertConfig(
|
||||
vocab_size=99, hidden_size=32, num_hidden_layers=5, num_attention_heads=4, intermediate_size=37
|
||||
)
|
||||
model = FlaxBertModel(config)
|
||||
# Push to hub via save_pretrained
|
||||
model.save_pretrained(tmp_dir, repo_id=tmp_repo, push_to_hub=True, token=self._token)
|
||||
|
||||
new_model = FlaxBertModel.from_pretrained(tmp_repo)
|
||||
|
||||
base_params = flatten_dict(unfreeze(model.params))
|
||||
new_params = flatten_dict(unfreeze(new_model.params))
|
||||
|
||||
for key in base_params.keys():
|
||||
max_diff = (base_params[key] - new_params[key]).sum().item()
|
||||
self.assertLessEqual(max_diff, 1e-3, msg=f"{key} not identical")
|
||||
finally:
|
||||
# Always (try to) delete the repo.
|
||||
self._try_delete_repo(repo_id=tmp_repo, token=self._token)
|
||||
|
||||
def test_push_to_hub_in_organization(self):
|
||||
config = BertConfig(
|
||||
vocab_size=99, hidden_size=32, num_hidden_layers=5, num_attention_heads=4, intermediate_size=37
|
||||
)
|
||||
model = FlaxBertModel(config)
|
||||
model.push_to_hub("valid_org/test-model-flax-org", token=self._token)
|
||||
|
||||
new_model = FlaxBertModel.from_pretrained("valid_org/test-model-flax-org")
|
||||
|
||||
base_params = flatten_dict(unfreeze(model.params))
|
||||
new_params = flatten_dict(unfreeze(new_model.params))
|
||||
|
||||
for key in base_params.keys():
|
||||
max_diff = (base_params[key] - new_params[key]).sum().item()
|
||||
self.assertLessEqual(max_diff, 1e-3, msg=f"{key} not identical")
|
||||
|
||||
try:
|
||||
# Reset repo
|
||||
delete_repo(token=self._token, repo_id="valid_org/test-model-flax-org")
|
||||
except: # noqa E722
|
||||
pass
|
||||
|
||||
# Push to hub via save_pretrained
|
||||
with tempfile.TemporaryDirectory() as tmp_dir:
|
||||
model.save_pretrained(
|
||||
tmp_dir, repo_id="valid_org/test-model-flax-org", push_to_hub=True, token=self._token
|
||||
)
|
||||
try:
|
||||
tmp_repo = f"valid_org/test-model-flax-org-{Path(tmp_dir).name}"
|
||||
config = BertConfig(
|
||||
vocab_size=99, hidden_size=32, num_hidden_layers=5, num_attention_heads=4, intermediate_size=37
|
||||
)
|
||||
model = FlaxBertModel(config)
|
||||
model.push_to_hub(tmp_repo, token=self._token)
|
||||
|
||||
new_model = FlaxBertModel.from_pretrained("valid_org/test-model-flax-org")
|
||||
new_model = FlaxBertModel.from_pretrained(tmp_repo)
|
||||
|
||||
base_params = flatten_dict(unfreeze(model.params))
|
||||
new_params = flatten_dict(unfreeze(new_model.params))
|
||||
base_params = flatten_dict(unfreeze(model.params))
|
||||
new_params = flatten_dict(unfreeze(new_model.params))
|
||||
|
||||
for key in base_params.keys():
|
||||
max_diff = (base_params[key] - new_params[key]).sum().item()
|
||||
self.assertLessEqual(max_diff, 1e-3, msg=f"{key} not identical")
|
||||
for key in base_params.keys():
|
||||
max_diff = (base_params[key] - new_params[key]).sum().item()
|
||||
self.assertLessEqual(max_diff, 1e-3, msg=f"{key} not identical")
|
||||
finally:
|
||||
# Always (try to) delete the repo.
|
||||
self._try_delete_repo(repo_id=tmp_repo, token=self._token)
|
||||
|
||||
def test_push_to_hub_in_organization_via_save_pretrained(self):
|
||||
with tempfile.TemporaryDirectory() as tmp_dir:
|
||||
try:
|
||||
tmp_repo = f"valid_org/test-model-flax-org-{Path(tmp_dir).name}"
|
||||
config = BertConfig(
|
||||
vocab_size=99, hidden_size=32, num_hidden_layers=5, num_attention_heads=4, intermediate_size=37
|
||||
)
|
||||
model = FlaxBertModel(config)
|
||||
# Push to hub via save_pretrained
|
||||
model.save_pretrained(tmp_dir, repo_id=tmp_repo, push_to_hub=True, token=self._token)
|
||||
|
||||
new_model = FlaxBertModel.from_pretrained(tmp_repo)
|
||||
|
||||
base_params = flatten_dict(unfreeze(model.params))
|
||||
new_params = flatten_dict(unfreeze(new_model.params))
|
||||
|
||||
for key in base_params.keys():
|
||||
max_diff = (base_params[key] - new_params[key]).sum().item()
|
||||
self.assertLessEqual(max_diff, 1e-3, msg=f"{key} not identical")
|
||||
finally:
|
||||
# Always (try to) delete the repo.
|
||||
self._try_delete_repo(repo_id=tmp_repo, token=self._token)
|
||||
|
||||
|
||||
def check_models_equal(model1, model2):
|
||||
|
||||
@@ -23,6 +23,7 @@ import random
|
||||
import tempfile
|
||||
import unittest
|
||||
import unittest.mock as mock
|
||||
from pathlib import Path
|
||||
|
||||
from huggingface_hub import HfFolder, Repository, delete_repo, snapshot_download
|
||||
from requests.exceptions import HTTPError
|
||||
@@ -682,127 +683,149 @@ class TFModelPushToHubTester(unittest.TestCase):
|
||||
cls._token = TOKEN
|
||||
HfFolder.save_token(TOKEN)
|
||||
|
||||
@classmethod
|
||||
def tearDownClass(cls):
|
||||
@staticmethod
|
||||
def _try_delete_repo(repo_id, token):
|
||||
try:
|
||||
delete_repo(token=cls._token, repo_id="test-model-tf")
|
||||
except HTTPError:
|
||||
pass
|
||||
|
||||
try:
|
||||
delete_repo(token=cls._token, repo_id="test-model-tf-callback")
|
||||
except HTTPError:
|
||||
pass
|
||||
|
||||
try:
|
||||
delete_repo(token=cls._token, repo_id="valid_org/test-model-tf-org")
|
||||
except HTTPError:
|
||||
# Reset repo
|
||||
delete_repo(repo_id=repo_id, token=token)
|
||||
except: # noqa E722
|
||||
pass
|
||||
|
||||
def test_push_to_hub(self):
    """Push a TF BERT model with `push_to_hub`, check the log output and reload it.

    The repo name is derived from the temporary directory name; the repo is
    handed to `_try_delete_repo` in `finally`, whether or not the checks pass.
    """
    with tempfile.TemporaryDirectory() as tmp_dir:
        try:
            tmp_repo = f"{USER}/test-model-tf-{Path(tmp_dir).name}"
            config = BertConfig(
                vocab_size=99, hidden_size=32, num_hidden_layers=5, num_attention_heads=4, intermediate_size=37
            )
            model = TFBertModel(config)
            # Make sure model is properly initialized
            model.build_in_name_scope()

            logging.set_verbosity_info()
            logger = logging.get_logger("transformers.utils.hub")
            with CaptureLogger(logger) as cl:
                model.push_to_hub(tmp_repo, token=self._token)
            logging.set_verbosity_warning()
            # Check the model card was created and uploaded.
            self.assertIn("Uploading the following files to __DUMMY_TRANSFORMERS_USER__/test-model-tf", cl.out)

            new_model = TFBertModel.from_pretrained(tmp_repo)
            models_equal = True
            for p1, p2 in zip(model.weights, new_model.weights):
                if not tf.math.reduce_all(p1 == p2):
                    models_equal = False
                    break
            self.assertTrue(models_equal)
        finally:
            # Always (try to) delete the repo.
            self._try_delete_repo(repo_id=tmp_repo, token=self._token)
|
||||
|
||||
def test_push_to_hub_via_save_pretrained(self):
    """Push a TF BERT model via `save_pretrained(push_to_hub=True)` and reload it.

    The repo name is derived from the temporary directory name; the repo is
    handed to `_try_delete_repo` in `finally`.
    """
    with tempfile.TemporaryDirectory() as tmp_dir:
        try:
            tmp_repo = f"{USER}/test-model-tf-{Path(tmp_dir).name}"
            model = TFBertModel(
                BertConfig(
                    vocab_size=99, hidden_size=32, num_hidden_layers=5, num_attention_heads=4, intermediate_size=37
                )
            )
            # Make sure model is properly initialized
            model.build_in_name_scope()

            # Push to hub via save_pretrained
            model.save_pretrained(tmp_dir, repo_id=tmp_repo, push_to_hub=True, token=self._token)

            reloaded = TFBertModel.from_pretrained(tmp_repo)
            # Stops at the first pair of weights that differ.
            weights_match = all(
                bool(tf.math.reduce_all(w_ref == w_new)) for w_ref, w_new in zip(model.weights, reloaded.weights)
            )
            self.assertTrue(weights_match)
        finally:
            # Always (try to) delete the repo.
            self._try_delete_repo(repo_id=tmp_repo, token=self._token)
|
||||
|
||||
@is_pt_tf_cross_test
def test_push_to_hub_callback(self):
    """Fit for one epoch with `PushToHubCallback` and check the pushed weights.

    Afterwards the parameters of the TF and PyTorch `push_to_hub` signatures
    are compared, once the framework-specific entries have been removed.
    """
    with tempfile.TemporaryDirectory() as tmp_dir:
        try:
            tmp_repo = f"{USER}/test-model-tf-callback-{Path(tmp_dir).name}"
            config = BertConfig(
                vocab_size=99, hidden_size=32, num_hidden_layers=5, num_attention_heads=4, intermediate_size=37
            )
            model = TFBertForMaskedLM(config)
            model.compile()

            push_to_hub_callback = PushToHubCallback(
                output_dir=tmp_dir,
                hub_model_id=tmp_repo,
                hub_token=self._token,
            )
            model.fit(model.dummy_inputs, model.dummy_inputs, epochs=1, callbacks=[push_to_hub_callback])

            new_model = TFBertForMaskedLM.from_pretrained(tmp_repo)
            models_equal = True
            for p1, p2 in zip(model.weights, new_model.weights):
                if not tf.math.reduce_all(p1 == p2):
                    models_equal = False
                    break
            self.assertTrue(models_equal)

            tf_push_to_hub_params = dict(inspect.signature(TFPreTrainedModel.push_to_hub).parameters)
            tf_push_to_hub_params.pop("base_model_card_args")
            pt_push_to_hub_params = dict(inspect.signature(PreTrainedModel.push_to_hub).parameters)
            pt_push_to_hub_params.pop("deprecated_kwargs")
            # unittest.TestCase has no `assertDictEaual`; the misspelled call raised
            # AttributeError instead of comparing the two signatures.
            self.assertDictEqual(tf_push_to_hub_params, pt_push_to_hub_params)
        finally:
            # Always (try to) delete the repo.
            self._try_delete_repo(repo_id=tmp_repo, token=self._token)
|
||||
|
||||
def test_push_to_hub_in_organization(self):
    """Push a TF BERT model to a repo under `valid_org` and reload it.

    The repo name is derived from the temporary directory name; the repo is
    handed to `_try_delete_repo` in `finally`.
    """
    with tempfile.TemporaryDirectory() as tmp_dir:
        try:
            tmp_repo = f"valid_org/test-model-tf-org-{Path(tmp_dir).name}"
            config = BertConfig(
                vocab_size=99, hidden_size=32, num_hidden_layers=5, num_attention_heads=4, intermediate_size=37
            )
            model = TFBertModel(config)
            # Make sure model is properly initialized
            model.build_in_name_scope()

            model.push_to_hub(tmp_repo, token=self._token)

            new_model = TFBertModel.from_pretrained(tmp_repo)
            models_equal = True
            for p1, p2 in zip(model.weights, new_model.weights):
                if not tf.math.reduce_all(p1 == p2):
                    models_equal = False
                    break
            self.assertTrue(models_equal)
        finally:
            # Always (try to) delete the repo.
            self._try_delete_repo(repo_id=tmp_repo, token=self._token)
|
||||
|
||||
def test_push_to_hub_in_organization_via_save_pretrained(self):
    """Push a TF BERT model to a `valid_org` repo via `save_pretrained` and reload it.

    The repo name is derived from the temporary directory name; the repo is
    handed to `_try_delete_repo` in `finally`.
    """
    with tempfile.TemporaryDirectory() as tmp_dir:
        try:
            tmp_repo = f"valid_org/test-model-tf-org-{Path(tmp_dir).name}"
            model = TFBertModel(
                BertConfig(
                    vocab_size=99, hidden_size=32, num_hidden_layers=5, num_attention_heads=4, intermediate_size=37
                )
            )
            # Make sure model is properly initialized
            model.build_in_name_scope()

            # Push to hub via save_pretrained
            model.save_pretrained(tmp_dir, push_to_hub=True, token=self._token, repo_id=tmp_repo)

            reloaded = TFBertModel.from_pretrained(tmp_repo)
            # Stops at the first pair of weights that differ.
            weights_match = all(
                bool(tf.math.reduce_all(w_ref == w_new)) for w_ref, w_new in zip(model.weights, reloaded.weights)
            )
            self.assertTrue(weights_match)
        finally:
            # Always (try to) delete the repo.
            self._try_delete_repo(repo_id=tmp_repo, token=self._token)
|
||||
|
||||
@@ -1876,142 +1876,168 @@ class ModelPushToHubTester(unittest.TestCase):
|
||||
cls._token = TOKEN
|
||||
HfFolder.save_token(TOKEN)
|
||||
|
||||
@classmethod
|
||||
def tearDownClass(cls):
|
||||
@staticmethod
def _try_delete_repo(repo_id, token):
    """Delete `repo_id` through `delete_repo`, ignoring any error it raises.

    Args:
        repo_id: Identifier of the repo to delete.
        token: Token forwarded to `delete_repo`.
    """
    try:
        # Reset repo
        delete_repo(repo_id=repo_id, token=token)
    except Exception:
        # Best-effort cleanup: a failed deletion must not fail the test, while
        # KeyboardInterrupt / SystemExit are still allowed to propagate.
        pass
|
||||
|
||||
@unittest.skip(reason="This test is flaky")
def test_push_to_hub(self):
    """Push a BERT model with `push_to_hub` and check the reloaded parameters.

    The repo name is derived from the temporary directory name; the repo is
    handed to `_try_delete_repo` in `finally`.
    """
    with tempfile.TemporaryDirectory() as tmp_dir:
        try:
            tmp_repo = f"{USER}/test-model-{Path(tmp_dir).name}"
            config = BertConfig(
                vocab_size=99, hidden_size=32, num_hidden_layers=5, num_attention_heads=4, intermediate_size=37
            )
            model = BertModel(config)
            model.push_to_hub(tmp_repo, token=self._token)

            new_model = BertModel.from_pretrained(tmp_repo)
            for p1, p2 in zip(model.parameters(), new_model.parameters()):
                self.assertTrue(torch.equal(p1, p2))
        finally:
            # Always (try to) delete the repo.
            self._try_delete_repo(repo_id=tmp_repo, token=self._token)
|
||||
|
||||
@unittest.skip(reason="This test is flaky")
def test_push_to_hub_via_save_pretrained(self):
    """Push a BERT model via `save_pretrained(push_to_hub=True)` and reload it.

    The repo name is derived from the temporary directory name; the repo is
    handed to `_try_delete_repo` in `finally`.
    """
    with tempfile.TemporaryDirectory() as tmp_dir:
        try:
            tmp_repo = f"{USER}/test-model-{Path(tmp_dir).name}"
            model = BertModel(
                BertConfig(
                    vocab_size=99, hidden_size=32, num_hidden_layers=5, num_attention_heads=4, intermediate_size=37
                )
            )
            # Push to hub via save_pretrained
            model.save_pretrained(tmp_dir, repo_id=tmp_repo, push_to_hub=True, token=self._token)

            reloaded = BertModel.from_pretrained(tmp_repo)
            for expected, actual in zip(model.parameters(), reloaded.parameters()):
                self.assertTrue(torch.equal(expected, actual))
        finally:
            # Always (try to) delete the repo.
            self._try_delete_repo(repo_id=tmp_repo, token=self._token)
|
||||
|
||||
def test_push_to_hub_with_description(self):
    """Push with `create_pr=True` and a commit description, then check it is returned.

    The repo name is derived from the temporary directory name; the repo is
    handed to `_try_delete_repo` in `finally`.
    """
    with tempfile.TemporaryDirectory() as tmp_dir:
        try:
            tmp_repo = f"{USER}/test-model-{Path(tmp_dir).name}"
            config = BertConfig(
                vocab_size=99, hidden_size=32, num_hidden_layers=5, num_attention_heads=4, intermediate_size=37
            )
            model = BertModel(config)
            # The description text is kept verbatim; it is only compared with what
            # `push_to_hub` reports back.
            COMMIT_DESCRIPTION = """
The commit description supports markdown synthax see:
```python
>>> form transformers import AutoConfig
>>> config = AutoConfig.from_pretrained("google-bert/bert-base-uncased")
```
"""
            commit_details = model.push_to_hub(
                tmp_repo, use_auth_token=self._token, create_pr=True, commit_description=COMMIT_DESCRIPTION
            )
            self.assertEqual(commit_details.commit_description, COMMIT_DESCRIPTION)
        finally:
            # Always (try to) delete the repo.
            self._try_delete_repo(repo_id=tmp_repo, token=self._token)
|
||||
|
||||
@unittest.skip(reason="This test is flaky")
def test_push_to_hub_in_organization(self):
    """Push a BERT model to a repo under `valid_org` and check the reloaded parameters.

    The repo name is derived from the temporary directory name; the repo is
    handed to `_try_delete_repo` in `finally`.
    """
    with tempfile.TemporaryDirectory() as tmp_dir:
        try:
            tmp_repo = f"valid_org/test-model-org-{Path(tmp_dir).name}"
            config = BertConfig(
                vocab_size=99, hidden_size=32, num_hidden_layers=5, num_attention_heads=4, intermediate_size=37
            )
            model = BertModel(config)
            model.push_to_hub(tmp_repo, token=self._token)

            new_model = BertModel.from_pretrained(tmp_repo)
            for p1, p2 in zip(model.parameters(), new_model.parameters()):
                self.assertTrue(torch.equal(p1, p2))
        finally:
            # Always (try to) delete the repo.
            self._try_delete_repo(repo_id=tmp_repo, token=self._token)
|
||||
|
||||
@unittest.skip(reason="This test is flaky")
def test_push_to_hub_in_organization_via_save_pretrained(self):
    """Push a BERT model to a `valid_org` repo via `save_pretrained` and reload it.

    The repo name is derived from the temporary directory name; the repo is
    handed to `_try_delete_repo` in `finally`.
    """
    with tempfile.TemporaryDirectory() as tmp_dir:
        try:
            tmp_repo = f"valid_org/test-model-org-{Path(tmp_dir).name}"
            model = BertModel(
                BertConfig(
                    vocab_size=99, hidden_size=32, num_hidden_layers=5, num_attention_heads=4, intermediate_size=37
                )
            )
            # Push to hub via save_pretrained
            model.save_pretrained(tmp_dir, push_to_hub=True, token=self._token, repo_id=tmp_repo)

            reloaded = BertModel.from_pretrained(tmp_repo)
            for expected, actual in zip(model.parameters(), reloaded.parameters()):
                self.assertTrue(torch.equal(expected, actual))
        finally:
            # Always (try to) delete the repo.
            self._try_delete_repo(repo_id=tmp_repo, token=self._token)
|
||||
|
||||
def test_push_to_hub_dynamic_model(self):
    """Push a custom model registered for the auto classes and reload it with `AutoModel`.

    The repo name is derived from the temporary directory name; the repo is
    handed to `_try_delete_repo` in `finally`.
    """
    with tempfile.TemporaryDirectory() as tmp_dir:
        try:
            tmp_repo = f"{USER}/test-dynamic-model-{Path(tmp_dir).name}"
            CustomConfig.register_for_auto_class()
            CustomModel.register_for_auto_class()

            config = CustomConfig(hidden_size=32)
            model = CustomModel(config)

            model.push_to_hub(tmp_repo, token=self._token)
            # checks
            self.assertDictEqual(
                config.auto_map,
                {"AutoConfig": "custom_configuration.CustomConfig", "AutoModel": "custom_modeling.CustomModel"},
            )

            new_model = AutoModel.from_pretrained(tmp_repo, trust_remote_code=True)
            # Can't make an isinstance check because the new_model is from the CustomModel class of a dynamic module
            self.assertEqual(new_model.__class__.__name__, "CustomModel")
            for p1, p2 in zip(model.parameters(), new_model.parameters()):
                self.assertTrue(torch.equal(p1, p2))

            config = AutoConfig.from_pretrained(tmp_repo, trust_remote_code=True)
            new_model = AutoModel.from_config(config, trust_remote_code=True)
            self.assertEqual(new_model.__class__.__name__, "CustomModel")
        finally:
            # Always (try to) delete the repo.
            self._try_delete_repo(repo_id=tmp_repo, token=self._token)
|
||||
|
||||
def test_push_to_hub_with_tags(self):
    """Add tags to a custom model, push it, and check the tags on the loaded model card.

    The repo name is derived from the temporary directory name; the repo is
    handed to `_try_delete_repo` in `finally`.
    """
    with tempfile.TemporaryDirectory() as tmp_dir:
        try:
            tmp_repo = f"{USER}/test-dynamic-model-with-tags-{Path(tmp_dir).name}"
            from huggingface_hub import ModelCard

            new_tags = ["tag-1", "tag-2"]

            CustomConfig.register_for_auto_class()
            CustomModel.register_for_auto_class()

            config = CustomConfig(hidden_size=32)
            model = CustomModel(config)

            # assertIsNone / assertEqual report the offending value on failure,
            # unlike assertTrue on a pre-evaluated comparison.
            self.assertIsNone(model.model_tags)

            model.add_model_tags(new_tags)

            self.assertEqual(model.model_tags, new_tags)

            model.push_to_hub(tmp_repo, token=self._token)

            loaded_model_card = ModelCard.load(tmp_repo)
            self.assertEqual(loaded_model_card.data.tags, new_tags)
        finally:
            # Always (try to) delete the repo.
            self._try_delete_repo(repo_id=tmp_repo, token=self._token)
|
||||
|
||||
|
||||
@require_torch
|
||||
|
||||
@@ -118,110 +118,133 @@ class TokenizerPushToHubTester(unittest.TestCase):
|
||||
cls._token = TOKEN
|
||||
HfFolder.save_token(TOKEN)
|
||||
|
||||
@classmethod
|
||||
def tearDownClass(cls):
|
||||
@staticmethod
def _try_delete_repo(repo_id, token):
    """Delete `repo_id` through `delete_repo`, ignoring any error it raises.

    Args:
        repo_id: Identifier of the repo to delete.
        token: Token forwarded to `delete_repo`.
    """
    try:
        # Reset repo
        delete_repo(repo_id=repo_id, token=token)
    except Exception:
        # Best-effort cleanup: a failed deletion must not fail the test, while
        # KeyboardInterrupt / SystemExit are still allowed to propagate.
        pass
|
||||
|
||||
def test_push_to_hub(self):
    """Push a `BertTokenizer` built from `self.vocab_tokens` and compare the reloaded vocab.

    The repo name is derived from the temporary directory name; the repo is
    handed to `_try_delete_repo` in `finally`.
    """
    with tempfile.TemporaryDirectory() as tmp_dir:
        try:
            tmp_repo = f"{USER}/test-tokenizer-{Path(tmp_dir).name}"
            vocab_file = os.path.join(tmp_dir, "vocab.txt")
            # One token per line.
            with open(vocab_file, "w", encoding="utf-8") as vocab_writer:
                vocab_writer.write("".join(x + "\n" for x in self.vocab_tokens))
            tokenizer = BertTokenizer(vocab_file)

            tokenizer.push_to_hub(tmp_repo, token=self._token)
            new_tokenizer = BertTokenizer.from_pretrained(tmp_repo)
            self.assertDictEqual(new_tokenizer.vocab, tokenizer.vocab)
        finally:
            # Always (try to) delete the repo.
            self._try_delete_repo(repo_id=tmp_repo, token=self._token)
|
||||
def test_push_to_hub_via_save_pretrained(self):
    """Push a `BertTokenizer` via `save_pretrained(push_to_hub=True)` and compare the reloaded vocab.

    The repo name is derived from the temporary directory name; the repo is
    handed to `_try_delete_repo` in `finally`.
    """
    with tempfile.TemporaryDirectory() as tmp_dir:
        try:
            tmp_repo = f"{USER}/test-tokenizer-{Path(tmp_dir).name}"
            vocab_file = os.path.join(tmp_dir, "vocab.txt")
            # One token per line.
            with open(vocab_file, "w", encoding="utf-8") as vocab_writer:
                vocab_writer.write("".join(x + "\n" for x in self.vocab_tokens))
            tokenizer = BertTokenizer(vocab_file)

            # Push to hub via save_pretrained
            tokenizer.save_pretrained(tmp_dir, repo_id=tmp_repo, push_to_hub=True, token=self._token)

            new_tokenizer = BertTokenizer.from_pretrained(tmp_repo)
            self.assertDictEqual(new_tokenizer.vocab, tokenizer.vocab)
        finally:
            # Always (try to) delete the repo.
            self._try_delete_repo(repo_id=tmp_repo, token=self._token)
|
||||
|
||||
def test_push_to_hub_in_organization(self):
    """Push a `BertTokenizer` to a repo under `valid_org` and compare the reloaded vocab.

    The repo name is derived from the temporary directory name; the repo is
    handed to `_try_delete_repo` in `finally`.
    """
    with tempfile.TemporaryDirectory() as tmp_dir:
        try:
            tmp_repo = f"valid_org/test-tokenizer-{Path(tmp_dir).name}"
            vocab_file = os.path.join(tmp_dir, "vocab.txt")
            # One token per line.
            with open(vocab_file, "w", encoding="utf-8") as vocab_writer:
                vocab_writer.write("".join(x + "\n" for x in self.vocab_tokens))
            tokenizer = BertTokenizer(vocab_file)

            tokenizer.push_to_hub(tmp_repo, token=self._token)
            new_tokenizer = BertTokenizer.from_pretrained(tmp_repo)
            self.assertDictEqual(new_tokenizer.vocab, tokenizer.vocab)
        finally:
            # Always (try to) delete the repo.
            self._try_delete_repo(repo_id=tmp_repo, token=self._token)
|
||||
def test_push_to_hub_in_organization_via_save_pretrained(self):
    """Push a `BertTokenizer` to a `valid_org` repo via `save_pretrained` and compare the reloaded vocab.

    The repo name is derived from the temporary directory name; the repo is
    handed to `_try_delete_repo` in `finally`.
    """
    with tempfile.TemporaryDirectory() as tmp_dir:
        try:
            tmp_repo = f"valid_org/test-tokenizer-{Path(tmp_dir).name}"
            vocab_file = os.path.join(tmp_dir, "vocab.txt")
            # One token per line.
            with open(vocab_file, "w", encoding="utf-8") as vocab_writer:
                vocab_writer.write("".join(x + "\n" for x in self.vocab_tokens))
            tokenizer = BertTokenizer(vocab_file)

            # Push to hub via save_pretrained
            tokenizer.save_pretrained(tmp_dir, repo_id=tmp_repo, push_to_hub=True, token=self._token)

            new_tokenizer = BertTokenizer.from_pretrained(tmp_repo)
            self.assertDictEqual(new_tokenizer.vocab, tokenizer.vocab)
        finally:
            # Always (try to) delete the repo.
            self._try_delete_repo(repo_id=tmp_repo, token=self._token)
|
||||
|
||||
@require_tokenizers
def test_push_to_hub_dynamic_tokenizer(self):
    """Push a slow-only custom tokenizer and reload it through `AutoTokenizer`.

    The repo name is derived from the temporary directory name; the repo is
    handed to `_try_delete_repo` in `finally`.
    """
    with tempfile.TemporaryDirectory() as tmp_dir:
        try:
            tmp_repo = f"{USER}/test-dynamic-tokenizer-{Path(tmp_dir).name}"
            CustomTokenizer.register_for_auto_class()

            vocab_file = os.path.join(tmp_dir, "vocab.txt")
            # One token per line.
            with open(vocab_file, "w", encoding="utf-8") as vocab_writer:
                vocab_writer.write("".join(x + "\n" for x in self.vocab_tokens))
            tokenizer = CustomTokenizer(vocab_file)

            # No fast custom tokenizer
            tokenizer.push_to_hub(tmp_repo, token=self._token)

            tokenizer = AutoTokenizer.from_pretrained(tmp_repo, trust_remote_code=True)
            # Can't make an isinstance check because the loaded tokenizer is from the CustomTokenizer class of a
            # dynamic module
            self.assertEqual(tokenizer.__class__.__name__, "CustomTokenizer")
        finally:
            # Always (try to) delete the repo.
            self._try_delete_repo(repo_id=tmp_repo, token=self._token)
|
||||
|
||||
@require_tokenizers
def test_push_to_hub_dynamic_tokenizer_with_both_slow_and_fast_classes(self):
    """Push a custom tokenizer with slow and fast classes and reload each through `AutoTokenizer`.

    The default load is checked to give `CustomTokenizerFast`, and a load with
    `use_fast=False` to give `CustomTokenizer`. The repo name is derived from
    the temporary directory name; the repo is handed to `_try_delete_repo` in
    `finally`.
    """
    with tempfile.TemporaryDirectory() as tmp_dir:
        try:
            tmp_repo = f"{USER}/test-dynamic-tokenizer-{Path(tmp_dir).name}"
            CustomTokenizer.register_for_auto_class()

            # Fast and slow custom tokenizer
            CustomTokenizerFast.register_for_auto_class()

            vocab_file = os.path.join(tmp_dir, "vocab.txt")
            # One token per line.
            with open(vocab_file, "w", encoding="utf-8") as vocab_writer:
                vocab_writer.write("".join(x + "\n" for x in self.vocab_tokens))

            # Load a fast BERT tokenizer from the vocab file and save it back into
            # tmp_dir before CustomTokenizerFast is loaded from the same directory.
            bert_tokenizer = BertTokenizerFast.from_pretrained(tmp_dir)
            bert_tokenizer.save_pretrained(tmp_dir)
            tokenizer = CustomTokenizerFast.from_pretrained(tmp_dir)

            tokenizer.push_to_hub(tmp_repo, token=self._token)

            tokenizer = AutoTokenizer.from_pretrained(tmp_repo, trust_remote_code=True)
            # Can't make an isinstance check because the loaded tokenizer is from a class of a dynamic module
            self.assertEqual(tokenizer.__class__.__name__, "CustomTokenizerFast")
            tokenizer = AutoTokenizer.from_pretrained(tmp_repo, use_fast=False, trust_remote_code=True)
            # Can't make an isinstance check because the loaded tokenizer is from a class of a dynamic module
            self.assertEqual(tokenizer.__class__.__name__, "CustomTokenizer")
        finally:
            # Always (try to) delete the repo.
            self._try_delete_repo(repo_id=tmp_repo, token=self._token)
|
||||
|
||||
|
||||
class TrieTest(unittest.TestCase):
|
||||
|
||||
Reference in New Issue
Block a user