Rewrite push_to_hub to use upload_files (#18366)
* Rewrite push_to_hub to use upload_files
* Adapt the doc a bit
* Address review comments and clean doc
This commit is contained in:
@@ -23,7 +23,7 @@ import unittest
|
||||
import unittest.mock as mock
|
||||
from pathlib import Path
|
||||
|
||||
from huggingface_hub import HfFolder, Repository, delete_repo, set_access_token
|
||||
from huggingface_hub import HfFolder, delete_repo, set_access_token
|
||||
from requests.exceptions import HTTPError
|
||||
from transformers import AutoConfig, BertConfig, GPT2Config, is_torch_available
|
||||
from transformers.configuration_utils import PretrainedConfig
|
||||
@@ -243,46 +243,58 @@ class ConfigPushToHubTester(unittest.TestCase):
|
||||
config = BertConfig(
|
||||
vocab_size=99, hidden_size=32, num_hidden_layers=5, num_attention_heads=4, intermediate_size=37
|
||||
)
|
||||
with tempfile.TemporaryDirectory() as tmp_dir:
|
||||
config.save_pretrained(os.path.join(tmp_dir, "test-config"), push_to_hub=True, use_auth_token=self._token)
|
||||
config.push_to_hub("test-config", use_auth_token=self._token)
|
||||
|
||||
new_config = BertConfig.from_pretrained(f"{USER}/test-config")
|
||||
for k, v in config.__dict__.items():
|
||||
if k != "transformers_version":
|
||||
self.assertEqual(v, getattr(new_config, k))
|
||||
new_config = BertConfig.from_pretrained(f"{USER}/test-config")
|
||||
for k, v in config.__dict__.items():
|
||||
if k != "transformers_version":
|
||||
self.assertEqual(v, getattr(new_config, k))
|
||||
|
||||
# Reset repo
|
||||
delete_repo(token=self._token, repo_id="test-config")
|
||||
|
||||
# Push to hub via save_pretrained
|
||||
with tempfile.TemporaryDirectory() as tmp_dir:
|
||||
config.save_pretrained(tmp_dir, repo_id="test-config", push_to_hub=True, use_auth_token=self._token)
|
||||
|
||||
new_config = BertConfig.from_pretrained(f"{USER}/test-config")
|
||||
for k, v in config.__dict__.items():
|
||||
if k != "transformers_version":
|
||||
self.assertEqual(v, getattr(new_config, k))
|
||||
|
||||
def test_push_to_hub_in_organization(self):
|
||||
config = BertConfig(
|
||||
vocab_size=99, hidden_size=32, num_hidden_layers=5, num_attention_heads=4, intermediate_size=37
|
||||
)
|
||||
config.push_to_hub("valid_org/test-config-org", use_auth_token=self._token)
|
||||
|
||||
new_config = BertConfig.from_pretrained("valid_org/test-config-org")
|
||||
for k, v in config.__dict__.items():
|
||||
if k != "transformers_version":
|
||||
self.assertEqual(v, getattr(new_config, k))
|
||||
|
||||
# Reset repo
|
||||
delete_repo(token=self._token, repo_id="valid_org/test-config-org")
|
||||
|
||||
# Push to hub via save_pretrained
|
||||
with tempfile.TemporaryDirectory() as tmp_dir:
|
||||
config.save_pretrained(
|
||||
os.path.join(tmp_dir, "test-config-org"),
|
||||
push_to_hub=True,
|
||||
use_auth_token=self._token,
|
||||
organization="valid_org",
|
||||
tmp_dir, repo_id="valid_org/test-config-org", push_to_hub=True, use_auth_token=self._token
|
||||
)
|
||||
|
||||
new_config = BertConfig.from_pretrained("valid_org/test-config-org")
|
||||
for k, v in config.__dict__.items():
|
||||
if k != "transformers_version":
|
||||
self.assertEqual(v, getattr(new_config, k))
|
||||
new_config = BertConfig.from_pretrained("valid_org/test-config-org")
|
||||
for k, v in config.__dict__.items():
|
||||
if k != "transformers_version":
|
||||
self.assertEqual(v, getattr(new_config, k))
|
||||
|
||||
def test_push_to_hub_dynamic_config(self):
|
||||
CustomConfig.register_for_auto_class()
|
||||
config = CustomConfig(attribute=42)
|
||||
|
||||
with tempfile.TemporaryDirectory() as tmp_dir:
|
||||
repo = Repository(tmp_dir, clone_from=f"{USER}/test-dynamic-config", use_auth_token=self._token)
|
||||
config.save_pretrained(tmp_dir)
|
||||
config.push_to_hub("test-dynamic-config", use_auth_token=self._token)
|
||||
|
||||
# This has added the proper auto_map field to the config
|
||||
self.assertDictEqual(config.auto_map, {"AutoConfig": "custom_configuration.CustomConfig"})
|
||||
# The code has been copied from fixtures
|
||||
self.assertTrue(os.path.isfile(os.path.join(tmp_dir, "custom_configuration.py")))
|
||||
|
||||
repo.push_to_hub()
|
||||
# This has added the proper auto_map field to the config
|
||||
self.assertDictEqual(config.auto_map, {"AutoConfig": "custom_configuration.CustomConfig"})
|
||||
|
||||
new_config = AutoConfig.from_pretrained(f"{USER}/test-dynamic-config", trust_remote_code=True)
|
||||
# Can't make an isinstance check because the new_config is from the FakeConfig class of a dynamic module
|
||||
|
||||
@@ -22,7 +22,7 @@ import unittest
|
||||
import unittest.mock as mock
|
||||
from pathlib import Path
|
||||
|
||||
from huggingface_hub import HfFolder, Repository, delete_repo, set_access_token
|
||||
from huggingface_hub import HfFolder, delete_repo, set_access_token
|
||||
from requests.exceptions import HTTPError
|
||||
from transformers import AutoFeatureExtractor, Wav2Vec2FeatureExtractor
|
||||
from transformers.testing_utils import TOKEN, USER, check_json_file_has_correct_format, get_tests_dir, is_staging_test
|
||||
@@ -167,47 +167,57 @@ class FeatureExtractorPushToHubTester(unittest.TestCase):
|
||||
|
||||
def test_push_to_hub(self):
|
||||
feature_extractor = Wav2Vec2FeatureExtractor.from_pretrained(SAMPLE_FEATURE_EXTRACTION_CONFIG_DIR)
|
||||
feature_extractor.push_to_hub("test-feature-extractor", use_auth_token=self._token)
|
||||
|
||||
new_feature_extractor = Wav2Vec2FeatureExtractor.from_pretrained(f"{USER}/test-feature-extractor")
|
||||
for k, v in feature_extractor.__dict__.items():
|
||||
self.assertEqual(v, getattr(new_feature_extractor, k))
|
||||
|
||||
# Reset repo
|
||||
delete_repo(token=self._token, repo_id="test-feature-extractor")
|
||||
|
||||
# Push to hub via save_pretrained
|
||||
with tempfile.TemporaryDirectory() as tmp_dir:
|
||||
feature_extractor.save_pretrained(
|
||||
os.path.join(tmp_dir, "test-feature-extractor"), push_to_hub=True, use_auth_token=self._token
|
||||
tmp_dir, repo_id="test-feature-extractor", push_to_hub=True, use_auth_token=self._token
|
||||
)
|
||||
|
||||
new_feature_extractor = Wav2Vec2FeatureExtractor.from_pretrained(f"{USER}/test-feature-extractor")
|
||||
for k, v in feature_extractor.__dict__.items():
|
||||
self.assertEqual(v, getattr(new_feature_extractor, k))
|
||||
new_feature_extractor = Wav2Vec2FeatureExtractor.from_pretrained(f"{USER}/test-feature-extractor")
|
||||
for k, v in feature_extractor.__dict__.items():
|
||||
self.assertEqual(v, getattr(new_feature_extractor, k))
|
||||
|
||||
def test_push_to_hub_in_organization(self):
|
||||
feature_extractor = Wav2Vec2FeatureExtractor.from_pretrained(SAMPLE_FEATURE_EXTRACTION_CONFIG_DIR)
|
||||
feature_extractor.push_to_hub("valid_org/test-feature-extractor", use_auth_token=self._token)
|
||||
|
||||
new_feature_extractor = Wav2Vec2FeatureExtractor.from_pretrained("valid_org/test-feature-extractor")
|
||||
for k, v in feature_extractor.__dict__.items():
|
||||
self.assertEqual(v, getattr(new_feature_extractor, k))
|
||||
|
||||
# Reset repo
|
||||
delete_repo(token=self._token, repo_id="valid_org/test-feature-extractor")
|
||||
|
||||
# Push to hub via save_pretrained
|
||||
with tempfile.TemporaryDirectory() as tmp_dir:
|
||||
feature_extractor.save_pretrained(
|
||||
os.path.join(tmp_dir, "test-feature-extractor-org"),
|
||||
push_to_hub=True,
|
||||
use_auth_token=self._token,
|
||||
organization="valid_org",
|
||||
tmp_dir, repo_id="valid_org/test-feature-extractor-org", push_to_hub=True, use_auth_token=self._token
|
||||
)
|
||||
|
||||
new_feature_extractor = Wav2Vec2FeatureExtractor.from_pretrained("valid_org/test-feature-extractor-org")
|
||||
for k, v in feature_extractor.__dict__.items():
|
||||
self.assertEqual(v, getattr(new_feature_extractor, k))
|
||||
new_feature_extractor = Wav2Vec2FeatureExtractor.from_pretrained("valid_org/test-feature-extractor-org")
|
||||
for k, v in feature_extractor.__dict__.items():
|
||||
self.assertEqual(v, getattr(new_feature_extractor, k))
|
||||
|
||||
def test_push_to_hub_dynamic_feature_extractor(self):
|
||||
CustomFeatureExtractor.register_for_auto_class()
|
||||
feature_extractor = CustomFeatureExtractor.from_pretrained(SAMPLE_FEATURE_EXTRACTION_CONFIG_DIR)
|
||||
|
||||
with tempfile.TemporaryDirectory() as tmp_dir:
|
||||
repo = Repository(tmp_dir, clone_from=f"{USER}/test-dynamic-feature-extractor", use_auth_token=self._token)
|
||||
feature_extractor.save_pretrained(tmp_dir)
|
||||
feature_extractor.push_to_hub("test-dynamic-feature-extractor", use_auth_token=self._token)
|
||||
|
||||
# This has added the proper auto_map field to the config
|
||||
self.assertDictEqual(
|
||||
feature_extractor.auto_map,
|
||||
{"AutoFeatureExtractor": "custom_feature_extraction.CustomFeatureExtractor"},
|
||||
)
|
||||
# The code has been copied from fixtures
|
||||
self.assertTrue(os.path.isfile(os.path.join(tmp_dir, "custom_feature_extraction.py")))
|
||||
|
||||
repo.push_to_hub()
|
||||
# This has added the proper auto_map field to the config
|
||||
self.assertDictEqual(
|
||||
feature_extractor.auto_map,
|
||||
{"AutoFeatureExtractor": "custom_feature_extraction.CustomFeatureExtractor"},
|
||||
)
|
||||
|
||||
new_feature_extractor = AutoFeatureExtractor.from_pretrained(
|
||||
f"{USER}/test-dynamic-feature-extractor", trust_remote_code=True
|
||||
|
||||
@@ -32,7 +32,7 @@ from typing import Dict, List, Tuple
|
||||
import numpy as np
|
||||
|
||||
import transformers
|
||||
from huggingface_hub import HfFolder, Repository, delete_repo, set_access_token
|
||||
from huggingface_hub import HfFolder, delete_repo, set_access_token
|
||||
from requests.exceptions import HTTPError
|
||||
from transformers import (
|
||||
AutoConfig,
|
||||
@@ -2962,39 +2962,51 @@ class ModelPushToHubTester(unittest.TestCase):
|
||||
except HTTPError:
|
||||
pass
|
||||
|
||||
try:
|
||||
delete_repo(token=cls._token, repo_id="test-dynamic-model-config")
|
||||
except HTTPError:
|
||||
pass
|
||||
|
||||
def test_push_to_hub(self):
|
||||
config = BertConfig(
|
||||
vocab_size=99, hidden_size=32, num_hidden_layers=5, num_attention_heads=4, intermediate_size=37
|
||||
)
|
||||
model = BertModel(config)
|
||||
with tempfile.TemporaryDirectory() as tmp_dir:
|
||||
model.save_pretrained(os.path.join(tmp_dir, "test-model"), push_to_hub=True, use_auth_token=self._token)
|
||||
model.push_to_hub("test-model", use_auth_token=self._token)
|
||||
|
||||
new_model = BertModel.from_pretrained(f"{USER}/test-model")
|
||||
for p1, p2 in zip(model.parameters(), new_model.parameters()):
|
||||
self.assertTrue(torch.equal(p1, p2))
|
||||
new_model = BertModel.from_pretrained(f"{USER}/test-model")
|
||||
for p1, p2 in zip(model.parameters(), new_model.parameters()):
|
||||
self.assertTrue(torch.equal(p1, p2))
|
||||
|
||||
# Reset repo
|
||||
delete_repo(token=self._token, repo_id="test-model")
|
||||
|
||||
# Push to hub via save_pretrained
|
||||
with tempfile.TemporaryDirectory() as tmp_dir:
|
||||
model.save_pretrained(tmp_dir, repo_id="test-model", push_to_hub=True, use_auth_token=self._token)
|
||||
|
||||
new_model = BertModel.from_pretrained(f"{USER}/test-model")
|
||||
for p1, p2 in zip(model.parameters(), new_model.parameters()):
|
||||
self.assertTrue(torch.equal(p1, p2))
|
||||
|
||||
def test_push_to_hub_in_organization(self):
|
||||
config = BertConfig(
|
||||
vocab_size=99, hidden_size=32, num_hidden_layers=5, num_attention_heads=4, intermediate_size=37
|
||||
)
|
||||
model = BertModel(config)
|
||||
model.push_to_hub("valid_org/test-model-org", use_auth_token=self._token)
|
||||
|
||||
new_model = BertModel.from_pretrained("valid_org/test-model-org")
|
||||
for p1, p2 in zip(model.parameters(), new_model.parameters()):
|
||||
self.assertTrue(torch.equal(p1, p2))
|
||||
|
||||
# Reset repo
|
||||
delete_repo(token=self._token, repo_id="valid_org/test-model-org")
|
||||
|
||||
# Push to hub via save_pretrained
|
||||
with tempfile.TemporaryDirectory() as tmp_dir:
|
||||
model.save_pretrained(
|
||||
os.path.join(tmp_dir, "test-model-org"),
|
||||
push_to_hub=True,
|
||||
use_auth_token=self._token,
|
||||
organization="valid_org",
|
||||
tmp_dir, push_to_hub=True, use_auth_token=self._token, repo_id="valid_org/test-model-org"
|
||||
)
|
||||
|
||||
new_model = BertModel.from_pretrained("valid_org/test-model-org")
|
||||
for p1, p2 in zip(model.parameters(), new_model.parameters()):
|
||||
self.assertTrue(torch.equal(p1, p2))
|
||||
new_model = BertModel.from_pretrained("valid_org/test-model-org")
|
||||
for p1, p2 in zip(model.parameters(), new_model.parameters()):
|
||||
self.assertTrue(torch.equal(p1, p2))
|
||||
|
||||
def test_push_to_hub_dynamic_model(self):
|
||||
CustomConfig.register_for_auto_class()
|
||||
@@ -3003,16 +3015,12 @@ class ModelPushToHubTester(unittest.TestCase):
|
||||
config = CustomConfig(hidden_size=32)
|
||||
model = CustomModel(config)
|
||||
|
||||
with tempfile.TemporaryDirectory() as tmp_dir:
|
||||
repo = Repository(tmp_dir, clone_from=f"{USER}/test-dynamic-model", use_auth_token=self._token)
|
||||
model.save_pretrained(tmp_dir)
|
||||
# checks
|
||||
self.assertDictEqual(
|
||||
config.auto_map,
|
||||
{"AutoConfig": "custom_configuration.CustomConfig", "AutoModel": "custom_modeling.CustomModel"},
|
||||
)
|
||||
|
||||
repo.push_to_hub()
|
||||
model.push_to_hub("test-dynamic-model", use_auth_token=self._token)
|
||||
# checks
|
||||
self.assertDictEqual(
|
||||
config.auto_map,
|
||||
{"AutoConfig": "custom_configuration.CustomConfig", "AutoModel": "custom_modeling.CustomModel"},
|
||||
)
|
||||
|
||||
new_model = AutoModel.from_pretrained(f"{USER}/test-dynamic-model", trust_remote_code=True)
|
||||
# Can't make an isinstance check because the new_model is from the CustomModel class of a dynamic module
|
||||
|
||||
@@ -1153,38 +1153,63 @@ class FlaxModelPushToHubTester(unittest.TestCase):
|
||||
vocab_size=99, hidden_size=32, num_hidden_layers=5, num_attention_heads=4, intermediate_size=37
|
||||
)
|
||||
model = FlaxBertModel(config)
|
||||
model.push_to_hub("test-model-flax", use_auth_token=self._token)
|
||||
|
||||
new_model = FlaxBertModel.from_pretrained(f"{USER}/test-model-flax")
|
||||
|
||||
base_params = flatten_dict(unfreeze(model.params))
|
||||
new_params = flatten_dict(unfreeze(new_model.params))
|
||||
|
||||
for key in base_params.keys():
|
||||
max_diff = (base_params[key] - new_params[key]).sum().item()
|
||||
self.assertLessEqual(max_diff, 1e-3, msg=f"{key} not identical")
|
||||
|
||||
# Reset repo
|
||||
delete_repo(token=self._token, repo_id="test-model-flax")
|
||||
|
||||
# Push to hub via save_pretrained
|
||||
with tempfile.TemporaryDirectory() as tmp_dir:
|
||||
model.save_pretrained(
|
||||
os.path.join(tmp_dir, "test-model-flax"), push_to_hub=True, use_auth_token=self._token
|
||||
)
|
||||
model.save_pretrained(tmp_dir, repo_id="test-model-flax", push_to_hub=True, use_auth_token=self._token)
|
||||
|
||||
new_model = FlaxBertModel.from_pretrained(f"{USER}/test-model-flax")
|
||||
new_model = FlaxBertModel.from_pretrained(f"{USER}/test-model-flax")
|
||||
|
||||
base_params = flatten_dict(unfreeze(model.params))
|
||||
new_params = flatten_dict(unfreeze(new_model.params))
|
||||
base_params = flatten_dict(unfreeze(model.params))
|
||||
new_params = flatten_dict(unfreeze(new_model.params))
|
||||
|
||||
for key in base_params.keys():
|
||||
max_diff = (base_params[key] - new_params[key]).sum().item()
|
||||
self.assertLessEqual(max_diff, 1e-3, msg=f"{key} not identical")
|
||||
for key in base_params.keys():
|
||||
max_diff = (base_params[key] - new_params[key]).sum().item()
|
||||
self.assertLessEqual(max_diff, 1e-3, msg=f"{key} not identical")
|
||||
|
||||
def test_push_to_hub_in_organization(self):
|
||||
config = BertConfig(
|
||||
vocab_size=99, hidden_size=32, num_hidden_layers=5, num_attention_heads=4, intermediate_size=37
|
||||
)
|
||||
model = FlaxBertModel(config)
|
||||
model.push_to_hub("valid_org/test-model-flax-org", use_auth_token=self._token)
|
||||
|
||||
new_model = FlaxBertModel.from_pretrained("valid_org/test-model-flax-org")
|
||||
|
||||
base_params = flatten_dict(unfreeze(model.params))
|
||||
new_params = flatten_dict(unfreeze(new_model.params))
|
||||
|
||||
for key in base_params.keys():
|
||||
max_diff = (base_params[key] - new_params[key]).sum().item()
|
||||
self.assertLessEqual(max_diff, 1e-3, msg=f"{key} not identical")
|
||||
|
||||
# Reset repo
|
||||
delete_repo(token=self._token, repo_id="valid_org/test-model-flax-org")
|
||||
|
||||
# Push to hub via save_pretrained
|
||||
with tempfile.TemporaryDirectory() as tmp_dir:
|
||||
model.save_pretrained(
|
||||
os.path.join(tmp_dir, "test-model-flax-org"),
|
||||
push_to_hub=True,
|
||||
use_auth_token=self._token,
|
||||
organization="valid_org",
|
||||
tmp_dir, repo_id="valid_org/test-model-flax-org", push_to_hub=True, use_auth_token=self._token
|
||||
)
|
||||
|
||||
new_model = FlaxBertModel.from_pretrained("valid_org/test-model-flax-org")
|
||||
new_model = FlaxBertModel.from_pretrained("valid_org/test-model-flax-org")
|
||||
|
||||
base_params = flatten_dict(unfreeze(model.params))
|
||||
new_params = flatten_dict(unfreeze(new_model.params))
|
||||
base_params = flatten_dict(unfreeze(model.params))
|
||||
new_params = flatten_dict(unfreeze(new_model.params))
|
||||
|
||||
for key in base_params.keys():
|
||||
max_diff = (base_params[key] - new_params[key]).sum().item()
|
||||
self.assertLessEqual(max_diff, 1e-3, msg=f"{key} not identical")
|
||||
for key in base_params.keys():
|
||||
max_diff = (base_params[key] - new_params[key]).sum().item()
|
||||
self.assertLessEqual(max_diff, 1e-3, msg=f"{key} not identical")
|
||||
|
||||
@@ -33,17 +33,18 @@ from requests.exceptions import HTTPError
|
||||
from transformers import is_tf_available, is_torch_available
|
||||
from transformers.configuration_utils import PretrainedConfig
|
||||
from transformers.models.auto import get_values
|
||||
from transformers.testing_utils import tooslow # noqa: F401
|
||||
from transformers.testing_utils import (
|
||||
from transformers.testing_utils import ( # noqa: F401
|
||||
TOKEN,
|
||||
USER,
|
||||
CaptureLogger,
|
||||
CaptureStdout,
|
||||
_tf_gpu_memory_limit,
|
||||
is_pt_tf_cross_test,
|
||||
is_staging_test,
|
||||
require_tf,
|
||||
require_tf2onnx,
|
||||
slow,
|
||||
tooslow,
|
||||
torch_device,
|
||||
)
|
||||
from transformers.utils import logging
|
||||
@@ -2189,41 +2190,65 @@ class TFModelPushToHubTester(unittest.TestCase):
|
||||
model = TFBertModel(config)
|
||||
# Make sure model is properly initialized
|
||||
_ = model(model.dummy_inputs)
|
||||
with tempfile.TemporaryDirectory() as tmp_dir:
|
||||
model.save_pretrained(os.path.join(tmp_dir, "test-model-tf"), push_to_hub=True, use_auth_token=self._token)
|
||||
|
||||
new_model = TFBertModel.from_pretrained(f"{USER}/test-model-tf")
|
||||
models_equal = True
|
||||
for p1, p2 in zip(model.weights, new_model.weights):
|
||||
if tf.math.reduce_sum(tf.math.abs(p1 - p2)) > 0:
|
||||
models_equal = False
|
||||
self.assertTrue(models_equal)
|
||||
logging.set_verbosity_info()
|
||||
logger = logging.get_logger("transformers.utils.hub")
|
||||
with CaptureLogger(logger) as cl:
|
||||
model.push_to_hub("test-model-tf", use_auth_token=self._token)
|
||||
logging.set_verbosity_warning()
|
||||
# Check the model card was created and uploaded.
|
||||
self.assertIn("Uploading README.md to __DUMMY_TRANSFORMERS_USER__/test-model-tf", cl.out)
|
||||
|
||||
def test_push_to_hub_with_model_card(self):
|
||||
config = BertConfig(
|
||||
vocab_size=99, hidden_size=32, num_hidden_layers=5, num_attention_heads=4, intermediate_size=37
|
||||
)
|
||||
model = TFBertModel(config)
|
||||
new_model = TFBertModel.from_pretrained(f"{USER}/test-model-tf")
|
||||
models_equal = True
|
||||
for p1, p2 in zip(model.weights, new_model.weights):
|
||||
if tf.math.reduce_sum(tf.math.abs(p1 - p2)) > 0:
|
||||
models_equal = False
|
||||
self.assertTrue(models_equal)
|
||||
|
||||
# Reset repo
|
||||
delete_repo(token=self._token, repo_id="test-model-tf")
|
||||
|
||||
# Push to hub via save_pretrained
|
||||
with tempfile.TemporaryDirectory() as tmp_dir:
|
||||
model.push_to_hub(os.path.join(tmp_dir, "test-model-tf"))
|
||||
self.assertTrue(os.path.isfile(os.path.join(tmp_dir, "test-model-tf", "README.md")))
|
||||
model.save_pretrained(tmp_dir, repo_id="test-model-tf", push_to_hub=True, use_auth_token=self._token)
|
||||
|
||||
new_model = TFBertModel.from_pretrained(f"{USER}/test-model-tf")
|
||||
models_equal = True
|
||||
for p1, p2 in zip(model.weights, new_model.weights):
|
||||
if tf.math.reduce_sum(tf.math.abs(p1 - p2)) > 0:
|
||||
models_equal = False
|
||||
self.assertTrue(models_equal)
|
||||
|
||||
def test_push_to_hub_in_organization(self):
|
||||
config = BertConfig(
|
||||
vocab_size=99, hidden_size=32, num_hidden_layers=5, num_attention_heads=4, intermediate_size=37
|
||||
)
|
||||
model = TFBertModel(config)
|
||||
# Make sure model is properly initialized
|
||||
_ = model(model.dummy_inputs)
|
||||
|
||||
model.push_to_hub("valid_org/test-model-tf-org", use_auth_token=self._token)
|
||||
|
||||
new_model = TFBertModel.from_pretrained("valid_org/test-model-tf-org")
|
||||
models_equal = True
|
||||
for p1, p2 in zip(model.weights, new_model.weights):
|
||||
if tf.math.reduce_sum(tf.math.abs(p1 - p2)) > 0:
|
||||
models_equal = False
|
||||
self.assertTrue(models_equal)
|
||||
|
||||
# Reset repo
|
||||
delete_repo(token=self._token, repo_id="valid_org/test-model-tf-org")
|
||||
|
||||
# Push to hub via save_pretrained
|
||||
with tempfile.TemporaryDirectory() as tmp_dir:
|
||||
model.save_pretrained(
|
||||
os.path.join(tmp_dir, "test-model-tf-org"),
|
||||
push_to_hub=True,
|
||||
use_auth_token=self._token,
|
||||
organization="valid_org",
|
||||
tmp_dir, push_to_hub=True, use_auth_token=self._token, repo_id="valid_org/test-model-tf-org"
|
||||
)
|
||||
|
||||
new_model = TFBertModel.from_pretrained("valid_org/test-model-tf-org")
|
||||
models_equal = True
|
||||
for p1, p2 in zip(model.weights, new_model.weights):
|
||||
if tf.math.reduce_sum(tf.math.abs(p1 - p2)) > 0:
|
||||
models_equal = False
|
||||
self.assertTrue(models_equal)
|
||||
new_model = TFBertModel.from_pretrained("valid_org/test-model-tf-org")
|
||||
models_equal = True
|
||||
for p1, p2 in zip(model.weights, new_model.weights):
|
||||
if tf.math.reduce_sum(tf.math.abs(p1 - p2)) > 0:
|
||||
models_equal = False
|
||||
self.assertTrue(models_equal)
|
||||
|
||||
@@ -30,7 +30,7 @@ from itertools import takewhile
|
||||
from pathlib import Path
|
||||
from typing import TYPE_CHECKING, Any, Dict, List, Tuple, Union
|
||||
|
||||
from huggingface_hub import HfFolder, Repository, delete_repo, set_access_token
|
||||
from huggingface_hub import HfFolder, delete_repo, set_access_token
|
||||
from parameterized import parameterized
|
||||
from requests.exceptions import HTTPError
|
||||
from transformers import (
|
||||
@@ -3875,12 +3875,20 @@ class TokenizerPushToHubTester(unittest.TestCase):
|
||||
with open(vocab_file, "w", encoding="utf-8") as vocab_writer:
|
||||
vocab_writer.write("".join([x + "\n" for x in self.vocab_tokens]))
|
||||
tokenizer = BertTokenizer(vocab_file)
|
||||
tokenizer.save_pretrained(
|
||||
os.path.join(tmp_dir, "test-tokenizer"), push_to_hub=True, use_auth_token=self._token
|
||||
)
|
||||
|
||||
new_tokenizer = BertTokenizer.from_pretrained(f"{USER}/test-tokenizer")
|
||||
self.assertDictEqual(new_tokenizer.vocab, tokenizer.vocab)
|
||||
tokenizer.push_to_hub("test-tokenizer", use_auth_token=self._token)
|
||||
new_tokenizer = BertTokenizer.from_pretrained(f"{USER}/test-tokenizer")
|
||||
self.assertDictEqual(new_tokenizer.vocab, tokenizer.vocab)
|
||||
|
||||
# Reset repo
|
||||
delete_repo(token=self._token, repo_id="test-tokenizer")
|
||||
|
||||
# Push to hub via save_pretrained
|
||||
with tempfile.TemporaryDirectory() as tmp_dir:
|
||||
tokenizer.save_pretrained(tmp_dir, repo_id="test-tokenizer", push_to_hub=True, use_auth_token=self._token)
|
||||
|
||||
new_tokenizer = BertTokenizer.from_pretrained(f"{USER}/test-tokenizer")
|
||||
self.assertDictEqual(new_tokenizer.vocab, tokenizer.vocab)
|
||||
|
||||
def test_push_to_hub_in_organization(self):
|
||||
with tempfile.TemporaryDirectory() as tmp_dir:
|
||||
@@ -3888,15 +3896,22 @@ class TokenizerPushToHubTester(unittest.TestCase):
|
||||
with open(vocab_file, "w", encoding="utf-8") as vocab_writer:
|
||||
vocab_writer.write("".join([x + "\n" for x in self.vocab_tokens]))
|
||||
tokenizer = BertTokenizer(vocab_file)
|
||||
|
||||
tokenizer.push_to_hub("valid_org/test-tokenizer-org", use_auth_token=self._token)
|
||||
new_tokenizer = BertTokenizer.from_pretrained("valid_org/test-tokenizer-org")
|
||||
self.assertDictEqual(new_tokenizer.vocab, tokenizer.vocab)
|
||||
|
||||
# Reset repo
|
||||
delete_repo(token=self._token, repo_id="valid_org/test-tokenizer-org")
|
||||
|
||||
# Push to hub via save_pretrained
|
||||
with tempfile.TemporaryDirectory() as tmp_dir:
|
||||
tokenizer.save_pretrained(
|
||||
os.path.join(tmp_dir, "test-tokenizer-org"),
|
||||
push_to_hub=True,
|
||||
use_auth_token=self._token,
|
||||
organization="valid_org",
|
||||
tmp_dir, repo_id="valid_org/test-tokenizer-org", push_to_hub=True, use_auth_token=self._token
|
||||
)
|
||||
|
||||
new_tokenizer = BertTokenizer.from_pretrained("valid_org/test-tokenizer-org")
|
||||
self.assertDictEqual(new_tokenizer.vocab, tokenizer.vocab)
|
||||
new_tokenizer = BertTokenizer.from_pretrained("valid_org/test-tokenizer-org")
|
||||
self.assertDictEqual(new_tokenizer.vocab, tokenizer.vocab)
|
||||
|
||||
@require_tokenizers
|
||||
def test_push_to_hub_dynamic_tokenizer(self):
|
||||
@@ -3908,17 +3923,7 @@ class TokenizerPushToHubTester(unittest.TestCase):
|
||||
tokenizer = CustomTokenizer(vocab_file)
|
||||
|
||||
# No fast custom tokenizer
|
||||
with tempfile.TemporaryDirectory() as tmp_dir:
|
||||
repo = Repository(tmp_dir, clone_from=f"{USER}/test-dynamic-tokenizer", use_auth_token=self._token)
|
||||
tokenizer.save_pretrained(tmp_dir)
|
||||
|
||||
with open(os.path.join(tmp_dir, "tokenizer_config.json")) as f:
|
||||
tokenizer_config = json.load(f)
|
||||
self.assertDictEqual(
|
||||
tokenizer_config["auto_map"], {"AutoTokenizer": ["custom_tokenization.CustomTokenizer", None]}
|
||||
)
|
||||
|
||||
repo.push_to_hub()
|
||||
tokenizer.push_to_hub("test-dynamic-tokenizer", use_auth_token=self._token)
|
||||
|
||||
tokenizer = AutoTokenizer.from_pretrained(f"{USER}/test-dynamic-tokenizer", trust_remote_code=True)
|
||||
# Can't make an isinstance check because the new_model.config is from the CustomTokenizer class of a dynamic module
|
||||
@@ -3935,23 +3940,7 @@ class TokenizerPushToHubTester(unittest.TestCase):
|
||||
bert_tokenizer.save_pretrained(tmp_dir)
|
||||
tokenizer = CustomTokenizerFast.from_pretrained(tmp_dir)
|
||||
|
||||
with tempfile.TemporaryDirectory() as tmp_dir:
|
||||
repo = Repository(tmp_dir, clone_from=f"{USER}/test-dynamic-tokenizer", use_auth_token=self._token)
|
||||
tokenizer.save_pretrained(tmp_dir)
|
||||
|
||||
with open(os.path.join(tmp_dir, "tokenizer_config.json")) as f:
|
||||
tokenizer_config = json.load(f)
|
||||
self.assertDictEqual(
|
||||
tokenizer_config["auto_map"],
|
||||
{
|
||||
"AutoTokenizer": [
|
||||
"custom_tokenization.CustomTokenizer",
|
||||
"custom_tokenization_fast.CustomTokenizerFast",
|
||||
]
|
||||
},
|
||||
)
|
||||
|
||||
repo.push_to_hub()
|
||||
tokenizer.push_to_hub("test-dynamic-tokenizer", use_auth_token=self._token)
|
||||
|
||||
tokenizer = AutoTokenizer.from_pretrained(f"{USER}/test-dynamic-tokenizer", trust_remote_code=True)
|
||||
# Can't make an isinstance check because the new_model.config is from the FakeConfig class of a dynamic module
|
||||
|
||||
Reference in New Issue
Block a user