Fix flaky Hub CI (test_trainer.py) (#35062)

* fix

* Update src/transformers/testing_utils.py

Co-authored-by: Lucain <lucainp@gmail.com>

* fix

* fix

* fix

* fix

* fix

* fix

* fix

* fix

* check

* check

* check

* check

* check

* check

* Update src/transformers/testing_utils.py

Co-authored-by: Lucain <lucainp@gmail.com>

* Update src/transformers/testing_utils.py

Co-authored-by: Lucain <lucainp@gmail.com>

* check

* check

* check

* Final space

* Final adjustment

---------

Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
Co-authored-by: Lucain <lucainp@gmail.com>
This commit is contained in:
Yih-Dar
2024-12-05 17:02:27 +01:00
committed by GitHub
parent a928d9c128
commit b0a51e5cff
11 changed files with 670 additions and 922 deletions

View File

@@ -40,7 +40,9 @@ from typing import Callable, Dict, Iterable, Iterator, List, Optional, Union
from unittest import mock
from unittest.mock import patch
import huggingface_hub.utils
import urllib3
from huggingface_hub import delete_repo
from transformers import logging as transformers_logging
@@ -1570,6 +1572,38 @@ def LoggingLevel(level):
transformers_logging.set_verbosity(orig_level)
class TemporaryHubRepo:
"""Create a temporary Hub repository and return its `RepoUrl` object. This is similar to
`tempfile.TemporaryDirectory` and can be used as a context manager. For example:
with TemporaryHubRepo(token=self._token) as temp_repo:
...
Upon exiting the context, the repository and everything contained in it are removed.
Example:
```python
with TemporaryHubRepo(token=self._token) as temp_repo:
model.push_to_hub(tmp_repo.repo_id, token=self._token)
```
"""
def __init__(self, namespace: Optional[str] = None, token: Optional[str] = None) -> None:
self.token = token
with tempfile.TemporaryDirectory() as tmp_dir:
repo_id = Path(tmp_dir).name
if namespace is not None:
repo_id = f"{namespace}/{repo_id}"
self.repo_url = huggingface_hub.create_repo(repo_id, token=self.token)
def __enter__(self):
return self.repo_url
def __exit__(self, exc, value, tb):
delete_repo(repo_id=self.repo_url.repo_id, token=self.token, missing_ok=True)
@contextlib.contextmanager
# adapted from https://stackoverflow.com/a/64789046/9201239
def ExtendSysPath(path: Union[str, os.PathLike]) -> Iterator[None]:

View File

@@ -18,9 +18,8 @@ import os
import tempfile
import unittest
import warnings
from pathlib import Path
from huggingface_hub import HfFolder, create_pull_request, create_repo, delete_repo
from huggingface_hub import HfFolder, create_pull_request
from parameterized import parameterized
from transformers import AutoConfig, GenerationConfig, WatermarkingConfig, is_torch_available
@@ -57,7 +56,7 @@ from transformers.generation import (
UnbatchedClassifierFreeGuidanceLogitsProcessor,
WatermarkLogitsProcessor,
)
from transformers.testing_utils import TOKEN, USER, is_staging_test, torch_device
from transformers.testing_utils import TOKEN, TemporaryHubRepo, is_staging_test, torch_device
class GenerationConfigTest(unittest.TestCase):
@@ -679,114 +678,82 @@ class ConfigPushToHubTester(unittest.TestCase):
cls._token = TOKEN
HfFolder.save_token(TOKEN)
@staticmethod
def _try_delete_repo(repo_id, token):
try:
# Reset repo
delete_repo(repo_id=repo_id, token=token)
except: # noqa E722
pass
def test_push_to_hub(self):
with tempfile.TemporaryDirectory() as tmp_dir:
try:
tmp_repo = f"{USER}/test-generation-config-{Path(tmp_dir).name}"
config = GenerationConfig(
do_sample=True,
temperature=0.7,
length_penalty=1.0,
)
config.push_to_hub(tmp_repo, token=self._token)
with TemporaryHubRepo(token=self._token) as tmp_repo:
config = GenerationConfig(
do_sample=True,
temperature=0.7,
length_penalty=1.0,
)
config.push_to_hub(tmp_repo.repo_id, token=self._token)
new_config = GenerationConfig.from_pretrained(tmp_repo)
for k, v in config.to_dict().items():
if k != "transformers_version":
self.assertEqual(v, getattr(new_config, k))
finally:
# Always (try to) delete the repo.
self._try_delete_repo(repo_id=tmp_repo, token=self._token)
new_config = GenerationConfig.from_pretrained(tmp_repo.repo_id)
for k, v in config.to_dict().items():
if k != "transformers_version":
self.assertEqual(v, getattr(new_config, k))
def test_push_to_hub_via_save_pretrained(self):
with tempfile.TemporaryDirectory() as tmp_dir:
try:
tmp_repo = f"{USER}/test-generation-config-{Path(tmp_dir).name}"
config = GenerationConfig(
do_sample=True,
temperature=0.7,
length_penalty=1.0,
)
# Push to hub via save_pretrained
config.save_pretrained(tmp_dir, repo_id=tmp_repo, push_to_hub=True, token=self._token)
with TemporaryHubRepo(token=self._token) as tmp_repo:
config = GenerationConfig(
do_sample=True,
temperature=0.7,
length_penalty=1.0,
)
# Push to hub via save_pretrained
with tempfile.TemporaryDirectory() as tmp_dir:
config.save_pretrained(tmp_dir, repo_id=tmp_repo.repo_id, push_to_hub=True, token=self._token)
new_config = GenerationConfig.from_pretrained(tmp_repo)
for k, v in config.to_dict().items():
if k != "transformers_version":
self.assertEqual(v, getattr(new_config, k))
finally:
# Always (try to) delete the repo.
self._try_delete_repo(repo_id=tmp_repo, token=self._token)
new_config = GenerationConfig.from_pretrained(tmp_repo.repo_id)
for k, v in config.to_dict().items():
if k != "transformers_version":
self.assertEqual(v, getattr(new_config, k))
def test_push_to_hub_in_organization(self):
with tempfile.TemporaryDirectory() as tmp_dir:
try:
tmp_repo = f"valid_org/test-generation-config-org-{Path(tmp_dir).name}"
config = GenerationConfig(
do_sample=True,
temperature=0.7,
length_penalty=1.0,
)
config.push_to_hub(tmp_repo, token=self._token)
with TemporaryHubRepo(namespace="valid_org", token=self._token) as tmp_repo:
config = GenerationConfig(
do_sample=True,
temperature=0.7,
length_penalty=1.0,
)
config.push_to_hub(tmp_repo.repo_id, token=self._token)
new_config = GenerationConfig.from_pretrained(tmp_repo)
for k, v in config.to_dict().items():
if k != "transformers_version":
self.assertEqual(v, getattr(new_config, k))
finally:
# Always (try to) delete the repo.
self._try_delete_repo(repo_id=tmp_repo, token=self._token)
new_config = GenerationConfig.from_pretrained(tmp_repo.repo_id)
for k, v in config.to_dict().items():
if k != "transformers_version":
self.assertEqual(v, getattr(new_config, k))
def test_push_to_hub_in_organization_via_save_pretrained(self):
with tempfile.TemporaryDirectory() as tmp_dir:
try:
tmp_repo = f"valid_org/test-generation-config-org-{Path(tmp_dir).name}"
config = GenerationConfig(
do_sample=True,
temperature=0.7,
length_penalty=1.0,
)
# Push to hub via save_pretrained
config.save_pretrained(tmp_dir, repo_id=tmp_repo, push_to_hub=True, token=self._token)
with TemporaryHubRepo(namespace="valid_org", token=self._token) as tmp_repo:
config = GenerationConfig(
do_sample=True,
temperature=0.7,
length_penalty=1.0,
)
# Push to hub via save_pretrained
with tempfile.TemporaryDirectory() as tmp_dir:
config.save_pretrained(tmp_dir, repo_id=tmp_repo.repo_id, push_to_hub=True, token=self._token)
new_config = GenerationConfig.from_pretrained(tmp_repo)
for k, v in config.to_dict().items():
if k != "transformers_version":
self.assertEqual(v, getattr(new_config, k))
finally:
# Always (try to) delete the repo.
self._try_delete_repo(repo_id=tmp_repo, token=self._token)
new_config = GenerationConfig.from_pretrained(tmp_repo.repo_id)
for k, v in config.to_dict().items():
if k != "transformers_version":
self.assertEqual(v, getattr(new_config, k))
def test_push_to_hub_on_pr_revision(self):
with tempfile.TemporaryDirectory() as tmp_dir:
try:
# create a repo and a PR
repo_id = f"{USER}/test-generation-config-{Path(tmp_dir).name}"
create_repo(repo_id=repo_id, token=self._token)
pr = create_pull_request(repo_id=repo_id, title="Test PR", token=self._token)
revision = f"refs/pr/{pr.num}"
with TemporaryHubRepo(token=self._token) as tmp_repo:
# create a PR
pr = create_pull_request(repo_id=tmp_repo.repo_id, title="Test PR", token=self._token)
revision = f"refs/pr/{pr.num}"
# push to PR ref
config = GenerationConfig(
do_sample=True,
temperature=0.7,
length_penalty=1.0,
)
config.push_to_hub(repo_id, token=self._token, revision=revision)
# push to PR ref
config = GenerationConfig(
do_sample=True,
temperature=0.7,
length_penalty=1.0,
)
config.push_to_hub(tmp_repo.repo_id, token=self._token, revision=revision)
# load from PR ref
new_config = GenerationConfig.from_pretrained(repo_id, revision=revision)
for k, v in config.to_dict().items():
if k != "transformers_version":
self.assertEqual(v, getattr(new_config, k))
finally:
# Always (try to) delete the repo.
self._try_delete_repo(repo_id=repo_id, token=self._token)
# load from PR ref
new_config = GenerationConfig.from_pretrained(tmp_repo.repo_id, revision=revision)
for k, v in config.to_dict().items():
if k != "transformers_version":
self.assertEqual(v, getattr(new_config, k))

View File

@@ -21,7 +21,7 @@ import unittest
from pathlib import Path
from shutil import copyfile
from huggingface_hub import HfFolder, Repository, create_repo, delete_repo
from huggingface_hub import HfFolder, Repository
import transformers
from transformers import (
@@ -39,7 +39,7 @@ from transformers import (
Wav2Vec2FeatureExtractor,
Wav2Vec2Processor,
)
from transformers.testing_utils import TOKEN, USER, get_tests_dir, is_staging_test
from transformers.testing_utils import TOKEN, TemporaryHubRepo, get_tests_dir, is_staging_test
from transformers.tokenization_utils import TOKENIZER_CONFIG_FILE
from transformers.utils import FEATURE_EXTRACTOR_NAME, PROCESSOR_NAME, is_tokenizers_available
@@ -372,72 +372,52 @@ class ProcessorPushToHubTester(unittest.TestCase):
cls._token = TOKEN
HfFolder.save_token(TOKEN)
@staticmethod
def _try_delete_repo(repo_id, token):
try:
# Reset repo
delete_repo(repo_id=repo_id, token=token)
except: # noqa E722
pass
def test_push_to_hub_via_save_pretrained(self):
with tempfile.TemporaryDirectory() as tmp_dir:
try:
tmp_repo = f"{USER}/test-processor-{Path(tmp_dir).name}"
processor = Wav2Vec2Processor.from_pretrained(SAMPLE_PROCESSOR_CONFIG_DIR)
# Push to hub via save_pretrained
processor.save_pretrained(tmp_repo, repo_id=tmp_repo, push_to_hub=True, token=self._token)
with TemporaryHubRepo(token=self._token) as tmp_repo:
processor = Wav2Vec2Processor.from_pretrained(SAMPLE_PROCESSOR_CONFIG_DIR)
# Push to hub via save_pretrained
with tempfile.TemporaryDirectory() as tmp_dir:
processor.save_pretrained(tmp_dir, repo_id=tmp_repo.repo_id, push_to_hub=True, token=self._token)
new_processor = Wav2Vec2Processor.from_pretrained(tmp_repo)
for k, v in processor.feature_extractor.__dict__.items():
self.assertEqual(v, getattr(new_processor.feature_extractor, k))
self.assertDictEqual(new_processor.tokenizer.get_vocab(), processor.tokenizer.get_vocab())
finally:
# Always (try to) delete the repo.
self._try_delete_repo(repo_id=tmp_repo, token=self._token)
new_processor = Wav2Vec2Processor.from_pretrained(tmp_repo.repo_id)
for k, v in processor.feature_extractor.__dict__.items():
self.assertEqual(v, getattr(new_processor.feature_extractor, k))
self.assertDictEqual(new_processor.tokenizer.get_vocab(), processor.tokenizer.get_vocab())
def test_push_to_hub_in_organization_via_save_pretrained(self):
with tempfile.TemporaryDirectory() as tmp_dir:
try:
tmp_repo = f"valid_org/test-processor-org-{Path(tmp_dir).name}"
processor = Wav2Vec2Processor.from_pretrained(SAMPLE_PROCESSOR_CONFIG_DIR)
# Push to hub via save_pretrained
with TemporaryHubRepo(namespace="valid_org", token=self._token) as tmp_repo:
processor = Wav2Vec2Processor.from_pretrained(SAMPLE_PROCESSOR_CONFIG_DIR)
# Push to hub via save_pretrained
with tempfile.TemporaryDirectory() as tmp_dir:
processor.save_pretrained(
tmp_dir,
repo_id=tmp_repo,
repo_id=tmp_repo.repo_id,
push_to_hub=True,
token=self._token,
)
new_processor = Wav2Vec2Processor.from_pretrained(tmp_repo)
for k, v in processor.feature_extractor.__dict__.items():
self.assertEqual(v, getattr(new_processor.feature_extractor, k))
self.assertDictEqual(new_processor.tokenizer.get_vocab(), processor.tokenizer.get_vocab())
finally:
# Always (try to) delete the repo.
self._try_delete_repo(repo_id=tmp_repo, token=self._token)
new_processor = Wav2Vec2Processor.from_pretrained(tmp_repo.repo_id)
for k, v in processor.feature_extractor.__dict__.items():
self.assertEqual(v, getattr(new_processor.feature_extractor, k))
self.assertDictEqual(new_processor.tokenizer.get_vocab(), processor.tokenizer.get_vocab())
def test_push_to_hub_dynamic_processor(self):
with tempfile.TemporaryDirectory() as tmp_dir:
try:
tmp_repo = f"{USER}/test-dynamic-processor-{Path(tmp_dir).name}"
with TemporaryHubRepo(token=self._token) as tmp_repo:
CustomFeatureExtractor.register_for_auto_class()
CustomTokenizer.register_for_auto_class()
CustomProcessor.register_for_auto_class()
CustomFeatureExtractor.register_for_auto_class()
CustomTokenizer.register_for_auto_class()
CustomProcessor.register_for_auto_class()
feature_extractor = CustomFeatureExtractor.from_pretrained(SAMPLE_PROCESSOR_CONFIG_DIR)
feature_extractor = CustomFeatureExtractor.from_pretrained(SAMPLE_PROCESSOR_CONFIG_DIR)
with tempfile.TemporaryDirectory() as tmp_dir:
vocab_file = os.path.join(tmp_dir, "vocab.txt")
with open(vocab_file, "w", encoding="utf-8") as vocab_writer:
vocab_writer.write("".join([x + "\n" for x in self.vocab_tokens]))
tokenizer = CustomTokenizer(vocab_file)
with tempfile.TemporaryDirectory() as tmp_dir:
vocab_file = os.path.join(tmp_dir, "vocab.txt")
with open(vocab_file, "w", encoding="utf-8") as vocab_writer:
vocab_writer.write("".join([x + "\n" for x in self.vocab_tokens]))
tokenizer = CustomTokenizer(vocab_file)
processor = CustomProcessor(feature_extractor, tokenizer)
processor = CustomProcessor(feature_extractor, tokenizer)
create_repo(tmp_repo, token=self._token)
with tempfile.TemporaryDirectory() as tmp_dir:
repo = Repository(tmp_dir, clone_from=tmp_repo, token=self._token)
processor.save_pretrained(tmp_dir)
@@ -468,10 +448,6 @@ class ProcessorPushToHubTester(unittest.TestCase):
repo.push_to_hub()
new_processor = AutoProcessor.from_pretrained(tmp_repo, trust_remote_code=True)
new_processor = AutoProcessor.from_pretrained(tmp_repo.repo_id, trust_remote_code=True)
# Can't make an isinstance check because the new_processor is from the CustomProcessor class of a dynamic module
self.assertEqual(new_processor.__class__.__name__, "CustomProcessor")
finally:
# Always (try to) delete the repo.
self._try_delete_repo(repo_id=tmp_repo, token=self._token)

View File

@@ -32,10 +32,9 @@ from typing import Dict, List
from unittest.mock import Mock, patch
import numpy as np
from huggingface_hub import HfFolder, ModelCard, create_branch, delete_repo, list_repo_commits, list_repo_files
from huggingface_hub import HfFolder, ModelCard, create_branch, list_repo_commits, list_repo_files
from packaging import version
from parameterized import parameterized
from requests.exceptions import HTTPError
from transformers import (
AutoFeatureExtractor,
@@ -59,6 +58,7 @@ from transformers.testing_utils import (
USER,
CaptureLogger,
LoggingLevel,
TemporaryHubRepo,
TestCasePlus,
backend_device_count,
execute_subprocess_async,
@@ -4152,64 +4152,49 @@ class TrainerIntegrationWithHubTester(unittest.TestCase):
cls._token = TOKEN
HfFolder.save_token(TOKEN)
@classmethod
def tearDownClass(cls):
for model in [
"test-trainer",
"test-trainer-epoch",
"test-trainer-step",
"test-trainer-tensorboard",
"test-trainer-tags",
]:
try:
delete_repo(token=cls._token, repo_id=model)
except HTTPError:
pass
try:
delete_repo(token=cls._token, repo_id="valid_org/test-trainer-org")
except HTTPError:
pass
def test_push_to_hub(self):
with tempfile.TemporaryDirectory() as tmp_dir:
trainer = get_regression_trainer(
output_dir=os.path.join(tmp_dir, "test-trainer"),
push_to_hub=True,
hub_token=self._token,
)
url = trainer.push_to_hub()
with TemporaryHubRepo(token=self._token) as tmp_repo:
output_dir_name = tmp_repo.repo_name
with tempfile.TemporaryDirectory() as tmp_dir:
trainer = get_regression_trainer(
output_dir=os.path.join(tmp_dir, output_dir_name),
push_to_hub=True,
hub_token=self._token,
)
url = trainer.push_to_hub()
# Extract repo_name from the url
re_search = re.search(ENDPOINT_STAGING + r"/([^/]+/[^/]+)/", url)
self.assertTrue(re_search is not None)
repo_name = re_search.groups()[0]
self.assertEqual(repo_name, f"{USER}/test-trainer")
self.assertEqual(repo_name, f"{USER}/{output_dir_name}")
model = RegressionPreTrainedModel.from_pretrained(repo_name)
self.assertEqual(model.a.item(), trainer.model.a.item())
self.assertEqual(model.b.item(), trainer.model.b.item())
def test_push_to_hub_in_organization(self):
with tempfile.TemporaryDirectory() as tmp_dir:
trainer = get_regression_trainer(output_dir=tmp_dir)
trainer.save_model()
trainer = get_regression_trainer(
output_dir=os.path.join(tmp_dir, "test-trainer-org"),
push_to_hub=True,
hub_model_id="valid_org/test-trainer-org",
hub_token=self._token,
)
url = trainer.push_to_hub()
with TemporaryHubRepo(namespace="valid_org", token=self._token) as tmp_repo:
with tempfile.TemporaryDirectory() as tmp_dir:
trainer = get_regression_trainer(output_dir=tmp_dir)
trainer.save_model()
output_dir_name = tmp_repo.repo_name
trainer = get_regression_trainer(
output_dir=os.path.join(tmp_dir, output_dir_name),
push_to_hub=True,
hub_model_id=f"valid_org/{output_dir_name}",
hub_token=self._token,
)
url = trainer.push_to_hub()
# Extract repo_name from the url
re_search = re.search(ENDPOINT_STAGING + r"/([^/]+/[^/]+)/", url)
self.assertTrue(re_search is not None)
repo_name = re_search.groups()[0]
self.assertEqual(repo_name, "valid_org/test-trainer-org")
self.assertEqual(repo_name, f"valid_org/{output_dir_name}")
model = RegressionPreTrainedModel.from_pretrained("valid_org/test-trainer-org")
model = RegressionPreTrainedModel.from_pretrained(f"valid_org/{output_dir_name}")
self.assertEqual(model.a.item(), trainer.model.a.item())
self.assertEqual(model.b.item(), trainer.model.b.item())
@@ -4226,120 +4211,130 @@ class TrainerIntegrationWithHubTester(unittest.TestCase):
return [commit.strip() for commit in commits]
def test_push_to_hub_with_saves_each_epoch(self):
with tempfile.TemporaryDirectory() as tmp_dir:
with self.assertLogs(level="WARNING") as logs:
trainer = get_regression_trainer(
output_dir=os.path.join(tmp_dir, "test-trainer-epoch"),
push_to_hub=True,
hub_token=self._token,
# To avoid any flakiness if the training goes faster than the uploads.
hub_always_push=True,
save_strategy="epoch",
)
trainer.train()
with TemporaryHubRepo(token=self._token) as tmp_repo:
with tempfile.TemporaryDirectory() as tmp_dir:
with self.assertLogs(level="WARNING") as logs:
output_dir_name = tmp_repo.repo_name
trainer = get_regression_trainer(
output_dir=os.path.join(tmp_dir, output_dir_name),
push_to_hub=True,
hub_token=self._token,
# To avoid any flakiness if the training goes faster than the uploads.
hub_always_push=True,
save_strategy="epoch",
)
trainer.train()
commits = list_repo_commits(f"{USER}/test-trainer-epoch", token=self._token)
commits = [c.title for c in commits]
self.assertIn("initial commit", commits)
self.assertIn("Training in progress, epoch 1", commits)
self.assertIn("Training in progress, epoch 2", commits)
# Epochs 3 and 4 are not guaranteed to be present (empty commits)
self.assertTrue(any("Skipping to prevent empty commit." in record.message for record in logs.records))
commits = list_repo_commits(f"{USER}/{output_dir_name}", token=self._token)
commits = [c.title for c in commits]
self.assertIn("initial commit", commits)
self.assertIn("Training in progress, epoch 1", commits)
self.assertIn("Training in progress, epoch 2", commits)
# Epochs 3 and 4 are not guaranteed to be present (empty commits)
self.assertTrue(any("Skipping to prevent empty commit." in record.message for record in logs.records))
def test_push_to_hub_with_saves_each_n_steps(self):
num_gpus = max(1, backend_device_count(torch_device))
if num_gpus > 2:
self.skipTest(reason="More than 2 GPUs available")
with tempfile.TemporaryDirectory() as tmp_dir:
with self.assertLogs(level="WARNING") as logs:
trainer = get_regression_trainer(
output_dir=os.path.join(tmp_dir, "test-trainer-step"),
push_to_hub=True,
hub_token=self._token,
# To avoid any flakiness if the training goes faster than the uploads.
hub_always_push=True,
save_strategy="steps",
save_steps=5,
)
trainer.train()
with TemporaryHubRepo(token=self._token) as tmp_repo:
with tempfile.TemporaryDirectory() as tmp_dir:
with self.assertLogs(level="WARNING") as logs:
output_dir_name = tmp_repo.repo_name
trainer = get_regression_trainer(
output_dir=os.path.join(tmp_dir, output_dir_name),
push_to_hub=True,
hub_token=self._token,
# To avoid any flakiness if the training goes faster than the uploads.
hub_always_push=True,
save_strategy="steps",
save_steps=5,
)
trainer.train()
commits = list_repo_commits(f"{USER}/test-trainer-step", token=self._token)
commits = [c.title for c in commits]
self.assertIn("initial commit", commits)
commits = list_repo_commits(f"{USER}/{output_dir_name}", token=self._token)
commits = [c.title for c in commits]
self.assertIn("initial commit", commits)
# Some commits are skipped if nothing has changed
# We expect 1 commit per 5 epochs + 1 commit at the end
nb_empty_commits = len(
[record for record in logs.records if "Skipping to prevent empty commit." in record.message]
)
nb_epoch_commits = len([commit for commit in commits if "Training in progress, step" in commit])
# Some commits are skipped if nothing has changed
# We expect 1 commit per 5 epochs + 1 commit at the end
nb_empty_commits = len(
[record for record in logs.records if "Skipping to prevent empty commit." in record.message]
)
nb_epoch_commits = len([commit for commit in commits if "Training in progress, step" in commit])
# max_steps depend on the number of available GPUs
max_steps = math.ceil(trainer.args.num_train_epochs * len(trainer.get_train_dataloader()))
nb_expected_commits = len(range(5, max_steps, 5))
# max_steps depend on the number of available GPUs
max_steps = math.ceil(trainer.args.num_train_epochs * len(trainer.get_train_dataloader()))
nb_expected_commits = len(range(5, max_steps, 5))
# '>=' since final commit might be an empty commit as well (not deterministic)
self.assertGreaterEqual(nb_empty_commits + nb_epoch_commits, nb_expected_commits)
# '>=' since final commit might be an empty commit as well (not deterministic)
self.assertGreaterEqual(nb_empty_commits + nb_epoch_commits, nb_expected_commits)
@require_tensorboard
def test_push_to_hub_with_tensorboard_logs(self):
with tempfile.TemporaryDirectory() as tmp_dir:
trainer = get_regression_trainer(
output_dir=os.path.join(tmp_dir, "test-trainer-tensorboard"),
hub_token=self._token,
save_strategy="epoch",
report_to=["tensorboard"],
keep_report_to=True,
)
trainer.train()
# Push the runs via `push_to_hub()`
trainer.push_to_hub()
with TemporaryHubRepo(token=self._token) as tmp_repo:
with tempfile.TemporaryDirectory() as tmp_dir:
output_dir_name = tmp_repo.repo_name
trainer = get_regression_trainer(
output_dir=os.path.join(tmp_dir, output_dir_name),
hub_token=self._token,
save_strategy="epoch",
report_to=["tensorboard"],
keep_report_to=True,
)
trainer.train()
# Push the runs via `push_to_hub()`
trainer.push_to_hub()
files = list_repo_files(f"{USER}/test-trainer-tensorboard", token=self._token)
found_log = False
for f in files:
if len(f.split("runs")) > 1 and "events.out.tfevents" in f:
found_log = True
files = list_repo_files(f"{USER}/{output_dir_name}", token=self._token)
found_log = False
for f in files:
if len(f.split("runs")) > 1 and "events.out.tfevents" in f:
found_log = True
assert found_log is True, "No tensorboard log found in repo"
assert found_log is True, "No tensorboard log found in repo"
def test_push_to_hub_tags(self):
# Checks if `trainer.push_to_hub()` works correctly by adding the desired
# tag without having to pass `tags` in `push_to_hub`
# see:
with tempfile.TemporaryDirectory() as tmp_dir:
trainer = get_regression_trainer(
output_dir=os.path.join(tmp_dir, "test-trainer-tags"),
push_to_hub=True,
hub_token=self._token,
)
with TemporaryHubRepo(token=self._token) as tmp_repo:
with tempfile.TemporaryDirectory() as tmp_dir:
output_dir_name = tmp_repo.repo_name
trainer = get_regression_trainer(
output_dir=os.path.join(tmp_dir, output_dir_name),
push_to_hub=True,
hub_token=self._token,
)
trainer.model.add_model_tags(["test-trainer-tags"])
trainer.model.add_model_tags(["test-trainer-tags"])
url = trainer.push_to_hub()
url = trainer.push_to_hub()
# Extract repo_name from the url
re_search = re.search(ENDPOINT_STAGING + r"/([^/]+/[^/]+)/", url)
self.assertTrue(re_search is not None)
repo_name = re_search.groups()[0]
self.assertEqual(repo_name, f"{USER}/test-trainer-tags")
self.assertEqual(repo_name, f"{USER}/{output_dir_name}")
model_card = ModelCard.load(repo_name)
self.assertTrue("test-trainer-tags" in model_card.data.tags)
def test_push_to_hub_with_revision(self):
# Checks if `trainer.push_to_hub()` works correctly by adding revision
with tempfile.TemporaryDirectory() as tmp_dir:
trainer = get_regression_trainer(
output_dir=os.path.join(tmp_dir, "test-trainer-revision"),
push_to_hub=True,
hub_token=self._token,
)
branch = "v1.0"
create_branch(repo_id=trainer.hub_model_id, branch=branch, token=self._token, exist_ok=True)
url = trainer.push_to_hub(revision=branch)
with TemporaryHubRepo(token=self._token) as tmp_repo:
with tempfile.TemporaryDirectory() as tmp_dir:
output_dir_name = tmp_repo.repo_name
trainer = get_regression_trainer(
output_dir=os.path.join(tmp_dir, output_dir_name),
push_to_hub=True,
hub_token=self._token,
)
branch = "v1.0"
create_branch(repo_id=trainer.hub_model_id, branch=branch, token=self._token, exist_ok=True)
url = trainer.push_to_hub(revision=branch)
# Extract branch from the url
re_search = re.search(r"tree/([^/]+)/", url)

View File

@@ -22,12 +22,12 @@ import unittest.mock as mock
import warnings
from pathlib import Path
from huggingface_hub import HfFolder, delete_repo
from huggingface_hub import HfFolder
from requests.exceptions import HTTPError
from transformers import AutoConfig, BertConfig, GPT2Config
from transformers.configuration_utils import PretrainedConfig
from transformers.testing_utils import TOKEN, USER, is_staging_test
from transformers.testing_utils import TOKEN, TemporaryHubRepo, is_staging_test
sys.path.append(str(Path(__file__).parent.parent.parent / "utils"))
@@ -98,106 +98,72 @@ class ConfigPushToHubTester(unittest.TestCase):
cls._token = TOKEN
HfFolder.save_token(TOKEN)
@staticmethod
def _try_delete_repo(repo_id, token):
try:
# Reset repo
delete_repo(repo_id=repo_id, token=token)
except: # noqa E722
pass
def test_push_to_hub(self):
with tempfile.TemporaryDirectory() as tmp_dir:
try:
tmp_repo = f"{USER}/test-config-{Path(tmp_dir).name}"
with TemporaryHubRepo(token=self._token) as tmp_repo:
config = BertConfig(
vocab_size=99, hidden_size=32, num_hidden_layers=5, num_attention_heads=4, intermediate_size=37
)
config.push_to_hub(tmp_repo.repo_id, token=self._token)
config = BertConfig(
vocab_size=99, hidden_size=32, num_hidden_layers=5, num_attention_heads=4, intermediate_size=37
)
config.push_to_hub(tmp_repo, token=self._token)
new_config = BertConfig.from_pretrained(tmp_repo)
for k, v in config.to_dict().items():
if k != "transformers_version":
self.assertEqual(v, getattr(new_config, k))
finally:
# Always (try to) delete the repo.
self._try_delete_repo(repo_id=tmp_repo, token=self._token)
new_config = BertConfig.from_pretrained(tmp_repo.repo_id)
for k, v in config.to_dict().items():
if k != "transformers_version":
self.assertEqual(v, getattr(new_config, k))
def test_push_to_hub_via_save_pretrained(self):
with tempfile.TemporaryDirectory() as tmp_dir:
try:
tmp_repo = f"{USER}/test-config-{Path(tmp_dir).name}"
with TemporaryHubRepo(token=self._token) as tmp_repo:
config = BertConfig(
vocab_size=99, hidden_size=32, num_hidden_layers=5, num_attention_heads=4, intermediate_size=37
)
# Push to hub via save_pretrained
with tempfile.TemporaryDirectory() as tmp_dir:
config.save_pretrained(tmp_dir, repo_id=tmp_repo.repo_id, push_to_hub=True, token=self._token)
config = BertConfig(
vocab_size=99, hidden_size=32, num_hidden_layers=5, num_attention_heads=4, intermediate_size=37
)
# Push to hub via save_pretrained
config.save_pretrained(tmp_dir, repo_id=tmp_repo, push_to_hub=True, token=self._token)
new_config = BertConfig.from_pretrained(tmp_repo)
for k, v in config.to_dict().items():
if k != "transformers_version":
self.assertEqual(v, getattr(new_config, k))
finally:
# Always (try to) delete the repo.
self._try_delete_repo(repo_id=tmp_repo, token=self._token)
new_config = BertConfig.from_pretrained(tmp_repo.repo_id)
for k, v in config.to_dict().items():
if k != "transformers_version":
self.assertEqual(v, getattr(new_config, k))
def test_push_to_hub_in_organization(self):
with tempfile.TemporaryDirectory() as tmp_dir:
try:
tmp_repo = f"valid_org/test-config-org-{Path(tmp_dir).name}"
config = BertConfig(
vocab_size=99, hidden_size=32, num_hidden_layers=5, num_attention_heads=4, intermediate_size=37
)
config.push_to_hub(tmp_repo, token=self._token)
with TemporaryHubRepo(namespace="valid_org", token=self._token) as tmp_repo:
config = BertConfig(
vocab_size=99, hidden_size=32, num_hidden_layers=5, num_attention_heads=4, intermediate_size=37
)
config.push_to_hub(tmp_repo.repo_id, token=self._token)
new_config = BertConfig.from_pretrained(tmp_repo)
for k, v in config.to_dict().items():
if k != "transformers_version":
self.assertEqual(v, getattr(new_config, k))
finally:
# Always (try to) delete the repo.
self._try_delete_repo(repo_id=tmp_repo, token=self._token)
new_config = BertConfig.from_pretrained(tmp_repo.repo_id)
for k, v in config.to_dict().items():
if k != "transformers_version":
self.assertEqual(v, getattr(new_config, k))
def test_push_to_hub_in_organization_via_save_pretrained(self):
with tempfile.TemporaryDirectory() as tmp_dir:
try:
tmp_repo = f"valid_org/test-config-org-{Path(tmp_dir).name}"
config = BertConfig(
vocab_size=99, hidden_size=32, num_hidden_layers=5, num_attention_heads=4, intermediate_size=37
)
# Push to hub via save_pretrained
config.save_pretrained(tmp_dir, repo_id=tmp_repo, push_to_hub=True, token=self._token)
with TemporaryHubRepo(namespace="valid_org", token=self._token) as tmp_repo:
config = BertConfig(
vocab_size=99, hidden_size=32, num_hidden_layers=5, num_attention_heads=4, intermediate_size=37
)
# Push to hub via save_pretrained
with tempfile.TemporaryDirectory() as tmp_dir:
config.save_pretrained(tmp_dir, repo_id=tmp_repo.repo_id, push_to_hub=True, token=self._token)
new_config = BertConfig.from_pretrained(tmp_repo)
for k, v in config.to_dict().items():
if k != "transformers_version":
self.assertEqual(v, getattr(new_config, k))
finally:
# Always (try to) delete the repo.
self._try_delete_repo(repo_id=tmp_repo, token=self._token)
new_config = BertConfig.from_pretrained(tmp_repo.repo_id)
for k, v in config.to_dict().items():
if k != "transformers_version":
self.assertEqual(v, getattr(new_config, k))
def test_push_to_hub_dynamic_config(self):
with tempfile.TemporaryDirectory() as tmp_dir:
try:
tmp_repo = f"{USER}/test-dynamic-config-{Path(tmp_dir).name}"
with TemporaryHubRepo(token=self._token) as tmp_repo:
CustomConfig.register_for_auto_class()
config = CustomConfig(attribute=42)
CustomConfig.register_for_auto_class()
config = CustomConfig(attribute=42)
config.push_to_hub(tmp_repo.repo_id, token=self._token)
config.push_to_hub(tmp_repo, token=self._token)
# This has added the proper auto_map field to the config
self.assertDictEqual(config.auto_map, {"AutoConfig": "custom_configuration.CustomConfig"})
# This has added the proper auto_map field to the config
self.assertDictEqual(config.auto_map, {"AutoConfig": "custom_configuration.CustomConfig"})
new_config = AutoConfig.from_pretrained(tmp_repo, trust_remote_code=True)
# Can't make an isinstance check because the new_config is from the FakeConfig class of a dynamic module
self.assertEqual(new_config.__class__.__name__, "CustomConfig")
self.assertEqual(new_config.attribute, 42)
finally:
# Always (try to) delete the repo.
self._try_delete_repo(repo_id=tmp_repo, token=self._token)
new_config = AutoConfig.from_pretrained(tmp_repo.repo_id, trust_remote_code=True)
# Can't make an isinstance check because the new_config is from the FakeConfig class of a dynamic module
self.assertEqual(new_config.__class__.__name__, "CustomConfig")
self.assertEqual(new_config.attribute, 42)
class ConfigTestUtils(unittest.TestCase):

View File

@@ -20,11 +20,11 @@ import unittest
import unittest.mock as mock
from pathlib import Path
from huggingface_hub import HfFolder, delete_repo
from huggingface_hub import HfFolder
from requests.exceptions import HTTPError
from transformers import AutoFeatureExtractor, Wav2Vec2FeatureExtractor
from transformers.testing_utils import TOKEN, USER, get_tests_dir, is_staging_test
from transformers.testing_utils import TOKEN, TemporaryHubRepo, get_tests_dir, is_staging_test
sys.path.append(str(Path(__file__).parent.parent.parent / "utils"))
@@ -60,91 +60,63 @@ class FeatureExtractorPushToHubTester(unittest.TestCase):
cls._token = TOKEN
HfFolder.save_token(TOKEN)
@staticmethod
def _try_delete_repo(repo_id, token):
try:
# Reset repo
delete_repo(repo_id=repo_id, token=token)
except: # noqa E722
pass
def test_push_to_hub(self):
with tempfile.TemporaryDirectory() as tmp_dir:
try:
tmp_repo = f"{USER}/test-feature-extractor-{Path(tmp_dir).name}"
with TemporaryHubRepo(token=self._token) as tmp_repo:
feature_extractor = Wav2Vec2FeatureExtractor.from_pretrained(SAMPLE_FEATURE_EXTRACTION_CONFIG_DIR)
feature_extractor.push_to_hub(tmp_repo.repo_id, token=self._token)
feature_extractor = Wav2Vec2FeatureExtractor.from_pretrained(SAMPLE_FEATURE_EXTRACTION_CONFIG_DIR)
feature_extractor.push_to_hub(tmp_repo, token=self._token)
new_feature_extractor = Wav2Vec2FeatureExtractor.from_pretrained(tmp_repo)
for k, v in feature_extractor.__dict__.items():
self.assertEqual(v, getattr(new_feature_extractor, k))
finally:
# Always (try to) delete the repo.
self._try_delete_repo(repo_id=tmp_repo, token=self._token)
new_feature_extractor = Wav2Vec2FeatureExtractor.from_pretrained(tmp_repo.repo_id)
for k, v in feature_extractor.__dict__.items():
self.assertEqual(v, getattr(new_feature_extractor, k))
def test_push_to_hub_via_save_pretrained(self):
with tempfile.TemporaryDirectory() as tmp_dir:
try:
tmp_repo = f"{USER}/test-feature-extractor-{Path(tmp_dir).name}"
feature_extractor = Wav2Vec2FeatureExtractor.from_pretrained(SAMPLE_FEATURE_EXTRACTION_CONFIG_DIR)
# Push to hub via save_pretrained
feature_extractor.save_pretrained(tmp_dir, repo_id=tmp_repo, push_to_hub=True, token=self._token)
new_feature_extractor = Wav2Vec2FeatureExtractor.from_pretrained(tmp_repo)
for k, v in feature_extractor.__dict__.items():
self.assertEqual(v, getattr(new_feature_extractor, k))
finally:
# Always (try to) delete the repo.
self._try_delete_repo(repo_id=tmp_repo, token=self._token)
def test_push_to_hub_in_organization(self):
with tempfile.TemporaryDirectory() as tmp_dir:
try:
tmp_repo = f"valid_org/test-feature-extractor-{Path(tmp_dir).name}"
feature_extractor = Wav2Vec2FeatureExtractor.from_pretrained(SAMPLE_FEATURE_EXTRACTION_CONFIG_DIR)
feature_extractor.push_to_hub(tmp_repo, token=self._token)
new_feature_extractor = Wav2Vec2FeatureExtractor.from_pretrained(tmp_repo)
for k, v in feature_extractor.__dict__.items():
self.assertEqual(v, getattr(new_feature_extractor, k))
finally:
# Always (try to) delete the repo.
self._try_delete_repo(repo_id=tmp_repo, token=self._token)
def test_push_to_hub_in_organization_via_save_pretrained(self):
with tempfile.TemporaryDirectory() as tmp_dir:
try:
tmp_repo = f"valid_org/test-feature-extractor-{Path(tmp_dir).name}"
feature_extractor = Wav2Vec2FeatureExtractor.from_pretrained(SAMPLE_FEATURE_EXTRACTION_CONFIG_DIR)
# Push to hub via save_pretrained
feature_extractor.save_pretrained(tmp_dir, repo_id=tmp_repo, push_to_hub=True, token=self._token)
new_feature_extractor = Wav2Vec2FeatureExtractor.from_pretrained(tmp_repo)
for k, v in feature_extractor.__dict__.items():
self.assertEqual(v, getattr(new_feature_extractor, k))
finally:
# Always (try to) delete the repo.
self._try_delete_repo(repo_id=tmp_repo, token=self._token)
def test_push_to_hub_dynamic_feature_extractor(self):
with tempfile.TemporaryDirectory() as tmp_dir:
try:
tmp_repo = f"{USER}/test-dynamic-feature-extractor-{Path(tmp_dir).name}"
CustomFeatureExtractor.register_for_auto_class()
feature_extractor = CustomFeatureExtractor.from_pretrained(SAMPLE_FEATURE_EXTRACTION_CONFIG_DIR)
feature_extractor.push_to_hub(tmp_repo, token=self._token)
# This has added the proper auto_map field to the config
self.assertDictEqual(
feature_extractor.auto_map,
{"AutoFeatureExtractor": "custom_feature_extraction.CustomFeatureExtractor"},
with TemporaryHubRepo(token=self._token) as tmp_repo:
feature_extractor = Wav2Vec2FeatureExtractor.from_pretrained(SAMPLE_FEATURE_EXTRACTION_CONFIG_DIR)
# Push to hub via save_pretrained
with tempfile.TemporaryDirectory() as tmp_dir:
feature_extractor.save_pretrained(
tmp_dir, repo_id=tmp_repo.repo_id, push_to_hub=True, token=self._token
)
new_feature_extractor = AutoFeatureExtractor.from_pretrained(tmp_repo, trust_remote_code=True)
# Can't make an isinstance check because the new_feature_extractor is from the CustomFeatureExtractor class of a dynamic module
self.assertEqual(new_feature_extractor.__class__.__name__, "CustomFeatureExtractor")
finally:
# Always (try to) delete the repo.
self._try_delete_repo(repo_id=tmp_repo, token=self._token)
new_feature_extractor = Wav2Vec2FeatureExtractor.from_pretrained(tmp_repo.repo_id)
for k, v in feature_extractor.__dict__.items():
self.assertEqual(v, getattr(new_feature_extractor, k))
def test_push_to_hub_in_organization(self):
with TemporaryHubRepo(namespace="valid_org", token=self._token) as tmp_repo:
feature_extractor = Wav2Vec2FeatureExtractor.from_pretrained(SAMPLE_FEATURE_EXTRACTION_CONFIG_DIR)
feature_extractor.push_to_hub(tmp_repo.repo_id, token=self._token)
new_feature_extractor = Wav2Vec2FeatureExtractor.from_pretrained(tmp_repo.repo_id)
for k, v in feature_extractor.__dict__.items():
self.assertEqual(v, getattr(new_feature_extractor, k))
def test_push_to_hub_in_organization_via_save_pretrained(self):
with TemporaryHubRepo(namespace="valid_org", token=self._token) as tmp_repo:
feature_extractor = Wav2Vec2FeatureExtractor.from_pretrained(SAMPLE_FEATURE_EXTRACTION_CONFIG_DIR)
# Push to hub via save_pretrained
with tempfile.TemporaryDirectory() as tmp_dir:
feature_extractor.save_pretrained(
tmp_dir, repo_id=tmp_repo.repo_id, push_to_hub=True, token=self._token
)
new_feature_extractor = Wav2Vec2FeatureExtractor.from_pretrained(tmp_repo.repo_id)
for k, v in feature_extractor.__dict__.items():
self.assertEqual(v, getattr(new_feature_extractor, k))
def test_push_to_hub_dynamic_feature_extractor(self):
with TemporaryHubRepo(token=self._token) as tmp_repo:
CustomFeatureExtractor.register_for_auto_class()
feature_extractor = CustomFeatureExtractor.from_pretrained(SAMPLE_FEATURE_EXTRACTION_CONFIG_DIR)
feature_extractor.push_to_hub(tmp_repo.repo_id, token=self._token)
# This has added the proper auto_map field to the config
self.assertDictEqual(
feature_extractor.auto_map,
{"AutoFeatureExtractor": "custom_feature_extraction.CustomFeatureExtractor"},
)
new_feature_extractor = AutoFeatureExtractor.from_pretrained(tmp_repo.repo_id, trust_remote_code=True)
# Can't make an isinstance check because the new_feature_extractor is from the CustomFeatureExtractor class of a dynamic module
self.assertEqual(new_feature_extractor.__class__.__name__, "CustomFeatureExtractor")

View File

@@ -19,12 +19,12 @@ import unittest
import unittest.mock as mock
from pathlib import Path
from huggingface_hub import HfFolder, delete_repo
from huggingface_hub import HfFolder
from requests.exceptions import HTTPError
from transformers import AutoImageProcessor, ViTImageProcessor
from transformers.image_processing_utils import get_size_dict
from transformers.testing_utils import TOKEN, USER, get_tests_dir, is_staging_test
from transformers.testing_utils import TOKEN, TemporaryHubRepo, get_tests_dir, is_staging_test
sys.path.append(str(Path(__file__).parent.parent.parent / "utils"))
@@ -71,93 +71,62 @@ class ImageProcessorPushToHubTester(unittest.TestCase):
cls._token = TOKEN
HfFolder.save_token(TOKEN)
@staticmethod
def _try_delete_repo(repo_id, token):
try:
# Reset repo
delete_repo(repo_id=repo_id, token=token)
except: # noqa E722
pass
def test_push_to_hub(self):
with tempfile.TemporaryDirectory() as tmp_dir:
try:
tmp_repo = f"{USER}/test-image-processor-{Path(tmp_dir).name}"
image_processor = ViTImageProcessor.from_pretrained(SAMPLE_IMAGE_PROCESSING_CONFIG_DIR)
image_processor.push_to_hub(tmp_repo, token=self._token)
with TemporaryHubRepo(token=self._token) as tmp_repo:
image_processor = ViTImageProcessor.from_pretrained(SAMPLE_IMAGE_PROCESSING_CONFIG_DIR)
image_processor.push_to_hub(tmp_repo.repo_id, token=self._token)
new_image_processor = ViTImageProcessor.from_pretrained(tmp_repo)
for k, v in image_processor.__dict__.items():
self.assertEqual(v, getattr(new_image_processor, k))
finally:
# Always (try to) delete the repo.
self._try_delete_repo(repo_id=tmp_repo, token=self._token)
new_image_processor = ViTImageProcessor.from_pretrained(tmp_repo.repo_id)
for k, v in image_processor.__dict__.items():
self.assertEqual(v, getattr(new_image_processor, k))
def test_push_to_hub_via_save_pretrained(self):
with tempfile.TemporaryDirectory() as tmp_dir:
try:
tmp_repo = f"{USER}/test-image-processor-{Path(tmp_dir).name}"
image_processor = ViTImageProcessor.from_pretrained(SAMPLE_IMAGE_PROCESSING_CONFIG_DIR)
# Push to hub via save_pretrained
image_processor.save_pretrained(tmp_dir, repo_id=tmp_repo, push_to_hub=True, token=self._token)
with TemporaryHubRepo(token=self._token) as tmp_repo:
image_processor = ViTImageProcessor.from_pretrained(SAMPLE_IMAGE_PROCESSING_CONFIG_DIR)
# Push to hub via save_pretrained
with tempfile.TemporaryDirectory() as tmp_dir:
image_processor.save_pretrained(tmp_dir, repo_id=tmp_repo.repo_id, push_to_hub=True, token=self._token)
new_image_processor = ViTImageProcessor.from_pretrained(tmp_repo)
for k, v in image_processor.__dict__.items():
self.assertEqual(v, getattr(new_image_processor, k))
finally:
# Always (try to) delete the repo.
self._try_delete_repo(repo_id=tmp_repo, token=self._token)
new_image_processor = ViTImageProcessor.from_pretrained(tmp_repo.repo_id)
for k, v in image_processor.__dict__.items():
self.assertEqual(v, getattr(new_image_processor, k))
def test_push_to_hub_in_organization(self):
with tempfile.TemporaryDirectory() as tmp_dir:
try:
tmp_repo = f"valid_org/test-image-processor-{Path(tmp_dir).name}"
image_processor = ViTImageProcessor.from_pretrained(SAMPLE_IMAGE_PROCESSING_CONFIG_DIR)
image_processor.push_to_hub(tmp_repo, token=self._token)
with TemporaryHubRepo(namespace="valid_org", token=self._token) as tmp_repo:
image_processor = ViTImageProcessor.from_pretrained(SAMPLE_IMAGE_PROCESSING_CONFIG_DIR)
image_processor.push_to_hub(tmp_repo.repo_id, token=self._token)
new_image_processor = ViTImageProcessor.from_pretrained(tmp_repo)
for k, v in image_processor.__dict__.items():
self.assertEqual(v, getattr(new_image_processor, k))
finally:
# Always (try to) delete the repo.
self._try_delete_repo(repo_id=tmp_repo, token=self._token)
new_image_processor = ViTImageProcessor.from_pretrained(tmp_repo.repo_id)
for k, v in image_processor.__dict__.items():
self.assertEqual(v, getattr(new_image_processor, k))
def test_push_to_hub_in_organization_via_save_pretrained(self):
with tempfile.TemporaryDirectory() as tmp_dir:
try:
tmp_repo = f"valid_org/test-image-processor-{Path(tmp_dir).name}"
image_processor = ViTImageProcessor.from_pretrained(SAMPLE_IMAGE_PROCESSING_CONFIG_DIR)
# Push to hub via save_pretrained
image_processor.save_pretrained(tmp_dir, repo_id=tmp_repo, push_to_hub=True, token=self._token)
with TemporaryHubRepo(namespace="valid_org", token=self._token) as tmp_repo:
image_processor = ViTImageProcessor.from_pretrained(SAMPLE_IMAGE_PROCESSING_CONFIG_DIR)
# Push to hub via save_pretrained
with tempfile.TemporaryDirectory() as tmp_dir:
image_processor.save_pretrained(tmp_dir, repo_id=tmp_repo.repo_id, push_to_hub=True, token=self._token)
new_image_processor = ViTImageProcessor.from_pretrained(tmp_repo)
for k, v in image_processor.__dict__.items():
self.assertEqual(v, getattr(new_image_processor, k))
finally:
# Always (try to) delete the repo.
self._try_delete_repo(repo_id=tmp_repo, token=self._token)
new_image_processor = ViTImageProcessor.from_pretrained(tmp_repo.repo_id)
for k, v in image_processor.__dict__.items():
self.assertEqual(v, getattr(new_image_processor, k))
def test_push_to_hub_dynamic_image_processor(self):
with tempfile.TemporaryDirectory() as tmp_dir:
try:
tmp_repo = f"{USER}/test-dynamic-image-processor-{Path(tmp_dir).name}"
CustomImageProcessor.register_for_auto_class()
image_processor = CustomImageProcessor.from_pretrained(SAMPLE_IMAGE_PROCESSING_CONFIG_DIR)
with TemporaryHubRepo(token=self._token) as tmp_repo:
CustomImageProcessor.register_for_auto_class()
image_processor = CustomImageProcessor.from_pretrained(SAMPLE_IMAGE_PROCESSING_CONFIG_DIR)
image_processor.push_to_hub(tmp_repo, token=self._token)
image_processor.push_to_hub(tmp_repo.repo_id, token=self._token)
# This has added the proper auto_map field to the config
self.assertDictEqual(
image_processor.auto_map,
{"AutoImageProcessor": "custom_image_processing.CustomImageProcessor"},
)
# This has added the proper auto_map field to the config
self.assertDictEqual(
image_processor.auto_map,
{"AutoImageProcessor": "custom_image_processing.CustomImageProcessor"},
)
new_image_processor = AutoImageProcessor.from_pretrained(tmp_repo, trust_remote_code=True)
# Can't make an isinstance check because the new_image_processor is from the CustomImageProcessor class of a dynamic module
self.assertEqual(new_image_processor.__class__.__name__, "CustomImageProcessor")
finally:
# Always (try to) delete the repo.
self._try_delete_repo(repo_id=tmp_repo, token=self._token)
new_image_processor = AutoImageProcessor.from_pretrained(tmp_repo.repo_id, trust_remote_code=True)
# Can't make an isinstance check because the new_image_processor is from the CustomImageProcessor class of a dynamic module
self.assertEqual(new_image_processor.__class__.__name__, "CustomImageProcessor")
class ImageProcessingUtilsTester(unittest.TestCase):

View File

@@ -14,16 +14,15 @@
import tempfile
import unittest
from pathlib import Path
import numpy as np
from huggingface_hub import HfFolder, delete_repo, snapshot_download
from huggingface_hub import HfFolder, snapshot_download
from transformers import BertConfig, BertModel, is_flax_available, is_torch_available
from transformers.testing_utils import (
TOKEN,
USER,
CaptureLogger,
TemporaryHubRepo,
is_pt_flax_cross_test,
is_staging_test,
require_flax,
@@ -55,103 +54,77 @@ class FlaxModelPushToHubTester(unittest.TestCase):
cls._token = TOKEN
HfFolder.save_token(TOKEN)
@staticmethod
def _try_delete_repo(repo_id, token):
try:
# Reset repo
delete_repo(repo_id=repo_id, token=token)
except: # noqa E722
pass
def test_push_to_hub(self):
with tempfile.TemporaryDirectory() as tmp_dir:
try:
tmp_repo = f"{USER}/test-model-flax-{Path(tmp_dir).name}"
config = BertConfig(
vocab_size=99, hidden_size=32, num_hidden_layers=5, num_attention_heads=4, intermediate_size=37
)
model = FlaxBertModel(config)
model.push_to_hub(tmp_repo, token=self._token)
with TemporaryHubRepo(token=self._token) as tmp_repo:
config = BertConfig(
vocab_size=99, hidden_size=32, num_hidden_layers=5, num_attention_heads=4, intermediate_size=37
)
model = FlaxBertModel(config)
model.push_to_hub(tmp_repo.repo_id, token=self._token)
new_model = FlaxBertModel.from_pretrained(tmp_repo)
new_model = FlaxBertModel.from_pretrained(tmp_repo.repo_id)
base_params = flatten_dict(unfreeze(model.params))
new_params = flatten_dict(unfreeze(new_model.params))
base_params = flatten_dict(unfreeze(model.params))
new_params = flatten_dict(unfreeze(new_model.params))
for key in base_params.keys():
max_diff = (base_params[key] - new_params[key]).sum().item()
self.assertLessEqual(max_diff, 1e-3, msg=f"{key} not identical")
finally:
# Always (try to) delete the repo.
self._try_delete_repo(repo_id=tmp_repo, token=self._token)
for key in base_params.keys():
max_diff = (base_params[key] - new_params[key]).sum().item()
self.assertLessEqual(max_diff, 1e-3, msg=f"{key} not identical")
def test_push_to_hub_via_save_pretrained(self):
with tempfile.TemporaryDirectory() as tmp_dir:
try:
tmp_repo = f"{USER}/test-model-flax-{Path(tmp_dir).name}"
config = BertConfig(
vocab_size=99, hidden_size=32, num_hidden_layers=5, num_attention_heads=4, intermediate_size=37
)
model = FlaxBertModel(config)
# Push to hub via save_pretrained
model.save_pretrained(tmp_dir, repo_id=tmp_repo, push_to_hub=True, token=self._token)
with TemporaryHubRepo(token=self._token) as tmp_repo:
config = BertConfig(
vocab_size=99, hidden_size=32, num_hidden_layers=5, num_attention_heads=4, intermediate_size=37
)
model = FlaxBertModel(config)
# Push to hub via save_pretrained
with tempfile.TemporaryDirectory() as tmp_dir:
model.save_pretrained(tmp_dir, repo_id=tmp_repo.repo_id, push_to_hub=True, token=self._token)
new_model = FlaxBertModel.from_pretrained(tmp_repo)
new_model = FlaxBertModel.from_pretrained(tmp_repo.repo_id)
base_params = flatten_dict(unfreeze(model.params))
new_params = flatten_dict(unfreeze(new_model.params))
base_params = flatten_dict(unfreeze(model.params))
new_params = flatten_dict(unfreeze(new_model.params))
for key in base_params.keys():
max_diff = (base_params[key] - new_params[key]).sum().item()
self.assertLessEqual(max_diff, 1e-3, msg=f"{key} not identical")
finally:
# Always (try to) delete the repo.
self._try_delete_repo(repo_id=tmp_repo, token=self._token)
for key in base_params.keys():
max_diff = (base_params[key] - new_params[key]).sum().item()
self.assertLessEqual(max_diff, 1e-3, msg=f"{key} not identical")
def test_push_to_hub_in_organization(self):
with tempfile.TemporaryDirectory() as tmp_dir:
try:
tmp_repo = f"valid_org/test-model-flax-org-{Path(tmp_dir).name}"
config = BertConfig(
vocab_size=99, hidden_size=32, num_hidden_layers=5, num_attention_heads=4, intermediate_size=37
)
model = FlaxBertModel(config)
model.push_to_hub(tmp_repo, token=self._token)
with TemporaryHubRepo(namespace="valid_org", token=self._token) as tmp_repo:
config = BertConfig(
vocab_size=99, hidden_size=32, num_hidden_layers=5, num_attention_heads=4, intermediate_size=37
)
model = FlaxBertModel(config)
model.push_to_hub(tmp_repo.repo_id, token=self._token)
new_model = FlaxBertModel.from_pretrained(tmp_repo)
new_model = FlaxBertModel.from_pretrained(tmp_repo.repo_id)
base_params = flatten_dict(unfreeze(model.params))
new_params = flatten_dict(unfreeze(new_model.params))
base_params = flatten_dict(unfreeze(model.params))
new_params = flatten_dict(unfreeze(new_model.params))
for key in base_params.keys():
max_diff = (base_params[key] - new_params[key]).sum().item()
self.assertLessEqual(max_diff, 1e-3, msg=f"{key} not identical")
finally:
# Always (try to) delete the repo.
self._try_delete_repo(repo_id=tmp_repo, token=self._token)
for key in base_params.keys():
max_diff = (base_params[key] - new_params[key]).sum().item()
self.assertLessEqual(max_diff, 1e-3, msg=f"{key} not identical")
def test_push_to_hub_in_organization_via_save_pretrained(self):
with tempfile.TemporaryDirectory() as tmp_dir:
try:
tmp_repo = f"valid_org/test-model-flax-org-{Path(tmp_dir).name}"
config = BertConfig(
vocab_size=99, hidden_size=32, num_hidden_layers=5, num_attention_heads=4, intermediate_size=37
)
model = FlaxBertModel(config)
# Push to hub via save_pretrained
model.save_pretrained(tmp_dir, repo_id=tmp_repo, push_to_hub=True, token=self._token)
with TemporaryHubRepo(namespace="valid_org", token=self._token) as tmp_repo:
config = BertConfig(
vocab_size=99, hidden_size=32, num_hidden_layers=5, num_attention_heads=4, intermediate_size=37
)
model = FlaxBertModel(config)
# Push to hub via save_pretrained
with tempfile.TemporaryDirectory() as tmp_dir:
model.save_pretrained(tmp_dir, repo_id=tmp_repo.repo_id, push_to_hub=True, token=self._token)
new_model = FlaxBertModel.from_pretrained(tmp_repo)
new_model = FlaxBertModel.from_pretrained(tmp_repo.repo_id)
base_params = flatten_dict(unfreeze(model.params))
new_params = flatten_dict(unfreeze(new_model.params))
base_params = flatten_dict(unfreeze(model.params))
new_params = flatten_dict(unfreeze(new_model.params))
for key in base_params.keys():
max_diff = (base_params[key] - new_params[key]).sum().item()
self.assertLessEqual(max_diff, 1e-3, msg=f"{key} not identical")
finally:
# Always (try to) delete the repo.
self._try_delete_repo(repo_id=tmp_repo, token=self._token)
for key in base_params.keys():
max_diff = (base_params[key] - new_params[key]).sum().item()
self.assertLessEqual(max_diff, 1e-3, msg=f"{key} not identical")
def check_models_equal(model1, model2):

View File

@@ -23,9 +23,8 @@ import random
import tempfile
import unittest
import unittest.mock as mock
from pathlib import Path
from huggingface_hub import HfFolder, Repository, delete_repo, snapshot_download
from huggingface_hub import HfFolder, Repository, snapshot_download
from requests.exceptions import HTTPError
from transformers import is_tf_available, is_torch_available
@@ -34,6 +33,7 @@ from transformers.testing_utils import ( # noqa: F401
TOKEN,
USER,
CaptureLogger,
TemporaryHubRepo,
_tf_gpu_memory_limit,
is_pt_tf_cross_test,
is_staging_test,
@@ -683,149 +683,119 @@ class TFModelPushToHubTester(unittest.TestCase):
cls._token = TOKEN
HfFolder.save_token(TOKEN)
@staticmethod
def _try_delete_repo(repo_id, token):
try:
# Reset repo
delete_repo(repo_id=repo_id, token=token)
except: # noqa E722
pass
def test_push_to_hub(self):
with tempfile.TemporaryDirectory() as tmp_dir:
try:
tmp_repo = f"{USER}/test-model-tf-{Path(tmp_dir).name}"
config = BertConfig(
vocab_size=99, hidden_size=32, num_hidden_layers=5, num_attention_heads=4, intermediate_size=37
)
model = TFBertModel(config)
# Make sure model is properly initialized
model.build_in_name_scope()
with TemporaryHubRepo(token=self._token) as tmp_repo:
config = BertConfig(
vocab_size=99, hidden_size=32, num_hidden_layers=5, num_attention_heads=4, intermediate_size=37
)
model = TFBertModel(config)
# Make sure model is properly initialized
model.build_in_name_scope()
logging.set_verbosity_info()
logger = logging.get_logger("transformers.utils.hub")
with CaptureLogger(logger) as cl:
model.push_to_hub(tmp_repo, token=self._token)
logging.set_verbosity_warning()
# Check the model card was created and uploaded.
self.assertIn("Uploading the following files to __DUMMY_TRANSFORMERS_USER__/test-model-tf", cl.out)
logging.set_verbosity_info()
logger = logging.get_logger("transformers.utils.hub")
with CaptureLogger(logger) as cl:
model.push_to_hub(tmp_repo.repo_id, token=self._token)
logging.set_verbosity_warning()
# Check the model card was created and uploaded.
self.assertIn("Uploading the following files to __DUMMY_TRANSFORMERS_USER__/test-model-tf", cl.out)
new_model = TFBertModel.from_pretrained(tmp_repo)
models_equal = True
for p1, p2 in zip(model.weights, new_model.weights):
if not tf.math.reduce_all(p1 == p2):
models_equal = False
break
self.assertTrue(models_equal)
finally:
# Always (try to) delete the repo.
self._try_delete_repo(repo_id=tmp_repo, token=self._token)
new_model = TFBertModel.from_pretrained(tmp_repo.repo_id)
models_equal = True
for p1, p2 in zip(model.weights, new_model.weights):
if not tf.math.reduce_all(p1 == p2):
models_equal = False
break
self.assertTrue(models_equal)
def test_push_to_hub_via_save_pretrained(self):
with tempfile.TemporaryDirectory() as tmp_dir:
try:
tmp_repo = f"{USER}/test-model-tf-{Path(tmp_dir).name}"
config = BertConfig(
vocab_size=99, hidden_size=32, num_hidden_layers=5, num_attention_heads=4, intermediate_size=37
)
model = TFBertModel(config)
# Make sure model is properly initialized
model.build_in_name_scope()
with TemporaryHubRepo(token=self._token) as tmp_repo:
config = BertConfig(
vocab_size=99, hidden_size=32, num_hidden_layers=5, num_attention_heads=4, intermediate_size=37
)
model = TFBertModel(config)
# Make sure model is properly initialized
model.build_in_name_scope()
# Push to hub via save_pretrained
model.save_pretrained(tmp_dir, repo_id=tmp_repo, push_to_hub=True, token=self._token)
# Push to hub via save_pretrained
with tempfile.TemporaryDirectory() as tmp_dir:
model.save_pretrained(tmp_dir, repo_id=tmp_repo.repo_id, push_to_hub=True, token=self._token)
new_model = TFBertModel.from_pretrained(tmp_repo)
models_equal = True
for p1, p2 in zip(model.weights, new_model.weights):
if not tf.math.reduce_all(p1 == p2):
models_equal = False
break
self.assertTrue(models_equal)
finally:
# Always (try to) delete the repo.
self._try_delete_repo(repo_id=tmp_repo, token=self._token)
new_model = TFBertModel.from_pretrained(tmp_repo.repo_id)
models_equal = True
for p1, p2 in zip(model.weights, new_model.weights):
if not tf.math.reduce_all(p1 == p2):
models_equal = False
break
self.assertTrue(models_equal)
@is_pt_tf_cross_test
def test_push_to_hub_callback(self):
with tempfile.TemporaryDirectory() as tmp_dir:
try:
tmp_repo = f"{USER}/test-model-tf-callback-{Path(tmp_dir).name}"
config = BertConfig(
vocab_size=99, hidden_size=32, num_hidden_layers=5, num_attention_heads=4, intermediate_size=37
)
model = TFBertForMaskedLM(config)
model.compile()
with TemporaryHubRepo(token=self._token) as tmp_repo:
config = BertConfig(
vocab_size=99, hidden_size=32, num_hidden_layers=5, num_attention_heads=4, intermediate_size=37
)
model = TFBertForMaskedLM(config)
model.compile()
with tempfile.TemporaryDirectory() as tmp_dir:
push_to_hub_callback = PushToHubCallback(
output_dir=tmp_dir,
hub_model_id=tmp_repo,
hub_model_id=tmp_repo.repo_id,
hub_token=self._token,
)
model.fit(model.dummy_inputs, model.dummy_inputs, epochs=1, callbacks=[push_to_hub_callback])
new_model = TFBertForMaskedLM.from_pretrained(tmp_repo)
models_equal = True
for p1, p2 in zip(model.weights, new_model.weights):
if not tf.math.reduce_all(p1 == p2):
models_equal = False
break
self.assertTrue(models_equal)
new_model = TFBertForMaskedLM.from_pretrained(tmp_repo.repo_id)
models_equal = True
for p1, p2 in zip(model.weights, new_model.weights):
if not tf.math.reduce_all(p1 == p2):
models_equal = False
break
self.assertTrue(models_equal)
tf_push_to_hub_params = dict(inspect.signature(TFPreTrainedModel.push_to_hub).parameters)
tf_push_to_hub_params.pop("base_model_card_args")
pt_push_to_hub_params = dict(inspect.signature(PreTrainedModel.push_to_hub).parameters)
pt_push_to_hub_params.pop("deprecated_kwargs")
self.assertDictEaual(tf_push_to_hub_params, pt_push_to_hub_params)
finally:
# Always (try to) delete the repo.
self._try_delete_repo(repo_id=tmp_repo, token=self._token)
tf_push_to_hub_params = dict(inspect.signature(TFPreTrainedModel.push_to_hub).parameters)
tf_push_to_hub_params.pop("base_model_card_args")
pt_push_to_hub_params = dict(inspect.signature(PreTrainedModel.push_to_hub).parameters)
pt_push_to_hub_params.pop("deprecated_kwargs")
self.assertDictEaual(tf_push_to_hub_params, pt_push_to_hub_params)
def test_push_to_hub_in_organization(self):
with tempfile.TemporaryDirectory() as tmp_dir:
try:
tmp_repo = f"valid_org/test-model-tf-org-{Path(tmp_dir).name}"
config = BertConfig(
vocab_size=99, hidden_size=32, num_hidden_layers=5, num_attention_heads=4, intermediate_size=37
)
model = TFBertModel(config)
# Make sure model is properly initialized
model.build_in_name_scope()
with TemporaryHubRepo(namespace="valid_org", token=self._token) as tmp_repo:
config = BertConfig(
vocab_size=99, hidden_size=32, num_hidden_layers=5, num_attention_heads=4, intermediate_size=37
)
model = TFBertModel(config)
# Make sure model is properly initialized
model.build_in_name_scope()
model.push_to_hub(tmp_repo, token=self._token)
model.push_to_hub(tmp_repo.repo_id, token=self._token)
new_model = TFBertModel.from_pretrained(tmp_repo)
models_equal = True
for p1, p2 in zip(model.weights, new_model.weights):
if not tf.math.reduce_all(p1 == p2):
models_equal = False
break
self.assertTrue(models_equal)
finally:
# Always (try to) delete the repo.
self._try_delete_repo(repo_id=tmp_repo, token=self._token)
new_model = TFBertModel.from_pretrained(tmp_repo.repo_id)
models_equal = True
for p1, p2 in zip(model.weights, new_model.weights):
if not tf.math.reduce_all(p1 == p2):
models_equal = False
break
self.assertTrue(models_equal)
def test_push_to_hub_in_organization_via_save_pretrained(self):
with tempfile.TemporaryDirectory() as tmp_dir:
try:
tmp_repo = f"valid_org/test-model-tf-org-{Path(tmp_dir).name}"
config = BertConfig(
vocab_size=99, hidden_size=32, num_hidden_layers=5, num_attention_heads=4, intermediate_size=37
)
model = TFBertModel(config)
# Make sure model is properly initialized
model.build_in_name_scope()
with TemporaryHubRepo(namespace="valid_org", token=self._token) as tmp_repo:
config = BertConfig(
vocab_size=99, hidden_size=32, num_hidden_layers=5, num_attention_heads=4, intermediate_size=37
)
model = TFBertModel(config)
# Make sure model is properly initialized
model.build_in_name_scope()
# Push to hub via save_pretrained
model.save_pretrained(tmp_dir, push_to_hub=True, token=self._token, repo_id=tmp_repo)
# Push to hub via save_pretrained
with tempfile.TemporaryDirectory() as tmp_dir:
model.save_pretrained(tmp_dir, push_to_hub=True, token=self._token, repo_id=tmp_repo.repo_id)
new_model = TFBertModel.from_pretrained(tmp_repo)
models_equal = True
for p1, p2 in zip(model.weights, new_model.weights):
if not tf.math.reduce_all(p1 == p2):
models_equal = False
break
self.assertTrue(models_equal)
finally:
# Always (try to) delete the repo.
self._try_delete_repo(repo_id=tmp_repo, token=self._token)
new_model = TFBertModel.from_pretrained(tmp_repo.repo_id)
models_equal = True
for p1, p2 in zip(model.weights, new_model.weights):
if not tf.math.reduce_all(p1 == p2):
models_equal = False
break
self.assertTrue(models_equal)

View File

@@ -28,7 +28,7 @@ import warnings
from pathlib import Path
import requests
from huggingface_hub import HfApi, HfFolder, delete_repo
from huggingface_hub import HfApi, HfFolder
from pytest import mark
from requests.exceptions import HTTPError
@@ -44,9 +44,9 @@ from transformers import (
)
from transformers.testing_utils import (
TOKEN,
USER,
CaptureLogger,
LoggingLevel,
TemporaryHubRepo,
TestCasePlus,
is_staging_test,
require_accelerate,
@@ -2000,168 +2000,127 @@ class ModelPushToHubTester(unittest.TestCase):
cls._token = TOKEN
HfFolder.save_token(TOKEN)
@staticmethod
def _try_delete_repo(repo_id, token):
try:
# Reset repo
delete_repo(repo_id=repo_id, token=token)
except: # noqa E722
pass
@unittest.skip(reason="This test is flaky")
def test_push_to_hub(self):
with tempfile.TemporaryDirectory() as tmp_dir:
try:
tmp_repo = f"{USER}/test-model-{Path(tmp_dir).name}"
config = BertConfig(
vocab_size=99, hidden_size=32, num_hidden_layers=5, num_attention_heads=4, intermediate_size=37
)
model = BertModel(config)
model.push_to_hub(tmp_repo, token=self._token)
with TemporaryHubRepo(token=self._token) as tmp_repo:
config = BertConfig(
vocab_size=99, hidden_size=32, num_hidden_layers=5, num_attention_heads=4, intermediate_size=37
)
model = BertModel(config)
model.push_to_hub(tmp_repo.repo_id, token=self._token)
new_model = BertModel.from_pretrained(tmp_repo)
for p1, p2 in zip(model.parameters(), new_model.parameters()):
self.assertTrue(torch.equal(p1, p2))
finally:
# Always (try to) delete the repo.
self._try_delete_repo(repo_id=tmp_repo, token=self._token)
new_model = BertModel.from_pretrained(tmp_repo.repo_id)
for p1, p2 in zip(model.parameters(), new_model.parameters()):
self.assertTrue(torch.equal(p1, p2))
@unittest.skip(reason="This test is flaky")
def test_push_to_hub_via_save_pretrained(self):
with tempfile.TemporaryDirectory() as tmp_dir:
try:
tmp_repo = f"{USER}/test-model-{Path(tmp_dir).name}"
config = BertConfig(
vocab_size=99, hidden_size=32, num_hidden_layers=5, num_attention_heads=4, intermediate_size=37
)
model = BertModel(config)
# Push to hub via save_pretrained
model.save_pretrained(tmp_dir, repo_id=tmp_repo, push_to_hub=True, token=self._token)
with TemporaryHubRepo(token=self._token) as tmp_repo:
config = BertConfig(
vocab_size=99, hidden_size=32, num_hidden_layers=5, num_attention_heads=4, intermediate_size=37
)
model = BertModel(config)
# Push to hub via save_pretrained
with tempfile.TemporaryDirectory() as tmp_dir:
model.save_pretrained(tmp_dir, repo_id=tmp_repo.repo_id, push_to_hub=True, token=self._token)
new_model = BertModel.from_pretrained(tmp_repo)
for p1, p2 in zip(model.parameters(), new_model.parameters()):
self.assertTrue(torch.equal(p1, p2))
finally:
# Always (try to) delete the repo.
self._try_delete_repo(repo_id=tmp_repo, token=self._token)
new_model = BertModel.from_pretrained(tmp_repo.repo_id)
for p1, p2 in zip(model.parameters(), new_model.parameters()):
self.assertTrue(torch.equal(p1, p2))
def test_push_to_hub_with_description(self):
with tempfile.TemporaryDirectory() as tmp_dir:
try:
tmp_repo = f"{USER}/test-model-{Path(tmp_dir).name}"
config = BertConfig(
vocab_size=99, hidden_size=32, num_hidden_layers=5, num_attention_heads=4, intermediate_size=37
)
model = BertModel(config)
COMMIT_DESCRIPTION = """
with TemporaryHubRepo(token=self._token) as tmp_repo:
config = BertConfig(
vocab_size=99, hidden_size=32, num_hidden_layers=5, num_attention_heads=4, intermediate_size=37
)
model = BertModel(config)
COMMIT_DESCRIPTION = """
The commit description supports markdown synthax see:
```python
>>> form transformers import AutoConfig
>>> config = AutoConfig.from_pretrained("google-bert/bert-base-uncased")
```
"""
commit_details = model.push_to_hub(
tmp_repo, use_auth_token=self._token, create_pr=True, commit_description=COMMIT_DESCRIPTION
)
self.assertEqual(commit_details.commit_description, COMMIT_DESCRIPTION)
finally:
# Always (try to) delete the repo.
self._try_delete_repo(repo_id=tmp_repo, token=self._token)
commit_details = model.push_to_hub(
tmp_repo.repo_id, use_auth_token=self._token, create_pr=True, commit_description=COMMIT_DESCRIPTION
)
self.assertEqual(commit_details.commit_description, COMMIT_DESCRIPTION)
@unittest.skip(reason="This test is flaky")
def test_push_to_hub_in_organization(self):
with tempfile.TemporaryDirectory() as tmp_dir:
try:
tmp_repo = f"valid_org/test-model-org-{Path(tmp_dir).name}"
config = BertConfig(
vocab_size=99, hidden_size=32, num_hidden_layers=5, num_attention_heads=4, intermediate_size=37
)
model = BertModel(config)
model.push_to_hub(tmp_repo, token=self._token)
with TemporaryHubRepo(namespace="valid_org", token=self._token) as tmp_repo:
config = BertConfig(
vocab_size=99, hidden_size=32, num_hidden_layers=5, num_attention_heads=4, intermediate_size=37
)
model = BertModel(config)
model.push_to_hub(tmp_repo.repo_id, token=self._token)
new_model = BertModel.from_pretrained(tmp_repo)
for p1, p2 in zip(model.parameters(), new_model.parameters()):
self.assertTrue(torch.equal(p1, p2))
finally:
# Always (try to) delete the repo.
self._try_delete_repo(repo_id=tmp_repo, token=self._token)
new_model = BertModel.from_pretrained(tmp_repo.repo_id)
for p1, p2 in zip(model.parameters(), new_model.parameters()):
self.assertTrue(torch.equal(p1, p2))
@unittest.skip(reason="This test is flaky")
def test_push_to_hub_in_organization_via_save_pretrained(self):
with tempfile.TemporaryDirectory() as tmp_dir:
try:
tmp_repo = f"valid_org/test-model-org-{Path(tmp_dir).name}"
config = BertConfig(
vocab_size=99, hidden_size=32, num_hidden_layers=5, num_attention_heads=4, intermediate_size=37
)
model = BertModel(config)
# Push to hub via save_pretrained
model.save_pretrained(tmp_dir, push_to_hub=True, token=self._token, repo_id=tmp_repo)
with TemporaryHubRepo(namespace="valid_org", token=self._token) as tmp_repo:
config = BertConfig(
vocab_size=99, hidden_size=32, num_hidden_layers=5, num_attention_heads=4, intermediate_size=37
)
model = BertModel(config)
# Push to hub via save_pretrained
with tempfile.TemporaryDirectory() as tmp_dir:
model.save_pretrained(tmp_dir, push_to_hub=True, token=self._token, repo_id=tmp_repo.repo_id)
new_model = BertModel.from_pretrained(tmp_repo)
for p1, p2 in zip(model.parameters(), new_model.parameters()):
self.assertTrue(torch.equal(p1, p2))
finally:
# Always (try to) delete the repo.
self._try_delete_repo(repo_id=tmp_repo, token=self._token)
new_model = BertModel.from_pretrained(tmp_repo.repo_id)
for p1, p2 in zip(model.parameters(), new_model.parameters()):
self.assertTrue(torch.equal(p1, p2))
def test_push_to_hub_dynamic_model(self):
with tempfile.TemporaryDirectory() as tmp_dir:
try:
tmp_repo = f"{USER}/test-dynamic-model-{Path(tmp_dir).name}"
CustomConfig.register_for_auto_class()
CustomModel.register_for_auto_class()
with TemporaryHubRepo(token=self._token) as tmp_repo:
CustomConfig.register_for_auto_class()
CustomModel.register_for_auto_class()
config = CustomConfig(hidden_size=32)
model = CustomModel(config)
config = CustomConfig(hidden_size=32)
model = CustomModel(config)
model.push_to_hub(tmp_repo, token=self._token)
# checks
self.assertDictEqual(
config.auto_map,
{"AutoConfig": "custom_configuration.CustomConfig", "AutoModel": "custom_modeling.CustomModel"},
)
model.push_to_hub(tmp_repo.repo_id, token=self._token)
# checks
self.assertDictEqual(
config.auto_map,
{"AutoConfig": "custom_configuration.CustomConfig", "AutoModel": "custom_modeling.CustomModel"},
)
new_model = AutoModel.from_pretrained(tmp_repo, trust_remote_code=True)
# Can't make an isinstance check because the new_model is from the CustomModel class of a dynamic module
self.assertEqual(new_model.__class__.__name__, "CustomModel")
for p1, p2 in zip(model.parameters(), new_model.parameters()):
self.assertTrue(torch.equal(p1, p2))
new_model = AutoModel.from_pretrained(tmp_repo.repo_id, trust_remote_code=True)
# Can't make an isinstance check because the new_model is from the CustomModel class of a dynamic module
self.assertEqual(new_model.__class__.__name__, "CustomModel")
for p1, p2 in zip(model.parameters(), new_model.parameters()):
self.assertTrue(torch.equal(p1, p2))
config = AutoConfig.from_pretrained(tmp_repo, trust_remote_code=True)
new_model = AutoModel.from_config(config, trust_remote_code=True)
self.assertEqual(new_model.__class__.__name__, "CustomModel")
finally:
# Always (try to) delete the repo.
self._try_delete_repo(repo_id=tmp_repo, token=self._token)
config = AutoConfig.from_pretrained(tmp_repo.repo_id, trust_remote_code=True)
new_model = AutoModel.from_config(config, trust_remote_code=True)
self.assertEqual(new_model.__class__.__name__, "CustomModel")
def test_push_to_hub_with_tags(self):
with tempfile.TemporaryDirectory() as tmp_dir:
try:
tmp_repo = f"{USER}/test-dynamic-model-with-tags-{Path(tmp_dir).name}"
from huggingface_hub import ModelCard
with TemporaryHubRepo(token=self._token) as tmp_repo:
from huggingface_hub import ModelCard
new_tags = ["tag-1", "tag-2"]
new_tags = ["tag-1", "tag-2"]
CustomConfig.register_for_auto_class()
CustomModel.register_for_auto_class()
CustomConfig.register_for_auto_class()
CustomModel.register_for_auto_class()
config = CustomConfig(hidden_size=32)
model = CustomModel(config)
config = CustomConfig(hidden_size=32)
model = CustomModel(config)
self.assertTrue(model.model_tags is None)
self.assertTrue(model.model_tags is None)
model.add_model_tags(new_tags)
model.add_model_tags(new_tags)
self.assertTrue(model.model_tags == new_tags)
self.assertTrue(model.model_tags == new_tags)
model.push_to_hub(tmp_repo, token=self._token)
model.push_to_hub(tmp_repo.repo_id, token=self._token)
loaded_model_card = ModelCard.load(tmp_repo)
self.assertEqual(loaded_model_card.data.tags, new_tags)
finally:
# Always (try to) delete the repo.
self._try_delete_repo(repo_id=tmp_repo, token=self._token)
loaded_model_card = ModelCard.load(tmp_repo.repo_id)
self.assertEqual(loaded_model_card.data.tags, new_tags)
@require_torch

View File

@@ -20,7 +20,7 @@ import unittest
import unittest.mock as mock
from pathlib import Path
from huggingface_hub import HfFolder, delete_repo
from huggingface_hub import HfFolder
from huggingface_hub.file_download import http_get
from requests.exceptions import HTTPError
@@ -32,7 +32,7 @@ from transformers import (
GPT2TokenizerFast,
is_tokenizers_available,
)
from transformers.testing_utils import TOKEN, USER, is_staging_test, require_tokenizers
from transformers.testing_utils import TOKEN, TemporaryHubRepo, is_staging_test, require_tokenizers
from transformers.tokenization_utils import ExtensionsTrie, Trie
@@ -118,114 +118,84 @@ class TokenizerPushToHubTester(unittest.TestCase):
cls._token = TOKEN
HfFolder.save_token(TOKEN)
@staticmethod
def _try_delete_repo(repo_id, token):
try:
# Reset repo
delete_repo(repo_id=repo_id, token=token)
except: # noqa E722
pass
def test_push_to_hub(self):
with tempfile.TemporaryDirectory() as tmp_dir:
try:
tmp_repo = f"{USER}/test-tokenizer-{Path(tmp_dir).name}"
with TemporaryHubRepo(token=self._token) as tmp_repo:
with tempfile.TemporaryDirectory() as tmp_dir:
vocab_file = os.path.join(tmp_dir, "vocab.txt")
with open(vocab_file, "w", encoding="utf-8") as vocab_writer:
vocab_writer.write("".join([x + "\n" for x in self.vocab_tokens]))
tokenizer = BertTokenizer(vocab_file)
tokenizer.push_to_hub(tmp_repo, token=self._token)
new_tokenizer = BertTokenizer.from_pretrained(tmp_repo)
self.assertDictEqual(new_tokenizer.vocab, tokenizer.vocab)
finally:
# Always (try to) delete the repo.
self._try_delete_repo(repo_id=tmp_repo, token=self._token)
tokenizer.push_to_hub(tmp_repo.repo_id, token=self._token)
new_tokenizer = BertTokenizer.from_pretrained(tmp_repo.repo_id)
self.assertDictEqual(new_tokenizer.vocab, tokenizer.vocab)
def test_push_to_hub_via_save_pretrained(self):
with tempfile.TemporaryDirectory() as tmp_dir:
try:
tmp_repo = f"{USER}/test-tokenizer-{Path(tmp_dir).name}"
with TemporaryHubRepo(token=self._token) as tmp_repo:
with tempfile.TemporaryDirectory() as tmp_dir:
vocab_file = os.path.join(tmp_dir, "vocab.txt")
with open(vocab_file, "w", encoding="utf-8") as vocab_writer:
vocab_writer.write("".join([x + "\n" for x in self.vocab_tokens]))
tokenizer = BertTokenizer(vocab_file)
# Push to hub via save_pretrained
tokenizer.save_pretrained(tmp_dir, repo_id=tmp_repo, push_to_hub=True, token=self._token)
tokenizer.save_pretrained(tmp_dir, repo_id=tmp_repo.repo_id, push_to_hub=True, token=self._token)
new_tokenizer = BertTokenizer.from_pretrained(tmp_repo)
self.assertDictEqual(new_tokenizer.vocab, tokenizer.vocab)
finally:
# Always (try to) delete the repo.
self._try_delete_repo(repo_id=tmp_repo, token=self._token)
new_tokenizer = BertTokenizer.from_pretrained(tmp_repo.repo_id)
self.assertDictEqual(new_tokenizer.vocab, tokenizer.vocab)
def test_push_to_hub_in_organization(self):
with tempfile.TemporaryDirectory() as tmp_dir:
try:
tmp_repo = f"valid_org/test-tokenizer-{Path(tmp_dir).name}"
with TemporaryHubRepo(namespace="valid_org", token=self._token) as tmp_repo:
with tempfile.TemporaryDirectory() as tmp_dir:
vocab_file = os.path.join(tmp_dir, "vocab.txt")
with open(vocab_file, "w", encoding="utf-8") as vocab_writer:
vocab_writer.write("".join([x + "\n" for x in self.vocab_tokens]))
tokenizer = BertTokenizer(vocab_file)
tokenizer.push_to_hub(tmp_repo, token=self._token)
new_tokenizer = BertTokenizer.from_pretrained(tmp_repo)
self.assertDictEqual(new_tokenizer.vocab, tokenizer.vocab)
finally:
# Always (try to) delete the repo.
self._try_delete_repo(repo_id=tmp_repo, token=self._token)
tokenizer.push_to_hub(tmp_repo.repo_id, token=self._token)
new_tokenizer = BertTokenizer.from_pretrained(tmp_repo.repo_id)
self.assertDictEqual(new_tokenizer.vocab, tokenizer.vocab)
def test_push_to_hub_in_organization_via_save_pretrained(self):
with tempfile.TemporaryDirectory() as tmp_dir:
try:
tmp_repo = f"valid_org/test-tokenizer-{Path(tmp_dir).name}"
with TemporaryHubRepo(namespace="valid_org", token=self._token) as tmp_repo:
with tempfile.TemporaryDirectory() as tmp_dir:
vocab_file = os.path.join(tmp_dir, "vocab.txt")
with open(vocab_file, "w", encoding="utf-8") as vocab_writer:
vocab_writer.write("".join([x + "\n" for x in self.vocab_tokens]))
tokenizer = BertTokenizer(vocab_file)
# Push to hub via save_pretrained
tokenizer.save_pretrained(tmp_dir, repo_id=tmp_repo, push_to_hub=True, token=self._token)
tokenizer.save_pretrained(tmp_dir, repo_id=tmp_repo.repo_id, push_to_hub=True, token=self._token)
new_tokenizer = BertTokenizer.from_pretrained(tmp_repo)
self.assertDictEqual(new_tokenizer.vocab, tokenizer.vocab)
finally:
# Always (try to) delete the repo.
self._try_delete_repo(repo_id=tmp_repo, token=self._token)
new_tokenizer = BertTokenizer.from_pretrained(tmp_repo.repo_id)
self.assertDictEqual(new_tokenizer.vocab, tokenizer.vocab)
@require_tokenizers
def test_push_to_hub_dynamic_tokenizer(self):
with tempfile.TemporaryDirectory() as tmp_dir:
try:
tmp_repo = f"{USER}/test-dynamic-tokenizer-{Path(tmp_dir).name}"
CustomTokenizer.register_for_auto_class()
with TemporaryHubRepo(token=self._token) as tmp_repo:
CustomTokenizer.register_for_auto_class()
with tempfile.TemporaryDirectory() as tmp_dir:
vocab_file = os.path.join(tmp_dir, "vocab.txt")
with open(vocab_file, "w", encoding="utf-8") as vocab_writer:
vocab_writer.write("".join([x + "\n" for x in self.vocab_tokens]))
tokenizer = CustomTokenizer(vocab_file)
# No fast custom tokenizer
tokenizer.push_to_hub(tmp_repo, token=self._token)
# No fast custom tokenizer
tokenizer.push_to_hub(tmp_repo.repo_id, token=self._token)
tokenizer = AutoTokenizer.from_pretrained(tmp_repo, trust_remote_code=True)
# Can't make an isinstance check because the new_model.config is from the CustomTokenizer class of a dynamic module
self.assertEqual(tokenizer.__class__.__name__, "CustomTokenizer")
finally:
# Always (try to) delete the repo.
self._try_delete_repo(repo_id=tmp_repo, token=self._token)
tokenizer = AutoTokenizer.from_pretrained(tmp_repo.repo_id, trust_remote_code=True)
# Can't make an isinstance check because the new_model.config is from the CustomTokenizer class of a dynamic module
self.assertEqual(tokenizer.__class__.__name__, "CustomTokenizer")
@require_tokenizers
def test_push_to_hub_dynamic_tokenizer_with_both_slow_and_fast_classes(self):
with tempfile.TemporaryDirectory() as tmp_dir:
try:
tmp_repo = f"{USER}/test-dynamic-tokenizer-{Path(tmp_dir).name}"
CustomTokenizer.register_for_auto_class()
with TemporaryHubRepo(token=self._token) as tmp_repo:
CustomTokenizer.register_for_auto_class()
# Fast and slow custom tokenizer
CustomTokenizerFast.register_for_auto_class()
# Fast and slow custom tokenizer
CustomTokenizerFast.register_for_auto_class()
with tempfile.TemporaryDirectory() as tmp_dir:
vocab_file = os.path.join(tmp_dir, "vocab.txt")
with open(vocab_file, "w", encoding="utf-8") as vocab_writer:
vocab_writer.write("".join([x + "\n" for x in self.vocab_tokens]))
@@ -234,17 +204,14 @@ class TokenizerPushToHubTester(unittest.TestCase):
bert_tokenizer.save_pretrained(tmp_dir)
tokenizer = CustomTokenizerFast.from_pretrained(tmp_dir)
tokenizer.push_to_hub(tmp_repo, token=self._token)
tokenizer.push_to_hub(tmp_repo.repo_id, token=self._token)
tokenizer = AutoTokenizer.from_pretrained(tmp_repo, trust_remote_code=True)
# Can't make an isinstance check because the new_model.config is from the FakeConfig class of a dynamic module
self.assertEqual(tokenizer.__class__.__name__, "CustomTokenizerFast")
tokenizer = AutoTokenizer.from_pretrained(tmp_repo, use_fast=False, trust_remote_code=True)
# Can't make an isinstance check because the new_model.config is from the FakeConfig class of a dynamic module
self.assertEqual(tokenizer.__class__.__name__, "CustomTokenizer")
finally:
# Always (try to) delete the repo.
self._try_delete_repo(repo_id=tmp_repo, token=self._token)
tokenizer = AutoTokenizer.from_pretrained(tmp_repo.repo_id, trust_remote_code=True)
# Can't make an isinstance check because the new_model.config is from the FakeConfig class of a dynamic module
self.assertEqual(tokenizer.__class__.__name__, "CustomTokenizerFast")
tokenizer = AutoTokenizer.from_pretrained(tmp_repo.repo_id, use_fast=False, trust_remote_code=True)
# Can't make an isinstance check because the new_model.config is from the FakeConfig class of a dynamic module
self.assertEqual(tokenizer.__class__.__name__, "CustomTokenizer")
class TrieTest(unittest.TestCase):