Fix flaky Hub CI (test_trainer.py) (#35062)

* fix

* Update src/transformers/testing_utils.py

Co-authored-by: Lucain <lucainp@gmail.com>

* fix

* fix

* fix

* fix

* fix

* fix

* fix

* fix

* check

* check

* check

* check

* check

* check

* Update src/transformers/testing_utils.py

Co-authored-by: Lucain <lucainp@gmail.com>

* Update src/transformers/testing_utils.py

Co-authored-by: Lucain <lucainp@gmail.com>

* check

* check

* check

* Final space

* Final adjustment

---------

Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
Co-authored-by: Lucain <lucainp@gmail.com>
This commit is contained in:
Yih-Dar
2024-12-05 17:02:27 +01:00
committed by GitHub
parent a928d9c128
commit b0a51e5cff
11 changed files with 670 additions and 922 deletions

View File

@@ -20,11 +20,11 @@ import unittest
import unittest.mock as mock
from pathlib import Path
from huggingface_hub import HfFolder, delete_repo
from huggingface_hub import HfFolder
from requests.exceptions import HTTPError
from transformers import AutoFeatureExtractor, Wav2Vec2FeatureExtractor
from transformers.testing_utils import TOKEN, USER, get_tests_dir, is_staging_test
from transformers.testing_utils import TOKEN, TemporaryHubRepo, get_tests_dir, is_staging_test
sys.path.append(str(Path(__file__).parent.parent.parent / "utils"))
@@ -60,91 +60,63 @@ class FeatureExtractorPushToHubTester(unittest.TestCase):
cls._token = TOKEN
HfFolder.save_token(TOKEN)
@staticmethod
def _try_delete_repo(repo_id, token):
try:
# Reset repo
delete_repo(repo_id=repo_id, token=token)
except: # noqa E722
pass
def test_push_to_hub(self):
with tempfile.TemporaryDirectory() as tmp_dir:
try:
tmp_repo = f"{USER}/test-feature-extractor-{Path(tmp_dir).name}"
with TemporaryHubRepo(token=self._token) as tmp_repo:
feature_extractor = Wav2Vec2FeatureExtractor.from_pretrained(SAMPLE_FEATURE_EXTRACTION_CONFIG_DIR)
feature_extractor.push_to_hub(tmp_repo.repo_id, token=self._token)
feature_extractor = Wav2Vec2FeatureExtractor.from_pretrained(SAMPLE_FEATURE_EXTRACTION_CONFIG_DIR)
feature_extractor.push_to_hub(tmp_repo, token=self._token)
new_feature_extractor = Wav2Vec2FeatureExtractor.from_pretrained(tmp_repo)
for k, v in feature_extractor.__dict__.items():
self.assertEqual(v, getattr(new_feature_extractor, k))
finally:
# Always (try to) delete the repo.
self._try_delete_repo(repo_id=tmp_repo, token=self._token)
new_feature_extractor = Wav2Vec2FeatureExtractor.from_pretrained(tmp_repo.repo_id)
for k, v in feature_extractor.__dict__.items():
self.assertEqual(v, getattr(new_feature_extractor, k))
def test_push_to_hub_via_save_pretrained(self):
with tempfile.TemporaryDirectory() as tmp_dir:
try:
tmp_repo = f"{USER}/test-feature-extractor-{Path(tmp_dir).name}"
feature_extractor = Wav2Vec2FeatureExtractor.from_pretrained(SAMPLE_FEATURE_EXTRACTION_CONFIG_DIR)
# Push to hub via save_pretrained
feature_extractor.save_pretrained(tmp_dir, repo_id=tmp_repo, push_to_hub=True, token=self._token)
new_feature_extractor = Wav2Vec2FeatureExtractor.from_pretrained(tmp_repo)
for k, v in feature_extractor.__dict__.items():
self.assertEqual(v, getattr(new_feature_extractor, k))
finally:
# Always (try to) delete the repo.
self._try_delete_repo(repo_id=tmp_repo, token=self._token)
def test_push_to_hub_in_organization(self):
with tempfile.TemporaryDirectory() as tmp_dir:
try:
tmp_repo = f"valid_org/test-feature-extractor-{Path(tmp_dir).name}"
feature_extractor = Wav2Vec2FeatureExtractor.from_pretrained(SAMPLE_FEATURE_EXTRACTION_CONFIG_DIR)
feature_extractor.push_to_hub(tmp_repo, token=self._token)
new_feature_extractor = Wav2Vec2FeatureExtractor.from_pretrained(tmp_repo)
for k, v in feature_extractor.__dict__.items():
self.assertEqual(v, getattr(new_feature_extractor, k))
finally:
# Always (try to) delete the repo.
self._try_delete_repo(repo_id=tmp_repo, token=self._token)
def test_push_to_hub_in_organization_via_save_pretrained(self):
with tempfile.TemporaryDirectory() as tmp_dir:
try:
tmp_repo = f"valid_org/test-feature-extractor-{Path(tmp_dir).name}"
feature_extractor = Wav2Vec2FeatureExtractor.from_pretrained(SAMPLE_FEATURE_EXTRACTION_CONFIG_DIR)
# Push to hub via save_pretrained
feature_extractor.save_pretrained(tmp_dir, repo_id=tmp_repo, push_to_hub=True, token=self._token)
new_feature_extractor = Wav2Vec2FeatureExtractor.from_pretrained(tmp_repo)
for k, v in feature_extractor.__dict__.items():
self.assertEqual(v, getattr(new_feature_extractor, k))
finally:
# Always (try to) delete the repo.
self._try_delete_repo(repo_id=tmp_repo, token=self._token)
def test_push_to_hub_dynamic_feature_extractor(self):
with tempfile.TemporaryDirectory() as tmp_dir:
try:
tmp_repo = f"{USER}/test-dynamic-feature-extractor-{Path(tmp_dir).name}"
CustomFeatureExtractor.register_for_auto_class()
feature_extractor = CustomFeatureExtractor.from_pretrained(SAMPLE_FEATURE_EXTRACTION_CONFIG_DIR)
feature_extractor.push_to_hub(tmp_repo, token=self._token)
# This has added the proper auto_map field to the config
self.assertDictEqual(
feature_extractor.auto_map,
{"AutoFeatureExtractor": "custom_feature_extraction.CustomFeatureExtractor"},
with TemporaryHubRepo(token=self._token) as tmp_repo:
feature_extractor = Wav2Vec2FeatureExtractor.from_pretrained(SAMPLE_FEATURE_EXTRACTION_CONFIG_DIR)
# Push to hub via save_pretrained
with tempfile.TemporaryDirectory() as tmp_dir:
feature_extractor.save_pretrained(
tmp_dir, repo_id=tmp_repo.repo_id, push_to_hub=True, token=self._token
)
new_feature_extractor = AutoFeatureExtractor.from_pretrained(tmp_repo, trust_remote_code=True)
# Can't make an isinstance check because the new_feature_extractor is from the CustomFeatureExtractor class of a dynamic module
self.assertEqual(new_feature_extractor.__class__.__name__, "CustomFeatureExtractor")
finally:
# Always (try to) delete the repo.
self._try_delete_repo(repo_id=tmp_repo, token=self._token)
new_feature_extractor = Wav2Vec2FeatureExtractor.from_pretrained(tmp_repo.repo_id)
for k, v in feature_extractor.__dict__.items():
self.assertEqual(v, getattr(new_feature_extractor, k))
def test_push_to_hub_in_organization(self):
with TemporaryHubRepo(namespace="valid_org", token=self._token) as tmp_repo:
feature_extractor = Wav2Vec2FeatureExtractor.from_pretrained(SAMPLE_FEATURE_EXTRACTION_CONFIG_DIR)
feature_extractor.push_to_hub(tmp_repo.repo_id, token=self._token)
new_feature_extractor = Wav2Vec2FeatureExtractor.from_pretrained(tmp_repo.repo_id)
for k, v in feature_extractor.__dict__.items():
self.assertEqual(v, getattr(new_feature_extractor, k))
def test_push_to_hub_in_organization_via_save_pretrained(self):
with TemporaryHubRepo(namespace="valid_org", token=self._token) as tmp_repo:
feature_extractor = Wav2Vec2FeatureExtractor.from_pretrained(SAMPLE_FEATURE_EXTRACTION_CONFIG_DIR)
# Push to hub via save_pretrained
with tempfile.TemporaryDirectory() as tmp_dir:
feature_extractor.save_pretrained(
tmp_dir, repo_id=tmp_repo.repo_id, push_to_hub=True, token=self._token
)
new_feature_extractor = Wav2Vec2FeatureExtractor.from_pretrained(tmp_repo.repo_id)
for k, v in feature_extractor.__dict__.items():
self.assertEqual(v, getattr(new_feature_extractor, k))
def test_push_to_hub_dynamic_feature_extractor(self):
with TemporaryHubRepo(token=self._token) as tmp_repo:
CustomFeatureExtractor.register_for_auto_class()
feature_extractor = CustomFeatureExtractor.from_pretrained(SAMPLE_FEATURE_EXTRACTION_CONFIG_DIR)
feature_extractor.push_to_hub(tmp_repo.repo_id, token=self._token)
# This has added the proper auto_map field to the config
self.assertDictEqual(
feature_extractor.auto_map,
{"AutoFeatureExtractor": "custom_feature_extraction.CustomFeatureExtractor"},
)
new_feature_extractor = AutoFeatureExtractor.from_pretrained(tmp_repo.repo_id, trust_remote_code=True)
# Can't make an isinstance check because the new_feature_extractor is from the CustomFeatureExtractor class of a dynamic module
self.assertEqual(new_feature_extractor.__class__.__name__, "CustomFeatureExtractor")