Refine errors for pretrained objects (#15261)

* Refine errors for pretrained objects

* PoC to avoid using get_list_of_files

* Adapt tests to use new errors

* Quality + Fix PoC

* Revert "PoC to avoid using get_list_of_files"

This reverts commit cb93b7cae8504ef837c2a7663cb7955e714f323e.

* Revert "Quality + Fix PoC"

This reverts commit 3ba6d0d4ca546708b31d355baa9e68ba9736508f.

* Fix doc

* Revert PoC

* Add feature extractors

* More tests and PT model

* Adapt error message

* Feature extractor tests

* TF model

* Flax model and test

* Merge flax auto tests

* Add tokenization

* Fix test
This commit is contained in:
Sylvain Gugger
2022-01-21 15:00:09 -05:00
committed by GitHub
parent 80af1048cf
commit 6ac77534bf
16 changed files with 603 additions and 103 deletions

View File

@@ -83,3 +83,22 @@ class AutoConfigTest(unittest.TestCase):
finally:
if "new-model" in CONFIG_MAPPING._extra_content:
del CONFIG_MAPPING._extra_content["new-model"]
def test_repo_not_found(self):
with self.assertRaisesRegex(
EnvironmentError, "bert-base is not a local folder and is not a valid model identifier"
):
_ = AutoConfig.from_pretrained("bert-base")
def test_revision_not_found(self):
with self.assertRaisesRegex(
EnvironmentError, r"aaaaaa is not a valid git identifier \(branch name, tag name or commit id\)"
):
_ = AutoConfig.from_pretrained(DUMMY_UNKNOWN_IDENTIFIER, revision="aaaaaa")
def test_configuration_not_found(self):
with self.assertRaisesRegex(
EnvironmentError,
"hf-internal-testing/no-config-test-repo does not appear to have a file named config.json.",
):
_ = AutoConfig.from_pretrained("hf-internal-testing/no-config-test-repo")

View File

@@ -19,6 +19,7 @@ import tempfile
import unittest
from transformers import AutoFeatureExtractor, Wav2Vec2Config, Wav2Vec2FeatureExtractor
from transformers.testing_utils import DUMMY_UNKNOWN_IDENTIFIER
SAMPLE_FEATURE_EXTRACTION_CONFIG_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "fixtures")
@@ -62,3 +63,22 @@ class AutoFeatureExtractorTest(unittest.TestCase):
def test_feature_extractor_from_local_file(self):
config = AutoFeatureExtractor.from_pretrained(SAMPLE_FEATURE_EXTRACTION_CONFIG)
self.assertIsInstance(config, Wav2Vec2FeatureExtractor)
def test_repo_not_found(self):
with self.assertRaisesRegex(
EnvironmentError, "bert-base is not a local folder and is not a valid model identifier"
):
_ = AutoFeatureExtractor.from_pretrained("bert-base")
def test_revision_not_found(self):
with self.assertRaisesRegex(
EnvironmentError, r"aaaaaa is not a valid git identifier \(branch name, tag name or commit id\)"
):
_ = AutoFeatureExtractor.from_pretrained(DUMMY_UNKNOWN_IDENTIFIER, revision="aaaaaa")
def test_feature_extractor_not_found(self):
with self.assertRaisesRegex(
EnvironmentError,
"hf-internal-testing/config-no-model does not appear to have a file named preprocessor_config.json.",
):
_ = AutoFeatureExtractor.from_pretrained("hf-internal-testing/config-no-model")

View File

@@ -17,17 +17,22 @@ import importlib
import io
import unittest
import requests
import transformers
# Try to import everything from transformers to ensure every object can be loaded.
from transformers import * # noqa F406
from transformers.file_utils import (
CONFIG_NAME,
FLAX_WEIGHTS_NAME,
TF2_WEIGHTS_NAME,
WEIGHTS_NAME,
ContextManagers,
EntryNotFoundError,
RepositoryNotFoundError,
RevisionNotFoundError,
filename_to_url,
get_from_cache,
has_file,
hf_bucket_url,
)
from transformers.testing_utils import DUMMY_UNKNOWN_IDENTIFIER
@@ -83,13 +88,19 @@ class GetFromCacheTests(unittest.TestCase):
def test_file_not_found(self):
# Valid revision (None) but missing file.
url = hf_bucket_url(MODEL_ID, filename="missing.bin")
with self.assertRaisesRegex(requests.exceptions.HTTPError, "404 Client Error"):
with self.assertRaisesRegex(EntryNotFoundError, "404 Client Error"):
_ = get_from_cache(url)
def test_model_not_found(self):
# Invalid model file.
url = hf_bucket_url("bert-base", filename="pytorch_model.bin")
with self.assertRaisesRegex(RepositoryNotFoundError, "404 Client Error"):
_ = get_from_cache(url)
def test_revision_not_found(self):
# Valid file but missing revision
url = hf_bucket_url(MODEL_ID, filename=CONFIG_NAME, revision=REVISION_ID_INVALID)
with self.assertRaisesRegex(requests.exceptions.HTTPError, "404 Client Error"):
with self.assertRaisesRegex(RevisionNotFoundError, "404 Client Error"):
_ = get_from_cache(url)
def test_standard_object(self):
@@ -112,6 +123,11 @@ class GetFromCacheTests(unittest.TestCase):
metadata = filename_to_url(filepath)
self.assertEqual(metadata, (url, f'"{PINNED_SHA256}"'))
def test_has_file(self):
self.assertTrue(has_file("hf-internal-testing/tiny-bert-pt-only", WEIGHTS_NAME))
self.assertFalse(has_file("hf-internal-testing/tiny-bert-pt-only", TF2_WEIGHTS_NAME))
self.assertFalse(has_file("hf-internal-testing/tiny-bert-pt-only", FLAX_WEIGHTS_NAME))
class ContextManagerTests(unittest.TestCase):
@unittest.mock.patch("sys.stdout", new_callable=io.StringIO)

View File

@@ -389,3 +389,30 @@ class AutoModelTest(unittest.TestCase):
):
if NewModelConfig in mapping._extra_content:
del mapping._extra_content[NewModelConfig]
def test_repo_not_found(self):
with self.assertRaisesRegex(
EnvironmentError, "bert-base is not a local folder and is not a valid model identifier"
):
_ = AutoModel.from_pretrained("bert-base")
def test_revision_not_found(self):
with self.assertRaisesRegex(
EnvironmentError, r"aaaaaa is not a valid git identifier \(branch name, tag name or commit id\)"
):
_ = AutoModel.from_pretrained(DUMMY_UNKNOWN_IDENTIFIER, revision="aaaaaa")
def test_model_file_not_found(self):
with self.assertRaisesRegex(
EnvironmentError,
"hf-internal-testing/config-no-model does not appear to have a file named pytorch_model.bin",
):
_ = AutoModel.from_pretrained("hf-internal-testing/config-no-model")
def test_model_from_tf_suggestion(self):
with self.assertRaisesRegex(EnvironmentError, "Use `from_tf=True` to load this model"):
_ = AutoModel.from_pretrained("hf-internal-testing/tiny-bert-tf-only")
def test_model_from_flax_suggestion(self):
with self.assertRaisesRegex(EnvironmentError, "Use `from_flax=True` to load this model"):
_ = AutoModel.from_pretrained("hf-internal-testing/tiny-bert-flax-only")

View File

@@ -15,7 +15,7 @@
import unittest
from transformers import AutoConfig, AutoTokenizer, BertConfig, TensorType, is_flax_available
from transformers.testing_utils import require_flax, slow
from transformers.testing_utils import DUMMY_UNKNOWN_IDENTIFIER, require_flax, slow
if is_flax_available():
@@ -76,3 +76,26 @@ class FlaxAutoModelTest(unittest.TestCase):
return model(**kwargs)
eval(**tokens).block_until_ready()
def test_repo_not_found(self):
with self.assertRaisesRegex(
EnvironmentError, "bert-base is not a local folder and is not a valid model identifier"
):
_ = FlaxAutoModel.from_pretrained("bert-base")
def test_revision_not_found(self):
with self.assertRaisesRegex(
EnvironmentError, r"aaaaaa is not a valid git identifier \(branch name, tag name or commit id\)"
):
_ = FlaxAutoModel.from_pretrained(DUMMY_UNKNOWN_IDENTIFIER, revision="aaaaaa")
def test_model_file_not_found(self):
with self.assertRaisesRegex(
EnvironmentError,
"hf-internal-testing/config-no-model does not appear to have a file named flax_model.msgpack",
):
_ = FlaxAutoModel.from_pretrained("hf-internal-testing/config-no-model")
def test_model_from_pt_suggestion(self):
with self.assertRaisesRegex(EnvironmentError, "Use `from_pt=True` to load this model"):
_ = FlaxAutoModel.from_pretrained("hf-internal-testing/tiny-bert-pt-only")

View File

@@ -309,3 +309,26 @@ class TFAutoModelTest(unittest.TestCase):
):
if NewModelConfig in mapping._extra_content:
del mapping._extra_content[NewModelConfig]
def test_repo_not_found(self):
with self.assertRaisesRegex(
EnvironmentError, "bert-base is not a local folder and is not a valid model identifier"
):
_ = TFAutoModel.from_pretrained("bert-base")
def test_revision_not_found(self):
with self.assertRaisesRegex(
EnvironmentError, r"aaaaaa is not a valid git identifier \(branch name, tag name or commit id\)"
):
_ = TFAutoModel.from_pretrained(DUMMY_UNKNOWN_IDENTIFIER, revision="aaaaaa")
def test_model_file_not_found(self):
with self.assertRaisesRegex(
EnvironmentError,
"hf-internal-testing/config-no-model does not appear to have a file named tf_model.h5",
):
_ = TFAutoModel.from_pretrained("hf-internal-testing/config-no-model")
def test_model_from_pt_suggestion(self):
with self.assertRaisesRegex(EnvironmentError, "Use `from_pt=True` to load this model"):
_ = TFAutoModel.from_pretrained("hf-internal-testing/tiny-bert-pt-only")

View File

@@ -150,7 +150,8 @@ class AutoTokenizerTest(unittest.TestCase):
def test_tokenizer_identifier_non_existent(self):
for tokenizer_class in [BertTokenizer, BertTokenizerFast, AutoTokenizer]:
with self.assertRaisesRegex(
ValueError, ".*is not a local path or a model identifier on the model Hub. Did you make a typo?"
EnvironmentError,
"julien-c/herlolip-not-exists is not a local folder and is not a valid model identifier",
):
_ = tokenizer_class.from_pretrained("julien-c/herlolip-not-exists")
@@ -310,3 +311,15 @@ class AutoTokenizerTest(unittest.TestCase):
del CONFIG_MAPPING._extra_content["new-model"]
if NewConfig in TOKENIZER_MAPPING._extra_content:
del TOKENIZER_MAPPING._extra_content[NewConfig]
def test_repo_not_found(self):
with self.assertRaisesRegex(
EnvironmentError, "bert-base is not a local folder and is not a valid model identifier"
):
_ = AutoTokenizer.from_pretrained("bert-base")
def test_revision_not_found(self):
with self.assertRaisesRegex(
EnvironmentError, r"aaaaaa is not a valid git identifier \(branch name, tag name or commit id\)"
):
_ = AutoTokenizer.from_pretrained(DUMMY_UNKNOWN_IDENTIFIER, revision="aaaaaa")