From c99ddcc44104ded3a48fef174125ce51d011e650 Mon Sep 17 00:00:00 2001 From: Simon Brandeis <33657802+SBrandeis@users.noreply.github.com> Date: Fri, 10 Jun 2022 15:41:53 +0200 Subject: [PATCH] =?UTF-8?q?=F0=9F=90=9B=20Properly=20raise=20`RepoNotFound?= =?UTF-8?q?Error`=20when=20not=20authenticated=20(#17651)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Raise RepoNotFoundError in case of 401 * Include changes from revert-17646-skip_repo_not_found * Add a comment * 💄 Code quality * 💚 Update `get_from_cache` test * 💚 Code quality & skip failing test --- examples/research_projects/lxmert/demo.ipynb | 2 +- .../research_projects/visual_bert/demo.ipynb | 2 +- src/transformers/utils/hub.py | 22 +++++++++++++------ tests/models/auto/test_configuration_auto.py | 1 - .../auto/test_feature_extraction_auto.py | 1 - tests/models/auto/test_modeling_auto.py | 1 - tests/models/auto/test_modeling_flax_auto.py | 1 - tests/models/auto/test_modeling_tf_auto.py | 1 - tests/models/auto/test_tokenization_auto.py | 2 -- tests/utils/test_file_utils.py | 20 +++++++++++------ 10 files changed, 30 insertions(+), 23 deletions(-) diff --git a/examples/research_projects/lxmert/demo.ipynb b/examples/research_projects/lxmert/demo.ipynb index 55658ae111..e80865d0e2 100644 --- a/examples/research_projects/lxmert/demo.ipynb +++ b/examples/research_projects/lxmert/demo.ipynb @@ -6,7 +6,7 @@ "metadata": {}, "outputs": [], "source": [ - "#%pip install-r requirements.txt" + "# %pip install-r requirements.txt" ] }, { diff --git a/examples/research_projects/visual_bert/demo.ipynb b/examples/research_projects/visual_bert/demo.ipynb index a025e419a3..14a65ce3df 100644 --- a/examples/research_projects/visual_bert/demo.ipynb +++ b/examples/research_projects/visual_bert/demo.ipynb @@ -4,7 +4,7 @@ "cell_type": "code", "execution_count": 1, "source": [ - "#%pip install-r requirements.txt" + "# %pip install-r requirements.txt" ], "outputs": [], "metadata": {} diff --git a/src/transformers/utils/hub.py b/src/transformers/utils/hub.py index 52aac56cf0..a4717cf7ea 100644 --- a/src/transformers/utils/hub.py +++ b/src/transformers/utils/hub.py @@ -38,6 +38,7 @@ import requests from filelock import FileLock from huggingface_hub import HfFolder, Repository, create_repo, list_repo_files, whoami from requests.exceptions import HTTPError +from requests.models import Response from transformers.utils.logging import tqdm from . import __version__, logging @@ -398,20 +399,27 @@ class RevisionNotFoundError(HTTPError): """Raised when trying to access a hf.co URL with a valid repository but an invalid revision.""" -def _raise_for_status(request): +def _raise_for_status(response: Response): """ Internal version of `request.raise_for_status()` that will refine a potential HTTPError. """ - if "X-Error-Code" in request.headers: - error_code = request.headers["X-Error-Code"] + if "X-Error-Code" in response.headers: + error_code = response.headers["X-Error-Code"] if error_code == "RepoNotFound": - raise RepositoryNotFoundError(f"404 Client Error: Repository Not Found for url: {request.url}") + raise RepositoryNotFoundError(f"404 Client Error: Repository Not Found for url: {response.url}") elif error_code == "EntryNotFound": - raise EntryNotFoundError(f"404 Client Error: Entry Not Found for url: {request.url}") + raise EntryNotFoundError(f"404 Client Error: Entry Not Found for url: {response.url}") elif error_code == "RevisionNotFound": - raise RevisionNotFoundError(f"404 Client Error: Revision Not Found for url: {request.url}") + raise RevisionNotFoundError(f"404 Client Error: Revision Not Found for url: {response.url}") - request.raise_for_status() + if response.status_code == 401: + # The repo was not found and the user is not Authenticated + raise RepositoryNotFoundError( + f"401 Client Error: Repository not found for url: {response.url}. " + "If the repo is private, make sure you are authenticated." + ) + + response.raise_for_status() def http_get(url: str, temp_file: BinaryIO, proxies=None, resume_size=0, headers: Optional[Dict[str, str]] = None): diff --git a/tests/models/auto/test_configuration_auto.py b/tests/models/auto/test_configuration_auto.py index 473d3ab2bd..2695082c41 100644 --- a/tests/models/auto/test_configuration_auto.py +++ b/tests/models/auto/test_configuration_auto.py @@ -88,7 +88,6 @@ class AutoConfigTest(unittest.TestCase): if "custom" in CONFIG_MAPPING._extra_content: del CONFIG_MAPPING._extra_content["custom"] - @unittest.skip("Temp bug in the Hub not returning RepoNotFound errors.") def test_repo_not_found(self): with self.assertRaisesRegex( EnvironmentError, "bert-base is not a local folder and is not a valid model identifier" diff --git a/tests/models/auto/test_feature_extraction_auto.py b/tests/models/auto/test_feature_extraction_auto.py index 9bba9b86bd..e9d044e8da 100644 --- a/tests/models/auto/test_feature_extraction_auto.py +++ b/tests/models/auto/test_feature_extraction_auto.py @@ -76,7 +76,6 @@ class AutoFeatureExtractorTest(unittest.TestCase): config = AutoFeatureExtractor.from_pretrained(SAMPLE_FEATURE_EXTRACTION_CONFIG) self.assertIsInstance(config, Wav2Vec2FeatureExtractor) - @unittest.skip("Temp bug in the Hub not returning RepoNotFound errors.") def test_repo_not_found(self): with self.assertRaisesRegex( EnvironmentError, "bert-base is not a local folder and is not a valid model identifier" diff --git a/tests/models/auto/test_modeling_auto.py b/tests/models/auto/test_modeling_auto.py index ed2cbb60f7..3731d70f5b 100644 --- a/tests/models/auto/test_modeling_auto.py +++ b/tests/models/auto/test_modeling_auto.py @@ -328,7 +328,6 @@ class AutoModelTest(unittest.TestCase): if CustomConfig in mapping._extra_content: del mapping._extra_content[CustomConfig] - @unittest.skip("Temp bug in the Hub not returning RepoNotFound errors.") def test_repo_not_found(self): with self.assertRaisesRegex( EnvironmentError, "bert-base is not a local folder and is not a valid model identifier" diff --git a/tests/models/auto/test_modeling_flax_auto.py b/tests/models/auto/test_modeling_flax_auto.py index 70dd8692c8..26f80f9706 100644 --- a/tests/models/auto/test_modeling_flax_auto.py +++ b/tests/models/auto/test_modeling_flax_auto.py @@ -77,7 +77,6 @@ class FlaxAutoModelTest(unittest.TestCase): eval(**tokens).block_until_ready() - @unittest.skip("Temp bug in the Hub not returning RepoNotFound errors.") def test_repo_not_found(self): with self.assertRaisesRegex( EnvironmentError, "bert-base is not a local folder and is not a valid model identifier" diff --git a/tests/models/auto/test_modeling_tf_auto.py b/tests/models/auto/test_modeling_tf_auto.py index 528557a34e..a803a34511 100644 --- a/tests/models/auto/test_modeling_tf_auto.py +++ b/tests/models/auto/test_modeling_tf_auto.py @@ -265,7 +265,6 @@ class TFAutoModelTest(unittest.TestCase): if NewModelConfig in mapping._extra_content: del mapping._extra_content[NewModelConfig] - @unittest.skip("Temp bug in the Hub not returning RepoNotFound errors.") def test_repo_not_found(self): with self.assertRaisesRegex( EnvironmentError, "bert-base is not a local folder and is not a valid model identifier" diff --git a/tests/models/auto/test_tokenization_auto.py b/tests/models/auto/test_tokenization_auto.py index b8b19a5db0..1e1abb9245 100644 --- a/tests/models/auto/test_tokenization_auto.py +++ b/tests/models/auto/test_tokenization_auto.py @@ -142,7 +142,6 @@ class AutoTokenizerTest(unittest.TestCase): self.assertEqual(tokenizer.model_max_length, 512) - @unittest.skip("Temp bug in the Hub not returning RepoNotFound errors.") @require_tokenizers def test_tokenizer_identifier_non_existent(self): for tokenizer_class in [BertTokenizer, BertTokenizerFast, AutoTokenizer]: @@ -330,7 +329,6 @@ class AutoTokenizerTest(unittest.TestCase): else: self.assertEqual(tokenizer.__class__.__name__, "NewTokenizer") - @unittest.skip("Temp bug in the Hub not returning RepoNotFound errors.") def test_repo_not_found(self): with self.assertRaisesRegex( EnvironmentError, "bert-base is not a local folder and is not a valid model identifier" diff --git a/tests/utils/test_file_utils.py b/tests/utils/test_file_utils.py index 3733cdf326..19adfe21dd 100644 --- a/tests/utils/test_file_utils.py +++ b/tests/utils/test_file_utils.py @@ -99,12 +99,19 @@ class GetFromCacheTests(unittest.TestCase): with self.assertRaisesRegex(EntryNotFoundError, "404 Client Error"): _ = get_from_cache(url) - @unittest.skip("Temp bug in the Hub not returning RepoNotFound errors.") - def test_model_not_found(self): - # Invalid model file. + def test_model_not_found_not_authenticated(self): + # Invalid model id. + url = hf_bucket_url("bert-base", filename="pytorch_model.bin") + with self.assertRaisesRegex(RepositoryNotFoundError, "401 Client Error"): + _ = get_from_cache(url) + + @unittest.skip("No authentication when testing against prod") + def test_model_not_found_authenticated(self): + # Invalid model id. url = hf_bucket_url("bert-base", filename="pytorch_model.bin") with self.assertRaisesRegex(RepositoryNotFoundError, "404 Client Error"): - _ = get_from_cache(url) + _ = get_from_cache(url, use_auth_token="hf_sometoken") + # ^ TODO - if we decide to unskip this: use a real / functional token def test_revision_not_found(self): # Valid file but missing revision @@ -142,9 +149,8 @@ class GetFromCacheTests(unittest.TestCase): self.assertIsNone(get_file_from_repo("bert-base-cased", "ahah.txt")) # The function raises if the repository does not exist. - # Uncomment when bug is fixed. - # with self.assertRaisesRegex(EnvironmentError, "is not a valid model identifier"): - # get_file_from_repo("bert-base-case", "config.json") + with self.assertRaisesRegex(EnvironmentError, "is not a valid model identifier"): + get_file_from_repo("bert-base-case", "config.json") # The function raises if the revision does not exist. with self.assertRaisesRegex(EnvironmentError, "is not a valid git identifier"):