From 34be08efcd4d318785b3eac592f27a3d5dd2144b Mon Sep 17 00:00:00 2001
From: Sylvain Gugger <35901082+sgugger@users.noreply.github.com>
Date: Tue, 27 Sep 2022 09:36:34 -0400
Subject: [PATCH] More tests for regression in cached non existence (#19216)

* More tests for regression in cached non existence

* Style
---
 tests/test_tokenization_common.py | 21 ++++++++++++++++++++-
 tests/utils/test_hub_utils.py     | 15 +++++++++++++++
 2 files changed, 35 insertions(+), 1 deletion(-)

diff --git a/tests/test_tokenization_common.py b/tests/test_tokenization_common.py
index 48add3f4f9..537f5fb9bd 100644
--- a/tests/test_tokenization_common.py
+++ b/tests/test_tokenization_common.py
@@ -40,6 +40,7 @@ from transformers import (
     AutoTokenizer,
     BertTokenizer,
     BertTokenizerFast,
+    GPT2TokenizerFast,
     PreTrainedTokenizer,
     PreTrainedTokenizerBase,
     PreTrainedTokenizerFast,
@@ -3884,12 +3885,30 @@ class TokenizerUtilTester(unittest.TestCase):
         # Download this model to make sure it's in the cache.
         _ = BertTokenizer.from_pretrained("hf-internal-testing/tiny-random-bert")
 
-        # Under the mock environment we get a 500 error when trying to reach the model.
+        # Under the mock environment we get a 500 error when trying to reach the tokenizer.
         with mock.patch("requests.request", return_value=response_mock) as mock_head:
             _ = BertTokenizer.from_pretrained("hf-internal-testing/tiny-random-bert")
             # This check we did call the fake head request
             mock_head.assert_called()
 
+    @require_tokenizers
+    def test_cached_files_are_used_when_internet_is_down_missing_files(self):
+        # A mock response for an HTTP head request to emulate server down
+        response_mock = mock.Mock()
+        response_mock.status_code = 500
+        response_mock.headers = {}
+        response_mock.raise_for_status.side_effect = HTTPError
+        response_mock.json.return_value = {}
+
+        # Download this tokenizer to make sure it's in the cache.
+        _ = GPT2TokenizerFast.from_pretrained("gpt2")
+
+        # Under the mock environment we get a 500 error when trying to reach the tokenizer.
+        with mock.patch("requests.request", return_value=response_mock) as mock_head:
+            _ = GPT2TokenizerFast.from_pretrained("gpt2")
+            # This checks that we did call the fake head request
+            mock_head.assert_called()
+
     def test_legacy_load_from_one_file(self):
         # This test is for deprecated behavior and can be removed in v5
         try:
diff --git a/tests/utils/test_hub_utils.py b/tests/utils/test_hub_utils.py
index f55a0ae431..c8c7d0faad 100644
--- a/tests/utils/test_hub_utils.py
+++ b/tests/utils/test_hub_utils.py
@@ -15,8 +15,10 @@ import json
 import os
 import tempfile
 import unittest
+import unittest.mock as mock
 from pathlib import Path
 
+from requests.exceptions import HTTPError
 from transformers.utils import (
     CONFIG_NAME,
     FLAX_WEIGHTS_NAME,
@@ -79,6 +81,19 @@ class GetFromCacheTests(unittest.TestCase):
         path = cached_file(RANDOM_BERT, "conf", local_files_only=True, _raise_exceptions_for_missing_entries=False)
         self.assertIsNone(path)
 
+        response_mock = mock.Mock()
+        response_mock.status_code = 500
+        response_mock.headers = {}
+        response_mock.raise_for_status.side_effect = HTTPError
+        response_mock.json.return_value = {}
+
+        # Under the mock environment we get a 500 error when trying to fetch the file.
+        with mock.patch("requests.request", return_value=response_mock) as mock_head:
+            path = cached_file(RANDOM_BERT, "conf", _raise_exceptions_for_connection_errors=False)
+            self.assertIsNone(path)
+            # This checks that we did call the fake head request
+            mock_head.assert_called()
+
     def test_has_file(self):
         self.assertTrue(has_file("hf-internal-testing/tiny-bert-pt-only", WEIGHTS_NAME))
         self.assertFalse(has_file("hf-internal-testing/tiny-bert-pt-only", TF2_WEIGHTS_NAME))