Make Transformers use cache files when hf.co is down (#16362)

* Make Transformers use cache files when hf.co is down * Fix tests * Was there a random circleCI failure? * Isolate patches * Style * Comment out the failure since it doesn't fail anymore * Better comment
2022-03-23 15:56:49 -04:00
parent 8a69e023bf
commit c595b6e6a9
13 changed files with 148 additions and 35 deletions
--- a/tests/test_tokenization_common.py
+++ b/tests/test_tokenization_common.py
@@ -24,6 +24,7 @@ import shutil
 import sys
 import tempfile
 import unittest
+import unittest.mock as mock
 from collections import OrderedDict
 from itertools import takewhile
 from pathlib import Path
@@ -3742,6 +3743,24 @@ class TokenizerTesterMixin:
                    self.rust_tokenizer_class.from_pretrained(tmp_dir_2)


+class TokenizerUtilTester(unittest.TestCase):
+    def test_cached_files_are_used_when_internet_is_down(self):
+        # A mock response for an HTTP HEAD request, emulating the server being down
+        response_mock = mock.Mock()
+        response_mock.status_code = 500
+        response_mock.headers = []
+        response_mock.raise_for_status.side_effect = HTTPError
+
+        # Download this model to make sure it's in the cache.
+        _ = BertTokenizer.from_pretrained("hf-internal-testing/tiny-random-bert")
+
+        # Under the mock environment we get a 500 error when trying to reach the model.
+        with mock.patch("transformers.utils.hub.requests.head", return_value=response_mock) as mock_head:
+            _ = BertTokenizer.from_pretrained("hf-internal-testing/tiny-random-bert")
+            # This checks that we did call the fake head request
+            mock_head.assert_called()
+
+
@is_staging_test
 class TokenizerPushToHubTester(unittest.TestCase):
    vocab_tokens = ["[UNK]", "[CLS]", "[SEP]", "[PAD]", "[MASK]", "bla", "blou"]