Use commit hash to look in cache instead of calling head (#18534)
* Use commit hash to look in cache instead of calling head * Add tests * Add attr for local configs too * Stupid typos * Fix tests * Update src/transformers/utils/hub.py Co-authored-by: Julien Chaumond <julien@huggingface.co> * Address Julien's comments Co-authored-by: Julien Chaumond <julien@huggingface.co>
This commit is contained in:
@@ -48,6 +48,7 @@ from transformers.testing_utils import (
|
||||
DUMMY_DIFF_TOKENIZER_IDENTIFIER,
|
||||
DUMMY_UNKNOWN_IDENTIFIER,
|
||||
SMALL_MODEL_IDENTIFIER,
|
||||
RequestCounter,
|
||||
require_tokenizers,
|
||||
slow,
|
||||
)
|
||||
@@ -213,6 +214,7 @@ class AutoTokenizerTest(unittest.TestCase):
|
||||
def test_get_tokenizer_config(self):
|
||||
# Check we can load the tokenizer config of an online model.
|
||||
config = get_tokenizer_config("bert-base-cased")
|
||||
_ = config.pop("_commit_hash", None)
|
||||
# If we ever update bert-base-cased tokenizer config, this dict here will need to be updated.
|
||||
self.assertEqual(config, {"do_lower_case": False})
|
||||
|
||||
@@ -340,3 +342,13 @@ class AutoTokenizerTest(unittest.TestCase):
|
||||
EnvironmentError, r"aaaaaa is not a valid git identifier \(branch name, tag name or commit id\)"
|
||||
):
|
||||
_ = AutoTokenizer.from_pretrained(DUMMY_UNKNOWN_IDENTIFIER, revision="aaaaaa")
|
||||
|
||||
def test_cached_tokenizer_has_minimum_calls_to_head(self):
|
||||
# Make sure we have cached the tokenizer.
|
||||
_ = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-bert")
|
||||
with RequestCounter() as counter:
|
||||
_ = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-bert")
|
||||
self.assertEqual(counter.get_request_count, 0)
|
||||
# We still have one extra call because the model does not have a added_tokens.json file
|
||||
self.assertEqual(counter.head_request_count, 2)
|
||||
self.assertEqual(counter.other_request_count, 0)
|
||||
|
||||
Reference in New Issue
Block a user