Allow custom code for Processors (#15649)

* Allow custom code for Processors

* Add more test

* Test all auto_map configs are properly set
This commit is contained in:
Sylvain Gugger
2022-02-15 09:44:35 -05:00
committed by GitHub
parent 86a7845c0c
commit 45f56580a7
9 changed files with 288 additions and 41 deletions

View File

@@ -310,6 +310,38 @@ class AutoTokenizerTest(unittest.TestCase):
if CustomConfig in TOKENIZER_MAPPING._extra_content:
del TOKENIZER_MAPPING._extra_content[CustomConfig]
def test_from_pretrained_dynamic_tokenizer(self):
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/test_dynamic_tokenizer", trust_remote_code=True)
self.assertTrue(tokenizer.special_attribute_present)
if is_tokenizers_available():
self.assertEqual(tokenizer.__class__.__name__, "NewTokenizerFast")
# Test we can also load the slow version
tokenizer = AutoTokenizer.from_pretrained(
"hf-internal-testing/test_dynamic_tokenizer", trust_remote_code=True, use_fast=False
)
self.assertTrue(tokenizer.special_attribute_present)
self.assertEqual(tokenizer.__class__.__name__, "NewTokenizer")
else:
self.assertEqual(tokenizer.__class__.__name__, "NewTokenizer")
def test_from_pretrained_dynamic_tokenizer_legacy_format(self):
tokenizer = AutoTokenizer.from_pretrained(
"hf-internal-testing/test_dynamic_tokenizer_legacy", trust_remote_code=True
)
self.assertTrue(tokenizer.special_attribute_present)
if is_tokenizers_available():
self.assertEqual(tokenizer.__class__.__name__, "NewTokenizerFast")
# Test we can also load the slow version
tokenizer = AutoTokenizer.from_pretrained(
"hf-internal-testing/test_dynamic_tokenizer_legacy", trust_remote_code=True, use_fast=False
)
self.assertTrue(tokenizer.special_attribute_present)
self.assertEqual(tokenizer.__class__.__name__, "NewTokenizer")
else:
self.assertEqual(tokenizer.__class__.__name__, "NewTokenizer")
def test_repo_not_found(self):
with self.assertRaisesRegex(
EnvironmentError, "bert-base is not a local folder and is not a valid model identifier"