Allow custom code for Processors (#15649)

* Allow custom code for Processors

* Add more tests

* Test that all auto_map configs are properly set
This commit is contained in:
Sylvain Gugger
2022-02-15 09:44:35 -05:00
committed by GitHub
parent 86a7845c0c
commit 45f56580a7
9 changed files with 288 additions and 41 deletions

View File

@@ -3812,7 +3812,9 @@ class TokenizerPushToHubTester(unittest.TestCase):
with open(os.path.join(tmp_dir, "tokenizer_config.json")) as f:
tokenizer_config = json.load(f)
self.assertEqual(tokenizer_config["auto_map"], ["custom_tokenization.CustomTokenizer", None])
self.assertDictEqual(
tokenizer_config["auto_map"], {"AutoTokenizer": ["custom_tokenization.CustomTokenizer", None]}
)
repo.push_to_hub()
@@ -3837,9 +3839,14 @@ class TokenizerPushToHubTester(unittest.TestCase):
with open(os.path.join(tmp_dir, "tokenizer_config.json")) as f:
tokenizer_config = json.load(f)
self.assertEqual(
self.assertDictEqual(
tokenizer_config["auto_map"],
["custom_tokenization.CustomTokenizer", "custom_tokenization_fast.CustomTokenizerFast"],
{
"AutoTokenizer": [
"custom_tokenization.CustomTokenizer",
"custom_tokenization_fast.CustomTokenizerFast",
]
},
)
repo.push_to_hub()