[tokenizer] sanitize saved config (#21483)
* [tokenizer] sanitize saved config
* rm config["name_or_path"] test
This commit is contained in:
@@ -2153,6 +2153,10 @@ class PreTrainedTokenizerBase(SpecialTokensMixin, PushToHubMixin):
|
|||||||
if self._auto_class is not None:
|
if self._auto_class is not None:
|
||||||
custom_object_save(self, save_directory, config=tokenizer_config)
|
custom_object_save(self, save_directory, config=tokenizer_config)
|
||||||
|
|
||||||
|
# remove private information
|
||||||
|
if "name_or_path" in tokenizer_config:
|
||||||
|
tokenizer_config.pop("name_or_path")
|
||||||
|
|
||||||
with open(tokenizer_config_file, "w", encoding="utf-8") as f:
|
with open(tokenizer_config_file, "w", encoding="utf-8") as f:
|
||||||
out_str = json.dumps(tokenizer_config, indent=2, sort_keys=True, ensure_ascii=False) + "\n"
|
out_str = json.dumps(tokenizer_config, indent=2, sort_keys=True, ensure_ascii=False) + "\n"
|
||||||
f.write(out_str)
|
f.write(out_str)
|
||||||
|
|||||||
@@ -230,8 +230,6 @@ class AutoTokenizerTest(unittest.TestCase):
|
|||||||
|
|
||||||
# Check the class of the tokenizer was properly saved (note that it always saves the slow class).
|
# Check the class of the tokenizer was properly saved (note that it always saves the slow class).
|
||||||
self.assertEqual(config["tokenizer_class"], "BertTokenizer")
|
self.assertEqual(config["tokenizer_class"], "BertTokenizer")
|
||||||
# Check other keys just to make sure the config was properly saved /reloaded.
|
|
||||||
self.assertEqual(config["name_or_path"], SMALL_MODEL_IDENTIFIER)
|
|
||||||
|
|
||||||
def test_new_tokenizer_registration(self):
|
def test_new_tokenizer_registration(self):
|
||||||
try:
|
try:
|
||||||
|
|||||||
Reference in New Issue
Block a user