[AutoProcessor] Correct AutoProcessor and automatically add processor class (#14881)
* [AutoProcessor] Correct AutoProcessor and automatically add processor class * up * up * up * up * up * up * up * up * continue tomorrow * up * up * up * make processor class private * fix loop
This commit is contained in:
committed by
GitHub
parent
d7d60df0ec
commit
a1392883ce
@@ -1444,6 +1444,7 @@ class PreTrainedTokenizerBase(SpecialTokensMixin, PushToHubMixin):
|
||||
self.init_inputs = ()
|
||||
self.init_kwargs = copy.deepcopy(kwargs)
|
||||
self.name_or_path = kwargs.pop("name_or_path", "")
|
||||
self._processor_class = kwargs.pop("processor_class", None)
|
||||
|
||||
# For backward compatibility we fallback to set model_max_length from max_len if provided
|
||||
model_max_length = kwargs.pop("model_max_length", kwargs.pop("max_len", None))
|
||||
@@ -1505,6 +1506,10 @@ class PreTrainedTokenizerBase(SpecialTokensMixin, PushToHubMixin):
|
||||
"Setting 'max_len_sentences_pair' is now deprecated. " "This value is automatically set up."
|
||||
)
|
||||
|
||||
def _set_processor_class(self, processor_class: str):
    """
    Store `processor_class` on this tokenizer as the private `_processor_class` attribute.

    Args:
        processor_class (`str`):
            Value to record. Elsewhere in this class a non-`None` `_processor_class` is written
            to the saved tokenizer config under the `"processor_class"` key.
    """
    # Plain attribute assignment: the value is stored as given, with no validation or lookup.
    self._processor_class = processor_class
|
||||
|
||||
def __repr__(self) -> str:
|
||||
return (
|
||||
f"{'PreTrainedTokenizerFast' if self.is_fast else 'PreTrainedTokenizer'}(name_or_path='{self.name_or_path}', "
|
||||
@@ -2029,6 +2034,8 @@ class PreTrainedTokenizerBase(SpecialTokensMixin, PushToHubMixin):
|
||||
tokenizer_config["tokenizer_class"] = tokenizer_class
|
||||
if getattr(self, "_auto_map", None) is not None:
|
||||
tokenizer_config["auto_map"] = self._auto_map
|
||||
if getattr(self, "_processor_class", None) is not None:
|
||||
tokenizer_config["processor_class"] = self._processor_class
|
||||
|
||||
with open(tokenizer_config_file, "w", encoding="utf-8") as f:
|
||||
f.write(json.dumps(tokenizer_config, ensure_ascii=False))
|
||||
|
||||
Reference in New Issue
Block a user