Fix properties of unset special tokens in non-verbose mode (#17797)

Co-authored-by: SaulLu <55560583+SaulLu@users.noreply.github.com>
This commit is contained in:
Guillaume Klein
2022-06-23 14:40:13 +02:00
committed by GitHub
parent b2fdbaccdd
commit 3eed5530ec
6 changed files with 56 additions and 24 deletions

View File

@@ -968,8 +968,9 @@ class SpecialTokensMixin:
"""
`str`: Beginning of sentence token. Log an error if used while not having been set.
"""
if self._bos_token is None and self.verbose:
logger.error("Using bos_token, but it is not set yet.")
if self._bos_token is None:
if self.verbose:
logger.error("Using bos_token, but it is not set yet.")
return None
return str(self._bos_token)
@@ -978,8 +979,9 @@ class SpecialTokensMixin:
"""
`str`: End of sentence token. Log an error if used while not having been set.
"""
if self._eos_token is None and self.verbose:
logger.error("Using eos_token, but it is not set yet.")
if self._eos_token is None:
if self.verbose:
logger.error("Using eos_token, but it is not set yet.")
return None
return str(self._eos_token)
@@ -988,8 +990,9 @@ class SpecialTokensMixin:
"""
`str`: Unknown token. Log an error if used while not having been set.
"""
if self._unk_token is None and self.verbose:
logger.error("Using unk_token, but it is not set yet.")
if self._unk_token is None:
if self.verbose:
logger.error("Using unk_token, but it is not set yet.")
return None
return str(self._unk_token)
@@ -999,8 +1002,9 @@ class SpecialTokensMixin:
`str`: Separation token, to separate context and query in an input sequence. Log an error if used while not
having been set.
"""
if self._sep_token is None and self.verbose:
logger.error("Using sep_token, but it is not set yet.")
if self._sep_token is None:
if self.verbose:
logger.error("Using sep_token, but it is not set yet.")
return None
return str(self._sep_token)
@@ -1009,8 +1013,9 @@ class SpecialTokensMixin:
"""
`str`: Padding token. Log an error if used while not having been set.
"""
if self._pad_token is None and self.verbose:
logger.error("Using pad_token, but it is not set yet.")
if self._pad_token is None:
if self.verbose:
logger.error("Using pad_token, but it is not set yet.")
return None
return str(self._pad_token)
@@ -1020,8 +1025,9 @@ class SpecialTokensMixin:
`str`: Classification token, to extract a summary of an input sequence leveraging self-attention along the full
depth of the model. Log an error if used while not having been set.
"""
if self._cls_token is None and self.verbose:
logger.error("Using cls_token, but it is not set yet.")
if self._cls_token is None:
if self.verbose:
logger.error("Using cls_token, but it is not set yet.")
return None
return str(self._cls_token)
@@ -1031,8 +1037,9 @@ class SpecialTokensMixin:
`str`: Mask token, to use when training a model with masked-language modeling. Log an error if used while not
having been set.
"""
if self._mask_token is None and self.verbose:
logger.error("Using mask_token, but it is not set yet.")
if self._mask_token is None:
if self.verbose:
logger.error("Using mask_token, but it is not set yet.")
return None
return str(self._mask_token)
@@ -1042,8 +1049,9 @@ class SpecialTokensMixin:
`List[str]`: All the additional special tokens you may want to use. Log an error if used while not having been
set.
"""
if self._additional_special_tokens is None and self.verbose:
logger.error("Using additional_special_tokens, but it is not set yet.")
if self._additional_special_tokens is None:
if self.verbose:
logger.error("Using additional_special_tokens, but it is not set yet.")
return None
return [str(tok) for tok in self._additional_special_tokens]