Warn instead of raising in BERT and GPT-2 tokenizers as well, to allow for pre-caching of tokens

This commit is contained in:
Catalin Voss
2019-03-05 12:31:45 -08:00
parent e99bc87e4d
commit 4a49c22584
2 changed files with 2 additions and 2 deletions

View File

@@ -101,7 +101,7 @@ class BertTokenizer(object):
for token in tokens:
ids.append(self.vocab[token])
if len(ids) > self.max_len:
-raise ValueError(
+logger.warning(
"Token indices sequence length is longer than the specified maximum "
" sequence length for this BERT model ({} > {}). Running this"
" sequence through BERT will result in indexing errors".format(len(ids), self.max_len)