update conversion scripts and __main__

This commit is contained in:
thomwolf
2019-07-16 09:41:55 +02:00
parent 352e3ff998
commit 1b35d05d4b
11 changed files with 53 additions and 20 deletions

View File

@@ -208,7 +208,8 @@ class PreTrainedTokenizer(object):
# if we're using a pretrained model, ensure the tokenizer
# won't index sequences longer than the number of positional embeddings
max_len = cls.max_model_input_sizes[pretrained_model_name_or_path]
kwargs['max_len'] = min(kwargs.get('max_len', int(1e12)), max_len)
if max_len is not None and isinstance(max_len, (int, float)):
kwargs['max_len'] = min(kwargs.get('max_len', int(1e12)), max_len)
# Merge resolved_vocab_files arguments in kwargs.
added_tokens_file = resolved_vocab_files.pop('added_tokens_file', None)