Fix outdated tokenizer doc

This commit is contained in:
Julien Chaumond
2019-12-17 20:07:39 -05:00
parent ea636440d1
commit a0d386455b
3 changed files with 5 additions and 5 deletions

View File

@@ -85,7 +85,7 @@ class XxxTokenizer(PreTrainedTokenizer):
Args:
vocab_file: Path to a one-wordpiece-per-line vocabulary file
do_lower_case: Whether to lower case the input. Only has an effect when do_wordpiece_only=False
do_lower_case: Whether to lower case the input. Only has an effect when do_basic_tokenize=True
"""
vocab_files_names = VOCAB_FILES_NAMES

View File

@@ -113,12 +113,12 @@ class BertTokenizer(PreTrainedTokenizer):
Args:
vocab_file: Path to a one-wordpiece-per-line vocabulary file
do_lower_case: Whether to lower case the input. Only has an effect when do_wordpiece_only=False
do_lower_case: Whether to lower case the input. Only has an effect when do_basic_tokenize=True
do_basic_tokenize: Whether to do basic tokenization before wordpiece.
max_len: An artificial maximum length to truncate tokenized sequences to; Effective maximum length is always the
minimum of this value (if specified) and the underlying BERT model's sequence length.
never_split: List of tokens which will never be split during tokenization. Only has an effect when
do_wordpiece_only=False
do_basic_tokenize=True
"""
vocab_files_names = VOCAB_FILES_NAMES

View File

@@ -53,12 +53,12 @@ class DistilBertTokenizer(BertTokenizer):
Args:
vocab_file: Path to a one-wordpiece-per-line vocabulary file
do_lower_case: Whether to lower case the input. Only has an effect when do_wordpiece_only=False
do_lower_case: Whether to lower case the input. Only has an effect when do_basic_tokenize=True
do_basic_tokenize: Whether to do basic tokenization before wordpiece.
max_len: An artificial maximum length to truncate tokenized sequences to; Effective maximum length is always the
minimum of this value (if specified) and the underlying BERT model's sequence length.
never_split: List of tokens which will never be split during tokenization. Only has an effect when
do_wordpiece_only=False
do_basic_tokenize=True
"""
vocab_files_names = VOCAB_FILES_NAMES