Fix outdated tokenizer doc
This commit is contained in:
@@ -85,7 +85,7 @@ class XxxTokenizer(PreTrainedTokenizer):
|
|||||||
|
|
||||||
Args:
|
Args:
|
||||||
vocab_file: Path to a one-wordpiece-per-line vocabulary file
|
vocab_file: Path to a one-wordpiece-per-line vocabulary file
|
||||||
do_lower_case: Whether to lower case the input. Only has an effect when do_wordpiece_only=False
|
do_lower_case: Whether to lower case the input. Only has an effect when do_basic_tokenize=True
|
||||||
"""
|
"""
|
||||||
|
|
||||||
vocab_files_names = VOCAB_FILES_NAMES
|
vocab_files_names = VOCAB_FILES_NAMES
|
||||||
|
|||||||
@@ -113,12 +113,12 @@ class BertTokenizer(PreTrainedTokenizer):
|
|||||||
|
|
||||||
Args:
|
Args:
|
||||||
vocab_file: Path to a one-wordpiece-per-line vocabulary file
|
vocab_file: Path to a one-wordpiece-per-line vocabulary file
|
||||||
do_lower_case: Whether to lower case the input. Only has an effect when do_wordpiece_only=False
|
do_lower_case: Whether to lower case the input. Only has an effect when do_basic_tokenize=True
|
||||||
do_basic_tokenize: Whether to do basic tokenization before wordpiece.
|
do_basic_tokenize: Whether to do basic tokenization before wordpiece.
|
||||||
max_len: An artificial maximum length to truncate tokenized sequences to; Effective maximum length is always the
|
max_len: An artificial maximum length to truncate tokenized sequences to; Effective maximum length is always the
|
||||||
minimum of this value (if specified) and the underlying BERT model's sequence length.
|
minimum of this value (if specified) and the underlying BERT model's sequence length.
|
||||||
never_split: List of tokens which will never be split during tokenization. Only has an effect when
|
never_split: List of tokens which will never be split during tokenization. Only has an effect when
|
||||||
do_wordpiece_only=False
|
do_basic_tokenize=True
|
||||||
"""
|
"""
|
||||||
|
|
||||||
vocab_files_names = VOCAB_FILES_NAMES
|
vocab_files_names = VOCAB_FILES_NAMES
|
||||||
|
|||||||
@@ -53,12 +53,12 @@ class DistilBertTokenizer(BertTokenizer):
|
|||||||
|
|
||||||
Args:
|
Args:
|
||||||
vocab_file: Path to a one-wordpiece-per-line vocabulary file
|
vocab_file: Path to a one-wordpiece-per-line vocabulary file
|
||||||
do_lower_case: Whether to lower case the input. Only has an effect when do_wordpiece_only=False
|
do_lower_case: Whether to lower case the input. Only has an effect when do_basic_tokenize=True
|
||||||
do_basic_tokenize: Whether to do basic tokenization before wordpiece.
|
do_basic_tokenize: Whether to do basic tokenization before wordpiece.
|
||||||
max_len: An artificial maximum length to truncate tokenized sequences to; Effective maximum length is always the
|
max_len: An artificial maximum length to truncate tokenized sequences to; Effective maximum length is always the
|
||||||
minimum of this value (if specified) and the underlying BERT model's sequence length.
|
minimum of this value (if specified) and the underlying BERT model's sequence length.
|
||||||
never_split: List of tokens which will never be split during tokenization. Only has an effect when
|
never_split: List of tokens which will never be split during tokenization. Only has an effect when
|
||||||
do_wordpiece_only=False
|
do_basic_tokenize=True
|
||||||
"""
|
"""
|
||||||
|
|
||||||
vocab_files_names = VOCAB_FILES_NAMES
|
vocab_files_names = VOCAB_FILES_NAMES
|
||||||
|
|||||||
Reference in New Issue
Block a user