update docstring of BERT tokenizer to reflect do_wordpiece_only
This commit is contained in:
@@ -79,8 +79,16 @@ class BertTokenizer(object):
|
|||||||
"""Constructs a BertTokenizer.
|
"""Constructs a BertTokenizer.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
do_lower_case: Whether to lower case the input.
|
vocab_file: Path to a one-wordpiece-per-line vocabulary file
|
||||||
do_wordpiece_only: Whether to do basic tokenization before wordpiece.
|
do_lower_case: Whether to lower case the input
|
||||||
|
Only has an effect when do_basic_tokenize=True
|
||||||
|
do_basic_tokenize: Whether to do basic tokenization before wordpiece.
|
||||||
|
max_len: An artificial maximum length to truncate tokenized sequences to;
|
||||||
|
Effective maximum length is always the minimum of this
|
||||||
|
value (if specified) and the underlying BERT model's
|
||||||
|
sequence length.
|
||||||
|
never_split: List of tokens which will never be split during tokenization.
|
||||||
|
Only has an effect when do_basic_tokenize=True
|
||||||
"""
|
"""
|
||||||
if not os.path.isfile(vocab_file):
|
if not os.path.isfile(vocab_file):
|
||||||
raise ValueError(
|
raise ValueError(
|
||||||
|
|||||||
Reference in New Issue
Block a user