From a0d386455b347508ea31fc88dd06cc5555255c37 Mon Sep 17 00:00:00 2001
From: Julien Chaumond
Date: Tue, 17 Dec 2019 20:07:39 -0500
Subject: [PATCH] Fix outdated tokenizer doc

---
 templates/adding_a_new_model/tokenization_xxx.py | 2 +-
 transformers/tokenization_bert.py                | 4 ++--
 transformers/tokenization_distilbert.py          | 4 ++--
 3 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/templates/adding_a_new_model/tokenization_xxx.py b/templates/adding_a_new_model/tokenization_xxx.py
index 3d6b4ad9df..7a10a41e5a 100644
--- a/templates/adding_a_new_model/tokenization_xxx.py
+++ b/templates/adding_a_new_model/tokenization_xxx.py
@@ -85,7 +85,7 @@ class XxxTokenizer(PreTrainedTokenizer):
 
     Args:
         vocab_file: Path to a one-wordpiece-per-line vocabulary file
-        do_lower_case: Whether to lower case the input. Only has an effect when do_wordpiece_only=False
+        do_lower_case: Whether to lower case the input. Only has an effect when do_basic_tokenize=True
     """
 
     vocab_files_names = VOCAB_FILES_NAMES
diff --git a/transformers/tokenization_bert.py b/transformers/tokenization_bert.py
index ded5072e58..7ab8029da8 100644
--- a/transformers/tokenization_bert.py
+++ b/transformers/tokenization_bert.py
@@ -113,12 +113,12 @@ class BertTokenizer(PreTrainedTokenizer):
 
     Args:
         vocab_file: Path to a one-wordpiece-per-line vocabulary file
-        do_lower_case: Whether to lower case the input. Only has an effect when do_wordpiece_only=False
+        do_lower_case: Whether to lower case the input. Only has an effect when do_basic_tokenize=True
         do_basic_tokenize: Whether to do basic tokenization before wordpiece.
         max_len: An artificial maximum length to truncate tokenized sequences to; Effective maximum length is always the
             minimum of this value (if specified) and the underlying BERT model's sequence length.
         never_split: List of tokens which will never be split during tokenization. Only has an effect when
-            do_wordpiece_only=False
+            do_basic_tokenize=True
     """
 
     vocab_files_names = VOCAB_FILES_NAMES
diff --git a/transformers/tokenization_distilbert.py b/transformers/tokenization_distilbert.py
index f40bf2bd77..2f245d71dc 100644
--- a/transformers/tokenization_distilbert.py
+++ b/transformers/tokenization_distilbert.py
@@ -53,12 +53,12 @@ class DistilBertTokenizer(BertTokenizer):
 
     Args:
         vocab_file: Path to a one-wordpiece-per-line vocabulary file
-        do_lower_case: Whether to lower case the input. Only has an effect when do_wordpiece_only=False
+        do_lower_case: Whether to lower case the input. Only has an effect when do_basic_tokenize=True
         do_basic_tokenize: Whether to do basic tokenization before wordpiece.
         max_len: An artificial maximum length to truncate tokenized sequences to; Effective maximum length is always the
             minimum of this value (if specified) and the underlying BERT model's sequence length.
         never_split: List of tokens which will never be split during tokenization. Only has an effect when
-            do_wordpiece_only=False
+            do_basic_tokenize=True
     """
 
     vocab_files_names = VOCAB_FILES_NAMES