diff --git a/src/transformers/models/bert_generation/tokenization_bert_generation.py b/src/transformers/models/bert_generation/tokenization_bert_generation.py index f8d49f86ac..3b6298fcbd 100644 --- a/src/transformers/models/bert_generation/tokenization_bert_generation.py +++ b/src/transformers/models/bert_generation/tokenization_bert_generation.py @@ -51,15 +51,19 @@ class BertGenerationTokenizer(PreTrainedTokenizer): vocab_file (`str`): [SentencePiece](https://github.com/google/sentencepiece) file (generally has a *.spm* extension) that contains the vocabulary necessary to instantiate a tokenizer. - eos_token (`str`, *optional*, defaults to `"</s>"`): - The end of sequence token. bos_token (`str`, *optional*, defaults to `"<s>"`): The begin of sequence token. + eos_token (`str`, *optional*, defaults to `"</s>"`): + The end of sequence token. unk_token (`str`, *optional*, defaults to `"<unk>"`): The unknown token. A token that is not in the vocabulary cannot be converted to an ID and is set to be this token instead. pad_token (`str`, *optional*, defaults to `"<pad>"`): The token used for padding, for example when batching sequences of different lengths. + sep_token (`str`, *optional*, defaults to `"<::::>"`): + The separator token, which is used when building a sequence from multiple sequences, e.g. two sequences for + sequence classification or for a text and a question for question answering. It is also used as the last + token of a sequence built with special tokens. sp_model_kwargs (`dict`, *optional*): Will be passed to the `SentencePieceProcessor.__init__()` method. 
The [Python wrapper for SentencePiece](https://github.com/google/sentencepiece/tree/master/python) can be used, among other things, diff --git a/utils/check_docstrings.py b/utils/check_docstrings.py index 8a9aa1cf76..f46ad8995c 100644 --- a/utils/check_docstrings.py +++ b/utils/check_docstrings.py @@ -94,7 +94,6 @@ OBJECTS_TO_IGNORE = [ "BarthezTokenizerFast", "BeitModel", "BertConfig", - "BertGenerationTokenizer", "BertJapaneseTokenizer", "BertModel", "BertTokenizerFast",